In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from torch.utils.data import Dataset, DataLoader, TensorDataset

# Load the raw Akbank price table from disk.
akbank_data = pd.read_csv("C:\\Users\\ERALP\\OneDrive\\Desktop\\Software Engineering\\SWE599---Financial-Time-Series-Volatility-and-Return-Forecasting\\Code\\akbank_data.csv")

# Keep only the date/hour columns and the four OHLC price columns.
akbank_data = akbank_data[['Date', 'Hour', 'Open', 'High', 'Low', 'Close']]

# Merge the textual Date and Hour columns into a single timestamp per row.
timestamps = pd.to_datetime(akbank_data['Date'] + " " + akbank_data['Hour'])

# Index the frame by that timestamp (so it behaves as a time series) and drop
# the two source columns, which are now redundant.
akbank_data = (
    akbank_data
    .assign(Datetime=timestamps)
    .set_index('Datetime')
    .drop(columns=['Date', 'Hour'])
)


# Extract the Open, High, Low and Close prices as a 2-D numpy array
# (one row per time step, one column per price field).
prices = akbank_data[['Open', 'High', 'Low', 'Close']].values

# Index of the first test row: the first 80% of the rows form the training period.
# It is computed before scaling so the scaler can be fitted on training rows only.
train_data_length = int(len(prices) * 0.8)

# Min-max scaling of every price column. The scaler is fitted on the training
# rows ONLY and then applied to the whole series. Fitting on the full series
# would leak the test period's minimum/maximum into the training features.
# Consequence: training rows map into [0, 1], while later rows may fall slightly
# outside that range if prices leave the range seen during training.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(prices[:train_data_length])
scaled_akbank_data = scaler.transform(prices)

print("Original Data")
print(akbank_data[:5])

print("Original Price Data:")
print(prices[:5])

print("Scaled Price Data:")
print(scaled_akbank_data[:5])



# Look-back window: number of consecutive past time steps fed to the model to
# predict the next one. 50 steps is intended as one trading week of hourly bars
# (5 days x 10 hours, per the original author's note - TODO confirm against the data).
lookback = 50

# At least one full window plus one target row is needed to build a sample.
if len(scaled_akbank_data) <= lookback:
    raise ValueError(
        f"Need more than {lookback} rows to build look-back windows, "
        f"got {len(scaled_akbank_data)}."
    )

# Sample k uses rows [k, k + lookback) of the scaled data (all price columns) as
# input and the last column (Close) of row k + lookback as its target.
X = np.array([
    scaled_akbank_data[end - lookback:end, :]
    for end in range(lookback, len(scaled_akbank_data))
])
y = scaled_akbank_data[lookback:, -1].copy()

# Report the array shapes: X is (samples, lookback, features), y is (samples,).
print("X shape:", X.shape)
print("y shape:", y.shape)

# Chronological 80/20 split. Window k has its target at row k + lookback of the
# scaled data, so the first (train_data_length - lookback) windows are exactly
# the ones whose targets lie inside the first train_data_length rows.
split_index = train_data_length - lookback
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# Convert every split to a float32 torch tensor so it can be fed to PyTorch.
X_train, y_train, X_test, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_test, y_test)
)

# Wrap the training tensors in a dataset and serve them in mini-batches of 32
# samples; shuffle=True makes the loader draw the samples in a new random order
# on every pass over the data.
train_dataset = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Defining the LSTM model

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # Both are kept because forward() needs them to size the initial states.
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # batch_first=True: inputs/outputs are (batch_size, seq_length, features).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # Maps the final hidden features (hidden_size) to output_size values.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return predictions of shape (batch_size, output_size).

        Args:
            x: Input tensor of shape (batch_size, seq_length, input_size).
        """
        # Zero initial hidden/cell states, shape (num_layers, batch_size, hidden_size).
        # They are created on the input's device and with the input's dtype so the
        # model keeps working after .to(device) / .double(); plain torch.zeros(...)
        # would always produce CPU float32 tensors and make nn.LSTM raise.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # out: (batch_size, seq_length, hidden_size); the final states are not needed.
        out, _ = self.lstm(x, (h0, c0))
        # Only the last time step's features feed the linear head.
        return self.fc(out[:, -1, :])


# Model hyper-parameters.
input_size = X_train.shape[-1]  # size of the last axis of X_train (features per time step)
hidden_size = 64                # width of the LSTM hidden state
num_layers = 2                  # number of stacked LSTM layers
output_size = 1                 # one predicted value per input sequence

# Build the network from the hyper-parameters above.
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)

# Training objective (mean squared error) and optimizer (Adam, learning rate 0.001).
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


