In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from torch.utils.data import Dataset, DataLoader, TensorDataset

# Load the raw Akbank bar data from CSV.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where this exact file exists. Consider a project-relative path; it is
# left unchanged here so the notebook keeps reading the same file.
DATA_PATH = "C:\\Users\\ERALP\\OneDrive\\Desktop\\Software Engineering\\SWE599---Financial-Time-Series-Volatility-and-Return-Forecasting\\Code\\akbank_data.csv"
akbank_data = pd.read_csv(DATA_PATH)

# The four price columns used throughout this cell (kept in one place so the
# column selection and the NumPy extraction below cannot drift apart).
PRICE_COLUMNS = ['Open', 'High', 'Low', 'Close']

# Keep only the timestamp parts and the OHLC prices.
# .copy() makes the subset an independent frame, so adding the 'Datetime'
# column below does not raise pandas' SettingWithCopyWarning when the CSV
# contains additional columns.
akbank_data = akbank_data[['Date', 'Hour'] + PRICE_COLUMNS].copy()

# Combine Date and Hour into a single timestamp so the data can be indexed
# and plotted as a time series.
# NOTE(review): the datetime format is inferred by pandas; if 'Date' is stored
# day-first, pass an explicit format= to avoid a month/day swap -- confirm
# against the CSV.
akbank_data['Datetime'] = pd.to_datetime(akbank_data['Date'] + " " + akbank_data['Hour'])

# Use the timestamp as the index and drop the now-redundant Date/Hour columns.
akbank_data = akbank_data.set_index('Datetime').drop(columns=['Date', 'Hour'])

# Plain NumPy array of the OHLC prices, in PRICE_COLUMNS order, for the
# numeric steps that follow.
prices = akbank_data[PRICE_COLUMNS].to_numpy()

# Rescale each price column to the [0, 1] range. MinMaxScaler is a linear
# rescaling, so it preserves the shape of each column's distribution, which
# is the stated reason for preferring it here (the data is not normally
# distributed).
# NOTE(review): the scaler is fitted on the full series. If a train/test split
# follows, fitting on the training portion only would avoid leaking test-set
# min/max into training -- confirm against the later cells.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_akbank_data = scaler.fit_transform(prices)

# Quick visual sanity check of the first five rows at each stage.
print("Original Data")
print(akbank_data.head())

print("Original Price Data:")
print(prices[:5])

print("Scaled Price Data:")
print(scaled_akbank_data[:5])



Original Data
                     Open  High   Low  Close
Datetime                                    
2019-11-02 11:00:00  6.47  6.59  6.46   6.56
2019-11-02 12:00:00  6.57  6.61  6.55   6.56
2019-11-02 13:00:00  6.54  6.54  6.54   6.54
2019-11-02 14:00:00  6.54  6.57  6.53   6.57
2019-11-02 15:00:00  6.57  6.58  6.52   6.56
Original Price Data:
[[6.47 6.59 6.46 6.56]
 [6.57 6.61 6.55 6.56]
 [6.54 6.54 6.54 6.54]
 [6.54 6.57 6.53 6.57]
 [6.57 6.58 6.52 6.56]]
Scaled Price Data:
[[0.12957409 0.1313783  0.13440197 0.13497301]
 [0.13557289 0.13255132 0.13995068 0.13497301]
 [0.13377325 0.12844575 0.13933416 0.13377325]
 [0.13377325 0.13020528 0.13871763 0.13557289]
 [0.13557289 0.13079179 0.13810111 0.13497301]]
