In [1]:
import os
import datetime
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt


In [2]:
class DataFormatting():
    """Formatting helpers for the raw price DataFrame."""

    def __init__(self):
        # Placeholders only: `dataset` returns its results and does not store them here.
        self.df_data = None
        self.df_datetime = None

    @staticmethod
    def dataset(df):
        """Split a raw price frame into its feature columns and its time column.

        Declared as a static method so it can be called either on the class
        (``DataFormatting.dataset(df)``) or on an instance
        (``DataFormatting().dataset(df)``).

        Args:
            df: pandas DataFrame containing at least the columns
                'time', 'open', 'high', 'low', 'close' and 'tick_volume'.
                Its 'time' column is converted to datetime in place.

        Returns:
            Tuple ``(df_data, df_datetime)``: a DataFrame with the columns
            'open', 'high', 'low', 'close', 'tick_volume', and a
            single-column DataFrame holding the converted 'time' values.

        Raises:
            KeyError: if any of the required columns is missing from ``df``.
        """
        # convert the time column to datetime format (this modifies the caller's frame)
        df['time'] = pd.to_datetime(df['time'])
        # split the dataframe into the price/volume features and the timestamps
        df_data = df[['open','high','low','close','tick_volume']]
        df_datetime = df[['time']]

        return df_data, df_datetime


# Load the raw price table; the first CSV column is used as the index.
data = pd.read_csv('../data/gold_mt5.csv',index_col=[0]) 

# Instance is created here but is not used by the statements in this cell.
data_init = DataFormatting()
# `dataset` is called on the class itself; the second return value (the time column frame) is discarded.
df_data, _ = DataFormatting.dataset(data)
# Preview the first rows of the feature columns.
print(df_data.head())



# def transform_data(data == True):

#     if data == True:
#         df = pd.read_csv('../data/gold_mt5.csv',index_col=[0])
#         df['time'] = pd.to_datetime(df['time'])
#         df_data = df[['time','open','high','low','close','tick_volume']]
        



#     else: 
#         print('data set do not exist, please run "get_data_mt5.py" file to extract the data from mt5 terminal')
        

    

     open    high    low  close  tick_volume
0  310.30  314.40  310.0  313.5          561
1  312.60  315.20  311.9  314.3          491
2  313.40  314.20  311.8  312.9          431
3  312.25  313.65  308.9  309.9          716
4  309.39  310.90  306.3  308.2          802


In [3]:
def train_test_split(data, train_split=0.9):
    """Partition a DataFrame by row position into a training and a testing part.

    No shuffling is done: the leading rows become the training set and the
    trailing rows become the testing set.

    Args:
        data: pandas DataFrame to partition.
        train_split: fraction of the rows assigned to the training set.
            Defaults to 0.9.

    Returns:
        Tuple ``(X_train, X_test)``: the first ``int(train_split * len(data))``
        rows and all remaining rows.
    """
    n_train = int(train_split * len(data))  # number of leading rows kept for training
    return data.iloc[:n_train, :], data.iloc[n_train:, :]

# Keep only the training part (first 90% of the rows); the testing part is discarded here.
X_train, _ = train_test_split(df_data, train_split=0.9)



In [4]:
class Normalize():

    """Standardize a dataset with StandardScaler and map scaled values back.

    A single scaler is kept on the instance so that `inverse_transform`
    reuses the statistics learned by `fit_transform`.
    """
    def __init__(self):

        # one scaler per instance; it is fitted by `fit_transform`
        self.scaler = StandardScaler()
        # most recent results of the two operations (None until first use)
        self.data_fit_transformed = None
        self.data_inverse_transformed = None

    def fit_transform(self, data):
        """Fit the scaler on `data` and return the scaled values.

        Args:
            data: 2-D array-like (e.g. a DataFrame) of shape (n_samples, n_features).

        Returns:
            The scaled data as returned by the scaler's `transform`.
        """
        # fit the scaler on the dataset; a repeated call re-fits on the new data
        self.scaler.fit(data)
        # transform the dataset with the statistics just learned
        self.data_fit_transformed = self.scaler.transform(data)

        return self.data_fit_transformed

    def inverse_transform(self, data):
        """Map scaled values back to the original scale.

        Uses the scaler fitted by `fit_transform`, so `data` must have the same
        number of features as the data the scaler was fitted on.

        Args:
            data: 2-D array-like of scaled values.

        Returns:
            The data in the original units, as returned by the scaler's
            `inverse_transform`.

        Raises:
            Whatever the scaler raises when it has not been fitted yet, i.e.
            when `fit_transform` has not been called on this instance.
        """
        # reuse the fitted scaler instead of a fresh, unfitted one
        self.data_inverse_transformed = self.scaler.inverse_transform(data)

        return self.data_inverse_transformed

# normalize: the scaler is fitted on the training rows only (X_train)
scaler_init = Normalize()
scaled_data = scaler_init.fit_transform(X_train)
# preview the first 11 scaled rows
print(scaled_data[0:11])


[[-1.20962681 -1.20807925 -1.20489845 -1.20420423 -0.66571996]
 [-1.20486861 -1.20643412 -1.20094037 -1.20254869 -0.66671673]
 [-1.20321358 -1.20849053 -1.20114869 -1.20544588 -0.66757111]
 [-1.20559268 -1.20962156 -1.20718997 -1.21165413 -0.66351283]
 [-1.2115094  -1.21527671 -1.21260629 -1.21517214 -0.66228822]
 [-1.21510909 -1.21424851 -1.21114805 -1.21041248 -0.66522158]
 [-1.20861311 -1.21280901 -1.21427284 -1.21786238 -0.66043708]
 [-1.21800538 -1.22288546 -1.22343892 -1.22696782 -0.66166168]
 [-1.22586676 -1.22535316 -1.22156404 -1.22262204 -0.66536397]
 [-1.22338422 -1.22689548 -1.22052244 -1.22469146 -0.66593356]
 [-1.22348766 -1.22884908 -1.22698035 -1.23007194 -0.66344163]]


In [5]:
def data_transformation(data, lags = 5, target_col = 3):

    """Build sliding-window samples and next-step targets for the model.

    Every sample is a window of `lags` consecutive rows (all columns); its
    target is the `target_col` value of the row immediately after the window.

    Args:
        data: 2-D array-like of shape (n_rows, n_features), e.g. the NumPy
            array returned by the scaler. A DataFrame is accepted and is
            converted to a NumPy array first.
        lags: number of previous rows used to predict the next value.
            Defaults to 5.
        target_col: positional index of the column to predict. Defaults to 3,
            which is 'close' for the column order
            open, high, low, close, tick_volume.

    Returns:
        Tuple ``(X_data, y_data)`` of NumPy arrays with shapes
        (n_rows - lags, lags, n_features) and (n_rows - lags, 1).
        Both arrays are empty when `data` has no more than `lags` rows.

    Raises:
        ValueError: if `lags` is smaller than 1.
    """
    if lags < 1:
        raise ValueError("lags must be a positive integer")

    # positional 2-D slicing below needs an ndarray, not a DataFrame
    data = np.asarray(data)

    # one window per row index that has `lags` predecessors
    X_data = [data[i - lags: i, 0: data.shape[1]] for i in range(lags, len(data))]
    # list-index keeps each target as a length-1 vector, matching the original `3:4` slice
    y_data = [data[i, [target_col]] for i in range(lags, len(data))]

    # convert the lists to numpy arrays
    X_data = np.array(X_data)
    y_data = np.array(y_data)

    return X_data, y_data


# Build 5-row input windows from the scaled training data, with the matching next-row targets.
X_data, y_data = data_transformation(scaled_data, lags = 5)


In [6]:
# Inspect the first two samples and their targets; consecutive windows are shifted by one row.
print(X_data[0], y_data[0],X_data[1], y_data[1])

[[-1.20962681 -1.20807925 -1.20489845 -1.20420423 -0.66571996]
 [-1.20486861 -1.20643412 -1.20094037 -1.20254869 -0.66671673]
 [-1.20321358 -1.20849053 -1.20114869 -1.20544588 -0.66757111]
 [-1.20559268 -1.20962156 -1.20718997 -1.21165413 -0.66351283]
 [-1.2115094  -1.21527671 -1.21260629 -1.21517214 -0.66228822]] [-1.21041248] [[-1.20486861 -1.20643412 -1.20094037 -1.20254869 -0.66671673]
 [-1.20321358 -1.20849053 -1.20114869 -1.20544588 -0.66757111]
 [-1.20559268 -1.20962156 -1.20718997 -1.21165413 -0.66351283]
 [-1.2115094  -1.21527671 -1.21260629 -1.21517214 -0.66228822]
 [-1.21510909 -1.21424851 -1.21114805 -1.21041248 -0.66522158]] [-1.21786238]


In [None]:
class build_LSTM():

    def __init__(self,)