In [24]:
import pandas as pd

import numpy as np

import itertools

from datetime import datetime

import pytz
import sys
import pickle

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, max_error, r2_score

import seaborn as sns

import MetaTrader5 as mt5

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Query window for the historical bars; both bounds are timezone-aware (UTC).
gc_o_TIME_ZONE = pytz.timezone("Etc/UTC")
gc_dt_FROM = datetime(year=2021, month=11, day=1, tzinfo=gc_o_TIME_ZONE)
gc_dt_TO = datetime(year=2021, month=11, day=20, tzinfo=gc_o_TIME_ZONE)
def dfGetMarketOhlc(sSymbol):
    """Download one-minute bars for ``sSymbol`` from MetaTrader 5.

    The bars between the module-level ``gc_dt_FROM`` and ``gc_dt_TO`` are
    requested with ``mt5.TIMEFRAME_M1``.

    Parameters
    ----------
    sSymbol : str
        Symbol name passed to ``mt5.symbol_info`` and ``mt5.copy_rates_range``.

    Returns
    -------
    pandas.DataFrame
        The returned rates, indexed by a ``timestamp`` column parsed from the
        ``time`` field (interpreted as seconds). The ``time`` column is kept.

    Raises
    ------
    SystemExit
        When the terminal cannot be initialised, the symbol is unknown, or no
        rates are returned. The MT5 error is printed before exiting.
    """
    if not mt5.initialize():
        print("initialize() failed, error code =", mt5.last_error())
        sys.exit()

    # From here on the connection is open: release it on every exit path.
    try:
        aSymbolInfo = mt5.symbol_info(sSymbol)
        if not aSymbolInfo:
            print("symbol_info() failed, error code =", mt5.last_error())
            sys.exit()

        aRates = mt5.copy_rates_range(
            sSymbol,
            mt5.TIMEFRAME_M1,
            gc_dt_FROM,
            gc_dt_TO,
        )

        # copy_rates_range() yields None on failure, so len() alone would
        # raise TypeError instead of reporting the MT5 error.
        if aRates is None or len(aRates) == 0:
            print("copy_rates_range() failed, error code =", mt5.last_error())
            sys.exit()
    finally:
        mt5.shutdown()

    dfOhlc = pd.DataFrame(aRates)

    dfOhlc['timestamp'] = pd.to_datetime(dfOhlc['time'], unit="s")
    dfOhlc.set_index('timestamp', inplace=True)

    return dfOhlc

In [3]:
def dfScaleData(sSubModelName, sScalerName, dfTrain, dfValidation, dfTest):
    """Standardise three data partitions with a scaler fitted on the training one.

    A ``StandardScaler`` is fitted on ``dfTrain`` only and then applied to all
    three partitions. The fitted scaler is pickled to
    ``<sSubModelName>/__scalers__/<sScalerName>.sav`` (directories are created
    when missing).

    Parameters
    ----------
    sSubModelName : str
        Directory under which the ``__scalers__`` folder is placed.
    sScalerName : str
        File name (without extension) of the pickled scaler.
    dfTrain, dfValidation, dfTest : pandas.DataFrame
        Partitions to scale; columns and index of each are preserved.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(dfScaledTrain, dfScaledValidation, dfScaledTest)``.
    """
    oScaler = StandardScaler()
    oScaler.fit(dfTrain)

    def _dfTransform(dfPartition):
        # Re-wrap the transformed array so labels survive the scaling.
        return pd.DataFrame(
            oScaler.transform(dfPartition),
            columns=dfPartition.columns,
            index=dfPartition.index,
        )

    dfScaledTrain = _dfTransform(dfTrain)
    dfScaledValidation = _dfTransform(dfValidation)
    dfScaledTest = _dfTransform(dfTest)

    sScalersDirectory = os.path.join(sSubModelName, "__scalers__")
    sScalerFilePath = os.path.join(sScalersDirectory, sScalerName + ".sav")
    os.makedirs(os.path.dirname(sScalerFilePath), exist_ok=True)
    # The context manager guarantees the file is flushed and closed.
    with open(sScalerFilePath, 'wb') as oScalerFile:
        pickle.dump(oScaler, oScalerFile)

    return dfScaledTrain, dfScaledValidation, dfScaledTest

In [4]:
# Symbol requested from MetaTrader 5; also used as the root folder name for saved artefacts.
sSymbol = "NAT.GAS"

In [5]:
# Download the one-minute bars for the configured symbol.
dfOhlc = dfGetMarketOhlc(sSymbol)

In [6]:
# Calendar features derived from the timestamp index.
dfOhlc = dfOhlc.assign(
    weekday=dfOhlc.index.weekday,
    hour=dfOhlc.index.hour,
    minute=dfOhlc.index.minute,
)

In [7]:
# The raw `time` column duplicates the timestamp index, so it is removed.
# Reassigning with errors="ignore" (rather than an in-place drop) keeps this
# cell safe to re-run: a second execution no longer raises KeyError.
dfOhlc = dfOhlc.drop(columns=["time"], errors="ignore")

In [8]:
# Inspect the prepared frame (the notebook display truncates long frames).
dfOhlc

Unnamed: 0_level_0,open,high,low,close,tick_volume,spread,real_volume,weekday,hour,minute
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-11-01 00:00:00,5.341,5.366,5.335,5.338,160,4,1597000,0,0,0
2021-11-01 00:01:00,5.340,5.342,5.330,5.332,55,4,550000,0,0,1
2021-11-01 00:02:00,5.332,5.345,5.332,5.342,42,10,420000,0,0,2
2021-11-01 00:03:00,5.343,5.346,5.334,5.334,27,10,267000,0,0,3
2021-11-01 00:04:00,5.333,5.333,5.324,5.328,14,10,137000,0,0,4
...,...,...,...,...,...,...,...,...,...,...
2021-11-19 23:55:00,5.132,5.134,5.131,5.134,6,10,60000,4,23,55
2021-11-19 23:56:00,5.136,5.136,5.135,5.135,4,10,40000,4,23,56
2021-11-19 23:57:00,5.137,5.137,5.133,5.133,7,10,70000,4,23,57
2021-11-19 23:58:00,5.135,5.138,5.132,5.138,4,15,40000,4,23,58


# DEEP LEARNING MODEL

In [9]:
# Path prefix under the symbol folder; used for the saved weights and as the parent of the scalers folder.
sModelName = os.path.join(sSymbol , "__deep learning model__")

## Preprocessing

In [73]:
# Features fed to the model and the lags (relative bar offsets) they are taken at.
aInputFeatures = ['open', 'high', 'low', 'close', 'tick_volume', 'spread', 'real_volume','weekday', 'hour', 'minute']
aBackwardTimeSteps = range(-3, 0)

# One column per (time_step, feature) pair, time step varying slowest.
tplInputColumns = list(itertools.product(aBackwardTimeSteps, aInputFeatures))
miInputColums = pd.MultiIndex.from_tuples(tplInputColumns, names= ["time_step", "feature"])

# Build the whole frame in one step. Filling an index-less empty frame column
# by column through `.loc[:, col] = Series` depends on implicit row expansion,
# which is fragile across pandas versions.
# shift(-step) with a negative step moves past bars onto the current row.
dfInput = pd.DataFrame(
    {
        (iTimeStep, sInputFeature): dfOhlc[sInputFeature].shift(-iTimeStep)
        for iTimeStep, sInputFeature in tplInputColumns
    }
)
dfInput.columns = miInputColums

In [75]:
# Target features and the forward offsets they are taken at (0 = the current bar).
aOutputFeatures = ['open', 'high', 'low', 'close', 'tick_volume', 'spread', 'real_volume','weekday', 'hour', 'minute']
aForwardTimeSteps = range(0, 1)

# One column per (time_step, feature) pair, time step varying slowest.
tplOutputColumns = list(itertools.product(aForwardTimeSteps, aOutputFeatures))
ixOutputColums = pd.MultiIndex.from_tuples(tplOutputColumns, names= ["time_step", "feature"])

# Build the whole frame in one step. Filling an index-less empty frame column
# by column through `.loc[:, col] = Series` depends on implicit row expansion,
# which is fragile across pandas versions.
dfOutput = pd.DataFrame(
    {
        (iTimeStep, sOutputFeature): dfOhlc[sOutputFeature].shift(-iTimeStep)
        for iTimeStep, sOutputFeature in tplOutputColumns
    }
)
dfOutput.columns = ixOutputColums

In [76]:
# Align inputs and targets on the shared index and keep only complete rows
# (the shifted columns leave NaN at the edges of the series).
dfMerged = dfInput.join(dfOutput, how="inner").dropna()

# Split the cleaned frame back into its input and target halves.
dfInput = dfMerged[dfInput.columns]
dfOutput = dfMerged[dfOutput.columns]

In [None]:
# Quick visual check of the target frame.
sns.lineplot(data = dfOutput)

### Split Data

In [None]:
# Share of the full data set assigned to each partition.
gc_dec_TRAINING_RATIO = 0.6
gc_dec_VALIDATION_RATIO = 0.2
gc_dec_TEST_RATIO = 0.2

# The three shares must cover the whole data set.
assert abs(gc_dec_TRAINING_RATIO + gc_dec_VALIDATION_RATIO + gc_dec_TEST_RATIO - 1.0) < 1e-9

# shuffle=False keeps chronological order: the test set is the most recent slice.
dfInputTrainValidation, dfInputTest, dfOutputTrainValidation, dfOutputTest = train_test_split(
    dfInput,
    dfOutput,
    test_size=gc_dec_TEST_RATIO,
    shuffle=False)

# The second split works on what is left after removing the test set, so the
# validation share has to be rescaled by (1 - test share). The previous
# expression used the test ratio here and ignored gc_dec_VALIDATION_RATIO;
# it was only right because both ratios happen to be equal.
dfInputTrain, dfInputValidation, dfOutputTrain, dfOutputValidation = train_test_split(
    dfInputTrainValidation,
    dfOutputTrainValidation,
    test_size=gc_dec_VALIDATION_RATIO / (1 - gc_dec_TEST_RATIO),
    shuffle=False)


# Cast every partition to float before scaling.
dfInputTrain = dfInputTrain.astype(float)
dfInputValidation = dfInputValidation.astype(float)
dfInputTest = dfInputTest.astype(float)
dfOutputTrain = dfOutputTrain.astype(float)
dfOutputValidation = dfOutputValidation.astype(float)
dfOutputTest = dfOutputTest.astype(float)

### Scale Data

In [None]:
# Inputs and targets get separate scalers, each fitted on its training partition
# and saved under the model folder as "input" and "output".
dfScaledInputTrain, dfScaledInputValidation, dfScaledInputTest = dfScaleData(
    sSubModelName=sModelName,
    sScalerName="input",
    dfTrain=dfInputTrain,
    dfValidation=dfInputValidation,
    dfTest=dfInputTest,
)

dfScaledOutputTrain, dfScaledOutputValidation, dfScaledOutputTest = dfScaleData(
    sSubModelName=sModelName,
    sScalerName="output",
    dfTrain=dfOutputTrain,
    dfValidation=dfOutputValidation,
    dfTest=dfOutputTest,
)

## Train

### Set Hyperparameters

In [None]:
iBatchSize = 128

# Learning rate starts at 1e-05 and is halved every 100000 optimizer steps.
oLrSchedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-05, decay_steps=100000, decay_rate=0.50, staircase=True
)

# Pass the schedule to the optimizer: it was previously built but never used,
# and the optimizer received the constant 1e-05 instead.
oOptimizer = tf.keras.optimizers.Adam(learning_rate=oLrSchedule)

# Stop once validation loss has not improved for 20 epochs and roll back to the
# best weights; the epoch count below is therefore only an upper bound.
oEarlyStop = EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    verbose = 0 ,
    patience = 20,
    restore_best_weights = True)
iEpochSize = 10000

### Build Model

In [None]:
# One flat input vector per sample: every (time_step, feature) input column.
# The trailing comma matters: `(n)` is just an int, `(n,)` is the shape tuple.
oInputRates = keras.Input(
    shape=(dfScaledInputTrain.shape[1],),
    name="aRates")

# NOTE(review): these Dense layers have no activation, so the stack is still a
# linear map of the inputs — confirm whether a non-linearity was intended.
aW = Dense(100)(oInputRates)
aW = Dense(100)(aW)
aW = Dense(100)(aW)

# One output unit per target column. The output scaler is fitted on all target
# columns and the predictions are later labelled with dfOutput.columns, so a
# single-unit output cannot be inverse-transformed or labelled.
aOutputRegression = Dense(dfScaledOutputTrain.shape[1], name = "Regression")(aW)

oPredictiveModel = keras.Model(
    inputs=oInputRates,
    outputs=aOutputRegression
)

### Compile Model

In [None]:
# Train against mean squared error with the optimizer configured above.
oPredictiveModel.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=oOptimizer,
)


### Fit Model

In [None]:
# Train on the scaled training partition, monitoring the scaled validation
# partition; the early-stopping callback decides when training ends.
oPredictiveModel.fit(
    x=dfScaledInputTrain,
    y=dfScaledOutputTrain,
    batch_size=iBatchSize,
    epochs=iEpochSize,
    verbose=1,
    callbacks=[oEarlyStop],
    validation_data=(dfScaledInputValidation, dfScaledOutputValidation),
    validation_batch_size=iBatchSize,
)

# Persist the trained weights under the model path.
oPredictiveModel.save_weights(sModelName)

# Loss curves recorded during the fit above.
pd.DataFrame(oPredictiveModel.history.history).plot()

## Test

In [None]:
# Restore the weights saved after training and predict on the scaled test inputs.
oPredictiveModel.load_weights(sModelName)

aPrediction = oPredictiveModel.predict(dfScaledInputTest)

# Load the output scaler written by dfScaleData(..., "output", ...).
# NOTE: pickle.load executes arbitrary code; only load scaler files this
# notebook produced itself.
sOutputScalerPath = os.path.join(sModelName, "__scalers__", "output" + ".sav")
with open(sOutputScalerPath, 'rb') as oScalerFile:
    oScalerOutput = pickle.load(oScalerFile)

# Map the predictions from scaled units back to the original target units.
aPrediction = oScalerOutput.inverse_transform(aPrediction)

dfPrediction = pd.DataFrame(
    aPrediction,
    index=dfScaledOutputTest.index,
    columns=dfOutput.columns,
)

In [None]:
# Pair the first predicted column with the first actual target column.
dfTestComparision = pd.DataFrame(dfPrediction.iloc[:,0])
# Inner join on the index; lsuffix is given because both sides may carry the
# same column label — NOTE(review): confirm how the suffix interacts with the
# MultiIndex column labels.
dfTestComparision = dfTestComparision.join(dfOutputTest.iloc[:,0], how = "inner", lsuffix="prediction")
# Flatten to two plainly named columns for plotting.
dfTestComparision.columns = ["Prediction", "Actual"]

# Predicted versus actual values for the first target column.
sns.scatterplot(data = dfTestComparision, x = "Actual", y ="Prediction")

In [None]:
# Regression metrics are computed on the first target column only.
y_pred = dfPrediction.iloc[:, 0]
y_true = dfOutputTest.iloc[:, 0]

# (label, value) pairs, evaluated in the order they are printed.
aMetrics = [
    (sLabel, fnScore(y_true, y_pred))
    for sLabel, fnScore in (
        ('mean absolute error', mean_absolute_error),
        ('mean squared error', mean_squared_error),
        ('max error', max_error),
        ('r2 score', r2_score),
    )
]

print('Metrics for regression:')
for metric_name, metric_value in aMetrics:
    print(f'{metric_name:>25s}: {metric_value: >20.3f}')

In [None]:
# Inspect one 15-row window of the test set (window number 195).
iFrom = 195 * 15
iTo = iFrom + 15

# R² over all target columns inside the window.
print(
    r2_score(
        y_true=dfOutputTest.iloc[iFrom:iTo],
        y_pred=dfPrediction.iloc[iFrom:iTo],
    )
)
sns.lineplot(data=dfTestComparision.iloc[iFrom:iTo])

# REFERENCES

https://www.tensorflow.org/guide/keras/train_and_evaluate#passing_data_to_multi-input_multi-output_models

https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch/

https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit/

https://towardsdatascience.com/customize-loss-function-to-make-lstm-model-more-applicable-in-stock-price-prediction-b1c50e50b16c

https://keras.io/getting_started/faq/