# IMPORT LIBRARIES

In [1]:
import pandas as pd

import numpy as np

import itertools

import time


import pickle

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

import seaborn as sns

import matplotlib.pyplot as plt

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping

# CONFIGURATION

In [2]:
# Symbol whose return is the prediction target; also the first sub-folder under 'Data/'.
sOutputSymbol = 'ETHUSD'
# Architecture switch: each "Build Model" cell only runs when its own name matches this
# value ('MLP', 'LSTM', 'Convolutional Encoder Decoder' or 'Luongs Attention').
sModelType = 'MLP'
# Name of the experiment design; used as a sub-folder when locating Design.csv.
sDesignType = 'Full Factorial Design'

# 'Run ID' of the design row that supplies this run's hyperparameters.
iTrialId = 0

In [3]:
# Folder that holds Design.csv for the selected symbol / model type / design type.
sFolderPath = 'Data/'+ sOutputSymbol +'//'+ sModelType + '//'+ sDesignType+ '//'
# Design table, indexed by its 'Run ID' column.
dfDesign = pd.read_csv( sFolderPath + 'Design.csv', index_col = 'Run ID')
# Hyperparameters of the run selected by iTrialId.
iBatchSize = dfDesign.loc[iTrialId, 'Batch Size']
iNrOfHiddenNeurons = dfDesign.loc[iTrialId, 'Number of Hidden Neurons']
# Number of past time steps fed to the model / future time steps it predicts.
iBackwardTimeWindow = 3
iForwardTimeWindow = 3

In [4]:
# Per-run location: the design folder path followed directly by the trial id.
sModelName = f"{sFolderPath}{iTrialId}"

# LOAD DATA

## Cryptocurrency List

In [5]:
# List of symbols available as model inputs (one 'Symbol' per row).
# A forward slash keeps the path portable: the former 'Data\c...' literal only resolved
# on Windows and depended on '\c' not being a recognised escape sequence.
dfCrpytocurrencies = pd.read_csv('Data/cryptocurrencies.csv')
dfCrpytocurrencies.head()

Unnamed: 0,Symbol
0,BCHUSD
1,BTCUSD
2,ETHUSD
3,LTCUSD
4,RPLUSD


## Market Data

In [6]:
# Market data for all symbols, indexed by its 'timestamp' column parsed as datetimes.
# A forward slash keeps the path portable: the former 'Data\d...' literal only resolved
# on Windows and depended on '\d' not being a recognised escape sequence.
dfOhlc = pd.read_csv('Data/dfOhlc.csv')
dfOhlc['timestamp'] = pd.DatetimeIndex(dfOhlc['timestamp'])
# Non-inplace form: the cell rebinds dfOhlc instead of mutating the loaded frame.
dfOhlc = dfOhlc.set_index('timestamp')

# PREPROCESSING

## Drop Unnecessary Features

In [7]:
# Keep only the columns whose name contains one of the required feature names.
aNecessaryFeatures = ['weekday', 'hour', 'minute' ,'upper_shadow', 'lower_shadow' ,'return']
aColumns = dfOhlc.columns

# The feature names are OR-ed into a single regular expression for the substring match.
sFeaturePattern = '|'.join(aNecessaryFeatures)
aNecessaryColumns = aColumns[aColumns.str.contains(sFeaturePattern)]

dfOhlc = dfOhlc.loc[:, aNecessaryColumns]

## Split Data

In [8]:
# Chronological train / validation / test split; shuffle=False keeps the time order.
fTrainingRatio = 0.7
fValidationRatio = 0.15
fTestRatio = 0.15

# First cut: training rows versus the remaining hold-out rows.
fHoldoutShare = 1-fTrainingRatio
ixTrain, ixHoldout = train_test_split(dfOhlc.index, test_size=fHoldoutShare, shuffle=False)

# Second cut: the hold-out rows are divided into validation (earlier) and test (later).
fTestShareOfHoldout = fTestRatio/(fTestRatio + fValidationRatio)
ixValidation, ixTest = train_test_split(ixHoldout, test_size=fTestShareOfHoldout, shuffle=False)

## Scale Data

In [9]:
# Scale every column of dfOhlc into dfScaledOhlc and persist one fitted scaler per column.
#   * columns containing a name from aFaeturesNotToScale are copied unchanged,
#   * columns containing a name from aFeaturesToMinMaxScale use MinMaxScaler,
#   * every other column uses StandardScaler.
# Scalers are fitted on the training + validation rows; test rows are only transformed.
dfScaledOhlc = pd.DataFrame(index = dfOhlc.index, columns  = dfOhlc.columns)
aFeaturesToMinMaxScale = [':weekday', ':hour', ':minute']
aFaeturesNotToScale = ['upper_shadow', 'lower_shadow' ,'return']

for sColumn in dfOhlc.columns:

    if any(ele in sColumn  for ele in aFaeturesNotToScale):
        dfScaledOhlc.loc[:, sColumn] = dfOhlc.loc[:, sColumn]
        continue
    elif any(ele in sColumn  for ele in aFeaturesToMinMaxScale):
        oScaler = MinMaxScaler()
    else:
        oScaler = StandardScaler()

    # Single-column frames, because the scalers expect 2-D input.
    dfTrain = pd.DataFrame(dfOhlc.loc[ixTrain, sColumn])
    dfValidation = pd.DataFrame(dfOhlc.loc[ixValidation, sColumn])
    dfTest = pd.DataFrame(dfOhlc.loc[ixTest, sColumn])

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    oScaler.fit(pd.concat([dfTrain, dfValidation]))

    # transform() returns an (n, 1) array; flatten it back to one value per row.
    dfScaledOhlc.loc[ixTrain, sColumn] = np.reshape(oScaler.transform(dfTrain), (-1))
    dfScaledOhlc.loc[ixValidation, sColumn] = np.reshape(oScaler.transform(dfValidation), (-1))
    dfScaledOhlc.loc[ixTest, sColumn] = np.reshape(oScaler.transform(dfTest), (-1))

    # Stored as <sModelName>/__scalers__/<column name>.sav
    sScalerFilePath = os.path.join(sModelName, "__scalers__", sColumn + ".sav")
    os.makedirs(os.path.dirname(sScalerFilePath), exist_ok=True)

    # The context manager closes the file handle even if pickling fails.
    with open(sScalerFilePath, 'wb') as oScalerFile:
        pickle.dump(oScaler, oScalerFile)


# The frame was created without a dtype (object columns); make it numeric.
dfScaledOhlc = dfScaledOhlc.astype('float')

## Log Transform

In [38]:
# Preview of the log(1 + x) transform without modifying dfScaledOhlc.
# np.log1p evaluates log(1 + x) without first rounding 1 + x, which keeps precision for
# values close to zero.
np.log1p(dfScaledOhlc).describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
BTCUSD:weekday,6516.0,0.3772,0.243769,0.0,0.223144,0.405465,0.559616,0.693147
BTCUSD:hour,6516.0,0.384921,0.206476,0.0,0.231802,0.419854,0.578078,0.693147
BTCUSD:minute,6516.0,0.346574,0.3466,0.0,0.0,0.346574,0.693147,0.693147
BTCUSD:return,6516.0,-1.1e-05,0.005382,-0.057812,-0.002613,4.2e-05,0.00257,0.039602
BTCUSD:upper_shadow,6516.0,0.001983,0.00195,0.0,0.000744,0.001519,0.002621,0.035928
BTCUSD:lower_shadow,6516.0,0.002207,0.002497,0.0,0.000817,0.001648,0.002863,0.087064
ETHUSD:weekday,6516.0,0.3772,0.243769,0.0,0.223144,0.405465,0.559616,0.693147
ETHUSD:hour,6516.0,0.384921,0.206476,0.0,0.231802,0.419854,0.578078,0.693147
ETHUSD:minute,6516.0,0.346574,0.3466,0.0,0.0,0.346574,0.693147,0.693147
ETHUSD:return,6516.0,-3.2e-05,0.006729,-0.070352,-0.003332,8.6e-05,0.003434,0.039653


In [None]:
# Apply log(1 + x) to every column. np.log1p evaluates it without first rounding 1 + x,
# which keeps precision for values close to zero.
# NOTE: this cell overwrites dfScaledOhlc with its own transform, so running it a second
# time in the same kernel session applies the transform again.
dfScaledOhlc = np.log1p(dfScaledOhlc)

## Create Input Dataset

In [None]:
# Build the model input: for every timestamp, the scaled features of all symbols at each
# of the iBackwardTimeWindow preceding time steps.
aInputSymbols = dfCrpytocurrencies['Symbol'].values
aInputFeatures = ['weekday', 'hour', 'minute' ,'upper_shadow', 'lower_shadow' ,'return']

# Column names in dfScaledOhlc have the form '<symbol>:<feature>'.
aInputFeatures = list(map(":".join, itertools.product(aInputSymbols, aInputFeatures)))

iNrInputFeatures = len(aInputFeatures)

# Negative steps -iBackwardTimeWindow .. -1 denote past observations.
aBackwardTimeSteps = range(-iBackwardTimeWindow, 0)

# Step-major column order, so each row can later be reshaped to (time_step, feature).
aTplInputColumns = list(itertools.product(aBackwardTimeSteps, aInputFeatures))
aIxInputColumns = pd.MultiIndex.from_tuples(aTplInputColumns, names= ['time_step', 'feature'])

# Assemble all shifted series in one concat so the frame carries dfScaledOhlc's index.
# Assigning them column by column through .loc into a frame created without an index
# aligns each series to that empty index instead.
# shift(-step) with a negative step moves earlier rows forward, so time_step -k holds
# the value observed k rows before the current row.
dfInput = pd.concat(
    [dfScaledOhlc[sFeature].shift(-iTimeStep) for iTimeStep, sFeature in aTplInputColumns],
    axis=1,
)
dfInput.columns = aIxInputColumns

# The first rows have no complete history; remove them from the data and from each split.
ixNas = dfInput.index[dfInput.isna().any(axis=1)]
dfInput = dfInput.drop(ixNas, errors = 'ignore')
ixTrain = ixTrain.drop(ixNas, errors = 'ignore')
ixValidation = ixValidation.drop(ixNas, errors = 'ignore')
ixTest = ixTest.drop(ixNas, errors = 'ignore')

dfInput.head()

## Create Output Dataset

In [None]:
# Build the model target: the unscaled return of sOutputSymbol at the current row and the
# following iForwardTimeWindow - 1 rows.
aOutputFeatures = ['return']
aOutputFeatures = list(map(":".join, itertools.product([sOutputSymbol], aOutputFeatures)))
iNrOutputFeatures = len(aOutputFeatures)

# Steps 0 .. iForwardTimeWindow - 1 denote the current and future observations.
aForwardTimeSteps = range(0, iForwardTimeWindow)

aTplOutputColumns = list(itertools.product(aForwardTimeSteps, aOutputFeatures))
aIxOutputColumns = pd.MultiIndex.from_tuples(aTplOutputColumns, names= ['time_step', 'feature'])

# Assemble all shifted series in one concat so the frame carries dfOhlc's index.
# Assigning them column by column through .loc into a frame created without an index
# aligns each series to that empty index instead.
# shift(-step) with a positive step pulls later rows back, so time_step k holds the value
# observed k rows after the current row.
dfOutput = pd.concat(
    [dfOhlc[sFeature].shift(-iTimeStep) for iTimeStep, sFeature in aTplOutputColumns],
    axis=1,
)
dfOutput.columns = aIxOutputColumns

# The last rows have no complete future; remove them from the data and from each split.
ixNas = dfOutput.index[dfOutput.isna().any(axis=1)]
dfOutput = dfOutput.drop(ixNas, errors = 'ignore')
ixTrain = ixTrain.drop(ixNas, errors = 'ignore')
ixValidation = ixValidation.drop(ixNas, errors = 'ignore')
ixTest = ixTest.drop(ixNas, errors = 'ignore')
dfOutput.head()

## Reshape Datasets

In [None]:
# Keep only the timestamps for which both a complete input row and a complete target row
# exist, and restrict the three splits to those timestamps.
axMerged = dfInput.index.join(dfOutput.index, how = 'inner')

dfInput = dfInput.loc[axMerged]
dfOutput = dfOutput.loc[axMerged]

ixTrain, ixValidation, ixTest = (
    ixPart.join(axMerged, how = "inner") for ixPart in (ixTrain, ixValidation, ixTest)
)


def fReshapeToWindows(dfPart, iWindow, iNrFeatures):
    """Return dfPart's values as a float32 array of shape (rows, iWindow, iNrFeatures)."""
    aWindows = dfPart.values.reshape((dfPart.shape[0], iWindow, iNrFeatures))
    return np.asarray(aWindows, np.float32)


# Per-split frames.
dfInputTrain = dfInput.loc[ixTrain]
dfInputValidation = dfInput.loc[ixValidation]
dfInputTest = dfInput.loc[ixTest]

dfOutputTrain = dfOutput.loc[ixTrain]
dfOutputValidation = dfOutput.loc[ixValidation]
dfOutputTest = dfOutput.loc[ixTest]

# Per-split arrays in the (samples, time steps, features) layout used by the models.
aInputTrain = fReshapeToWindows(dfInputTrain, iBackwardTimeWindow, iNrInputFeatures)
aInputValidation = fReshapeToWindows(dfInputValidation, iBackwardTimeWindow, iNrInputFeatures)
aInputTest = fReshapeToWindows(dfInputTest, iBackwardTimeWindow, iNrInputFeatures)

aOutputTrain = fReshapeToWindows(dfOutputTrain, iForwardTimeWindow, iNrOutputFeatures)
aOutputValidation = fReshapeToWindows(dfOutputValidation, iForwardTimeWindow, iNrOutputFeatures)
aOutputTest = fReshapeToWindows(dfOutputTest, iForwardTimeWindow, iNrOutputFeatures)

#  MODEL DEVELOPMENT

## Set Early Stopping

In [None]:
# Seed for the weight initializer; in this notebook only the MLP's Dense layers are given
# oInitilizer.
i_c_Seed = 1
oInitilizer = tf.keras.initializers.GlorotUniform(seed = i_c_Seed)

# Stop training once 'val_loss' has not reached a new minimum for 20 consecutive epochs,
# then restore the weights of the best epoch.
oEarlyStop = EarlyStopping(
    monitor = 'val_loss', 
    mode = 'min', 
    verbose = 0 , 
    patience = 20, 
    restore_best_weights = True)

## Define Custom Loss Function

The loss function is designed to satisfy the following criteria:
1. Predictions whose sign is opposite to the actual value should be penalized.
1. Among opposite-sign predictions, the loss grows with the magnitude of the error.
1. Any same-sign prediction is better than any opposite-sign prediction.
1. A same-sign prediction is best when the error is 0.

The following criterion should also have been implemented, but the attempt was unsuccessful because it forced negative errors. It will be used as a 'metric' function instead.
1. For same-sign predictions, a positive error is better than a negative error (err = act - pred).

In [None]:
def fCalculateLoss(aActual, aPrediction):
    """Mean absolute error plus a penalty on predictions with the wrong sign.

    Every element contributes its absolute error. Elements where the sign of the
    prediction differs from the sign of the actual value additionally contribute a
    penalty equal to the largest absolute error in the whole tensor multiplied by the
    size of the last axis of ``aActual``.

    Args:
        aActual: tensor of target values.
        aPrediction: tensor of predicted values, broadcastable against ``aActual``.

    Returns:
        Scalar tensor: the mean of (absolute error + sign penalty) over all elements.
    """
    aAbsError = tf.math.abs(tf.math.subtract(aActual, aPrediction))

    # Penalty level: worst absolute error scaled by the length of the last axis.
    iLastAxisSize = aActual.shape[-1]
    fPenalty = tf.math.reduce_max(aAbsError) * iLastAxisSize

    # |sign(actual) - sign(prediction)| is 0 exactly where the signs agree.
    aSignGap = tf.math.abs(tf.math.sign(aActual) - tf.math.sign(aPrediction))
    aSignPenalty = tf.where(aSignGap == 0, aSignGap, fPenalty)

    return tf.math.reduce_mean(aAbsError + aSignPenalty)


## Build Model

### MLP

In [None]:
if sModelType == 'MLP':
    # Flatten the (time step, feature) window, pass it through one hidden Dense layer and
    # one output Dense layer, then reshape to (forecast step, output feature).
    # No activation argument is passed, so both Dense layers use Keras' default.
    aInputMlp = keras.Input(shape=(iBackwardTimeWindow, iNrInputFeatures))

    aW = keras.layers.Flatten()(aInputMlp)
    for iNrUnits in (iNrOfHiddenNeurons, iForwardTimeWindow*iNrOutputFeatures):
        aW = keras.layers.Dense(iNrUnits, kernel_initializer = oInitilizer)(aW)
    aW = keras.layers.Reshape((iForwardTimeWindow, iNrOutputFeatures))(aW)

    aOutputMlp = aW
    oModelMlp = keras.Model(inputs=aInputMlp, outputs=aOutputMlp)

    oOptimizerMlp = tf.keras.optimizers.Adam(learning_rate=1e-04)
    oModelMlp.compile(loss = fCalculateLoss, optimizer=oOptimizerMlp)

    # Downstream cells work on whichever model was built, through this common name.
    oPredictiveModel = oModelMlp

### LSTM

In [None]:
if sModelType == 'LSTM':
    # One LSTM layer returning its full sequence, flattened and projected by a Dense layer
    # to (forecast step, output feature).
    aInputDeepLstm = keras.Input(shape=(iBackwardTimeWindow, iNrInputFeatures))

    # NOTE(review): the LSTM width is hard-coded to 64 and neither iNrOfHiddenNeurons nor
    # oInitilizer is used in this branch -- confirm this is intended.
    aLayersDeepLstm = [
        keras.layers.LSTM(64, return_sequences = True),
        keras.layers.Flatten(),
        keras.layers.Dense(iForwardTimeWindow*iNrOutputFeatures),
        keras.layers.Reshape((iForwardTimeWindow, iNrOutputFeatures)),
    ]

    aW = aInputDeepLstm
    for oLayer in aLayersDeepLstm:
        aW = oLayer(aW)

    aOutputDeepLstm = aW
    oModelDeepLstm = keras.Model(inputs=aInputDeepLstm, outputs=aOutputDeepLstm)

    oOptimizerDeepLstm = tf.keras.optimizers.Adam(learning_rate=1e-04)
    oModelDeepLstm.compile(loss = fCalculateLoss, optimizer=oOptimizerDeepLstm)

    # Downstream cells work on whichever model was built, through this common name.
    oPredictiveModel = oModelDeepLstm


### Convolutional Encoder-Decoder Model

In [None]:
if sModelType == 'Convolutional Encoder Decoder':
    # Encoder LSTM -> Conv1D / MaxPooling1D feature map -> decoder LSTM -> per-step Dense.
    aInputs = keras.Input(
        shape=(iBackwardTimeWindow, iNrInputFeatures))

    # Encoder: hidden state for every input step plus the final hidden and cell states.
    aEncoderHiddens, aFinalH, aFinalC = keras.layers.LSTM(iNrOfHiddenNeurons,
                                             return_state = True,
                                             return_sequences = True
                                            )(aInputs)
    aFinalH = keras.layers.BatchNormalization()(aFinalH)
    aFinalC = keras.layers.BatchNormalization()(aFinalC)

    # Convolve over the encoder's hidden-state sequence and flatten the pooled result.
    aFeatureMap = keras.layers.Conv1D(64, 2)(aEncoderHiddens)
    aFeatureMap = keras.layers.MaxPooling1D(2)(aFeatureMap)
    aFlatted = keras.layers.Flatten()(aFeatureMap)

    # The flattened feature vector is repeated once per forecast step as decoder input.
    aDecoderInputs = keras.layers.RepeatVector(iForwardTimeWindow)(aFlatted)

    # Decoder starts from the (batch-normalised) final encoder states.
    aDecoderHiddens = keras.layers.LSTM(iNrOfHiddenNeurons,
                           return_state = False,
                           return_sequences = True
                          )(aDecoderInputs, initial_state=[aFinalH, aFinalC])

    # `Dense` is not imported on its own in this notebook; it has to be reached through
    # keras.layers, otherwise this branch fails with a NameError.
    aOutputs = keras.layers.TimeDistributed(
        keras.layers.Dense(iNrOutputFeatures)
    )(aDecoderHiddens)

    oPredictiveModel = keras.Model(
        inputs=aInputs,
        outputs=aOutputs
    )

    oOptimizer = tf.keras.optimizers.Adam(learning_rate=1e-05)
    oPredictiveModel.compile(loss = fCalculateLoss,
                             optimizer=oOptimizer
                            )


### Luong's Attention Model

In [None]:
if sModelType == 'Luongs Attention':
    # Encoder-decoder LSTM with dot-product attention over the encoder's hidden states.
    aEncoderInputs = keras.Input(
        shape=(iBackwardTimeWindow, iNrInputFeatures))

    # Encoder: hidden state for every input step (return_sequences) plus the final hidden
    # and cell states (return_state).
    aEncoderHiddens, aFinalH, aFinalC = keras.layers.LSTM(iNrOfHiddenNeurons,
                                             return_state = True, 
                                             return_sequences = True
                                            )(aEncoderInputs)
    aFinalH = keras.layers.BatchNormalization()(aFinalH)
    aFinalC = keras.layers.BatchNormalization()(aFinalC)

    # The final hidden state is repeated once per forecast step as decoder input.
    aDecoderInputs = keras.layers.RepeatVector(iForwardTimeWindow)(aFinalH)

    # Decoder starts from the (batch-normalised) final encoder states and returns one
    # hidden state per forecast step.
    aDecoderHiddens = keras.layers.LSTM(iNrOfHiddenNeurons, 
                           return_state = False, 
                           return_sequences = True
                          )(aDecoderInputs, initial_state=[aFinalH, aFinalC])

    # Attention scores: dot product over the hidden-unit axis (axis 2 of both tensors),
    # giving one score per (forecast step, input step) pair, passed through a softmax.
    aAttentions = keras.layers.dot([aDecoderHiddens, aEncoderHiddens], axes=[2, 2])
    aAttentions = keras.layers.Activation('softmax')(aAttentions)

    # Context vector: attention-weighted combination of the encoder hidden states
    # (contracting the input-step axis), batch-normalised and then concatenated with the
    # decoder hidden states.
    aContextVector = keras.layers.dot([aAttentions, aEncoderHiddens], axes=[2,1])
    aContextVector = keras.layers.BatchNormalization()(aContextVector)
    aContextVector = keras.layers.concatenate([aContextVector, aDecoderHiddens])

    # The same Dense layer is applied at every forecast step.
    aDecoderOutputs = keras.layers.TimeDistributed(
        keras.layers.Dense(iNrOutputFeatures)
    )(aContextVector)

    oPredictiveModel = keras.Model(
        inputs=aEncoderInputs,
        outputs=aDecoderOutputs
    )

    # Learning rate is 1e-05 here, versus 1e-04 in the MLP and LSTM branches.
    oOptimizer = tf.keras.optimizers.Adam(learning_rate=1e-05)
    oPredictiveModel.compile(loss = fCalculateLoss, 
                             optimizer=oOptimizer
                            )


## Plot Model Architecture

In [None]:
# Draw the layer graph, including tensor shapes, of whichever model was built above.
tf.keras.utils.plot_model(oPredictiveModel,  show_shapes=True)

## Fit Model

In [None]:
# Train with early stopping on the validation loss and record the wall-clock duration.
# iEpochSize is the maximum number of epochs; oEarlyStop may end training before that.
iEpochSize = 10000
dtStartTime = time.time()
oPredictiveModel.fit(
    x=aInputTrain,
    y=aOutputTrain,
    batch_size=iBatchSize,
    epochs=iEpochSize,
    verbose=0,
    callbacks=[oEarlyStop],
    validation_data=(aInputValidation, aOutputValidation),
    validation_batch_size=iBatchSize,
)
dtEndTime = time.time()
dtTrainingDuration = dtEndTime - dtStartTime

## Epoch History

In [None]:
# One line per quantity recorded during training, plotted against the epoch number.
dfHistory = pd.DataFrame(oPredictiveModel.history.history)
oHistoryFigure, oHistoryAxes = plt.subplots(figsize = (20,10))
oFig = sns.lineplot(data = dfHistory, ax = oHistoryAxes)

## Test Model

In [None]:
# Predict on the test split and put predictions and actual values into frames that share
# the test index and the (time_step, feature) columns of the output dataset.
iNrOutputColumns = iForwardTimeWindow * iNrOutputFeatures

aPrediction = oPredictiveModel.predict(aInputTest).reshape((-1, iNrOutputColumns))
dfPrediction = pd.DataFrame(aPrediction, index = ixTest, columns = aIxOutputColumns)

aActual = aOutputTest.reshape((-1, iNrOutputColumns))
dfActual = pd.DataFrame(aActual, index = ixTest, columns = aIxOutputColumns).copy()

# REFERENCES

https://www.tensorflow.org/guide/keras/train_and_evaluate#passing_data_to_multi-input_multi-output_models

https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch/

https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit/

https://towardsdatascience.com/customize-loss-function-to-make-lstm-model-more-applicable-in-stock-price-prediction-b1c50e50b16c

https://keras.io/getting_started/faq/

https://machinelearningmastery.com/how-to-develop-lstm-models-for-multi-step-time-series-forecasting-of-household-power-consumption/

https://www.tensorflow.org/tutorials/structured_data/time_series

https://towardsdatascience.com/encoder-decoder-model-for-multistep-time-series-forecasting-using-pytorch-5d54c6af6e60

https://levelup.gitconnected.com/building-seq2seq-lstm-with-luong-attention-in-keras-for-time-series-forecasting-1ee00958decb