In [5]:
import pandas as pd

import numpy as np

from datetime import datetime

import pytz
import sys
import pickle

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
from sklearn import metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, max_error, median_absolute_error, r2_score, explained_variance_score

import seaborn as sns

import plotly.graph_objects as go

from Long_Short_Term_Memory import Long_Short_Term_Memory

from Optimize_Portfolio import PortfolioManagement

import MetaTrader5 as mt5

import ta

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import GlobalMaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM
from tensorflow.keras import regularizers

In [58]:
gc_o_TIME_ZONE = pytz.timezone("Etc/UTC")
gc_dt_FROM = datetime(2019, 1, 1, tzinfo=gc_o_TIME_ZONE)
gc_dt_TO = datetime(2021, 11, 10, tzinfo=gc_o_TIME_ZONE)

gc_i_BACKWARD_TIME_WINDOW = -1
gc_i_FORWARD_TIME_WINDOW = 5

gc_dec_TRAINING_RATIO = 0.6
gc_dec_VALIDATION_RATIO = 0.2
gc_dec_TEST_RATIO = 0.2

g_aBackwardTimeSteps = range(gc_i_BACKWARD_TIME_WINDOW, 0)
g_aForwardTimeSteps = range(gc_i_FORWARD_TIME_WINDOW)

g_aInputFeatures = set(['open', 'high', 'low', 'close', 'spread' ,'tick_volume'])

In [7]:
def ConvertSpreadValues(dfRates, aSymbolInfo):
    iDigits = aSymbolInfo.digits
    dfRates['spread'] = dfRates['spread'] * pow(10, -iDigits)

In [8]:
def dfShiftTimeSteps(dfRates, aTimeSteps):
    
    lstColumnNames = list([])
    for iTimeStep in aTimeSteps:
        for tplCol in dfRates.columns:
            lstColumnNames.append((iTimeStep, ) + tplCol)
    
    
    lstIndexNames = ("Time Step",) +  tuple(dfRates.columns.names)
    
    dicColumnIndices = pd.MultiIndex.from_tuples(
        lstColumnNames,
        names = lstIndexNames
        )


    dfShiftedRates = pd.DataFrame(
        columns=dicColumnIndices, 
        index=dfRates.index)
    
    

    for i in aTimeSteps:
        dfShiftedRates[i] = dfRates.shift(-i)

    dfShiftedRates.dropna(inplace=True)

    return dfShiftedRates

In [9]:
def dfGetMarketData(sSymbol):

    if not mt5.initialize():
        print("initialize() failed, error code =", mt5.last_error())
        sys.exit()

    aSymbolInfo = mt5.symbol_info(sSymbol)
    if not aSymbolInfo:
        print("symbol_info() failed, error code =", mt5.last_error())
        sys.exit()

    aRates = mt5.copy_rates_range(
        sSymbol, mt5.TIMEFRAME_H1, gc_dt_FROM, gc_dt_TO)
    if len(aRates) == 0:
        print("copy_rates_range() failed, error code =", mt5.last_error())
        sys.exit()

    mt5.shutdown()

    dfRates = pd.DataFrame(aRates)

    dfRates['time'] = pd.to_datetime(dfRates['time'], unit='s')
    dfRates.set_index('time', inplace=True)
    dfRates.drop('real_volume', axis=1, inplace=True)

    ConvertSpreadValues(dfRates, aSymbolInfo)
    AddSeasonalFeatures(dfRates)
    AddReturns(dfRates)
#     dfRates = dfAddTechnicalIndicators(dfRates)

    dfRates.columns  = pd.MultiIndex.from_product(
        [[sSymbol], dfRates.columns], 
        names=["Time Series", "Feature"])
    
    
    return dfRates

In [10]:
def dfAddTechnicalIndicators(dfRates):
    global g_aInputFeatures 
    
    
    iTimeWindow = 24
    
    dfHigh = dfRates["high"]
    dfLow = dfRates["low"]
    dfClose = dfRates["close"]
    
    # Average Dricetional Movement Index
    oAdx = ta.trend.ADXIndicator(dfHigh, dfLow, dfClose, iTimeWindow, False)
    
    dfAdx = oAdx.adx()
    dfAdx.drop(dfAdx[dfAdx == 0].index, inplace = True)
    g_aInputFeatures.add(dfAdx.name)
    
    dfAdxNeg = oAdx.adx_neg()
    dfAdxNeg.drop(dfAdxNeg[dfAdxNeg == 0].index, inplace = True)
    g_aInputFeatures.add(dfAdxNeg.name)
    
    
    dfAdxPos = oAdx.adx_pos()
    dfAdxPos.drop(dfAdxPos[dfAdxPos == 0].index, inplace = True)
    g_aInputFeatures.add(dfAdxPos.name)
    
    
    dfRates = dfRates.join(dfAdx, how = "inner")
    dfRates = dfRates.join(dfAdxNeg, how = "inner")
    dfRates = dfRates.join(dfAdxPos, how = "inner")

    
    # Aroon Indicator
    oAroon = ta.trend.AroonIndicator(dfClose, iTimeWindow, False)
    dfAroonDown = oAroon.aroon_down()
    dfAroonDown.dropna(inplace = True)
    g_aInputFeatures.add(dfAroonDown.name)
    
    dfAroonIndicator = oAroon.aroon_indicator() 
    dfAroonIndicator.dropna(inplace = True)
    g_aInputFeatures.add(dfAroonIndicator.name)

    
    dfAroonUp = oAroon.aroon_up()
    dfAroonUp.dropna(inplace = True)
    g_aInputFeatures.add(dfAroonUp.name)
    
    dfRates = dfRates.join(dfAroonDown, how = "inner")
    dfRates = dfRates.join(dfAroonIndicator, how = "inner")
    dfRates = dfRates.join(dfAroonUp, how = "inner")
    
    
    # Commodity Channel Index
    oCci = ta.trend.CCIIndicator(dfHigh, dfLow,dfClose, iTimeWindow)
    dfCci = oCci.cci()
    dfCci.dropna(inplace = True)
    g_aInputFeatures.add(dfCci.name)
    
    dfRates = dfRates.join(dfCci, how = "inner")
    
    
    # Detrended Price Oscillator (DPO)
    oDpo = ta.trend.DPOIndicator(dfClose, iTimeWindow)
    dfDpo = oDpo.dpo()
    dfDpo.dropna(inplace = True)
    g_aInputFeatures.add(dfDpo.name)    
    
    dfRates = dfRates.join(dfDpo, how = "inner")
    
    
    # EMA - Exponential Moving Average
    oEma = ta.trend.EMAIndicator(dfClose, iTimeWindow)
    dfEma = oEma.ema_indicator()
    dfEma.dropna(inplace = True)
    g_aInputFeatures.add(dfEma.name)
    
    dfRates = dfRates.join(dfEma, how = "inner")
    
    
    return dfRates

In [11]:
def AddSeasonalFeatures(dfRates):
    global g_aInputFeatures 
    
    c_a_SEASONAL_FEATURES = ["year", "month", "day", "dayofweek", "hour"]
    for sSeasonalFeature in c_a_SEASONAL_FEATURES:
        exec("dfRates[sSeasonalFeature] = dfRates.index." + sSeasonalFeature)
        g_aInputFeatures.add(sSeasonalFeature)

In [12]:
def AddReturns(dfRates):
    dfRates["return"] = (dfRates["open"] - dfRates["close"])/dfRates["open"]

In [13]:
def dfOversampleImbalancedData(dfX, dfY):
    
#     oOversample = SMOTE()
#     aX, aY = oOversample.fit_resample(dfX.values, dfY.values)
    
#     dfX = pd.DataFrame(data = aX, columns = dfX.columns)
#     dfY = pd.DataFrame(data = aY, columns = dfY.columns)
    
    dfXCopy = dfX.copy()
    dfYCopy = dfY.copy()
        
    dfCombinations = dfYCopy.astype(str).agg('-'.join, axis=1)
    dfCombinationsStats = dfCombinations.value_counts()
    dfCombinationsStats = pd.DataFrame(dfCombinationsStats).reset_index()
    
    
    iMaxAmount = dfCombinationsStats.iloc[0,1]
    for i in range(1, len(dfCombinationsStats) ):
        
        sCombination = dfCombinationsStats.iloc[i, 0]
        iSamplesNeeded = iMaxAmount - dfCombinationsStats.iloc[i, 1]
        
        dfSampledIndex =  dfCombinations[dfCombinations == sCombination].sample(iSamplesNeeded, replace = True).index
        
        dfSampledX = dfXCopy.loc[dfSampledIndex]
        dfSampledY = dfYCopy.loc[dfSampledIndex]
        
    
        dfX = dfX.append(dfSampledX , ignore_index= True)
        dfY = dfY.append(dfSampledY , ignore_index= True)
        
    
    dfX,dfY = shuffle(dfX,dfY )
    

    return dfX, dfY

In [14]:
def dfSplitData(dfInput, dfOutput):
    dfInputTrainValidation, dfInputTest, dfOutputTrainValidation, dfOutputTest = train_test_split(
        dfInput,
        dfOutput,
        test_size=gc_dec_TEST_RATIO,
        shuffle=False)

    dfInputTrain, dfInputValidation, dfOutputTrain, dfOutputValidation = train_test_split(
        dfInputTrainValidation,
        dfOutputTrainValidation,
        test_size=(1/(1 -gc_dec_TEST_RATIO))-1,
        shuffle=False)
    
    
    dfInputTrain = dfInputTrain.astype(float)
    dfInputValidation = dfInputValidation.astype(float)
    dfInputTest = dfInputTest.astype(float)
    dfOutputTrain = dfOutputTrain.astype(float)
    dfOutputValidation = dfOutputValidation.astype(float)
    dfOutputTest = dfOutputTest.astype(float)
    
    return dfInputTrain, dfInputValidation, dfInputTest, dfOutputTrain, dfOutputValidation, dfOutputTest

In [15]:
def dfScaleData(sScalerName,dfTrain,dfValidation, dfTest, bIsStandard = True):
    sScalersDirectory = os.path.join(sSubModelName , "__scalers__")
    if bIsStandard == True:
        oScaler = StandardScaler()
    else:
        oScaler = MinMaxScaler()

    oScaler.fit(dfTrain)

    aScaledTrain = oScaler.transform(dfTrain)
    aScaledValidation = oScaler.transform(dfValidation)
    aScaledTest = oScaler.transform(dfTest)

    dfScaledTrain = pd.DataFrame(aScaledTrain, columns = dfTrain.columns, index = dfTrain.index)
    dfScaledValidation = pd.DataFrame(aScaledValidation, columns = dfValidation.columns, index = dfValidation.index)
    dfScaledTest = pd.DataFrame(aScaledTest, columns = dfTest.columns, index = dfTest.index)

    sScalerFilePath =os.path.join(sScalersDirectory, sScalerName + ".sav")
    os.makedirs(os.path.dirname(sScalerFilePath), exist_ok=True)
    pickle.dump(oScaler, open(sScalerFilePath, 'wb'))
    
    
    return dfScaledTrain, dfScaledValidation, dfScaledTest

In [40]:
aLimits = [0.00003, 0.0005, 0.0025, 0.005]
aNearLimits = [0.0001]

In [41]:
def iLen(dfT):
    return dfT.shape[0]

def fOp(dfCi):
    return dfCi["open"]

def fHp(dfCi):
    return dfCi["high"]

def fLp(dfCi):
    return dfCi["low"]

def fCp(dfCi):
    return dfCi["close"]

def fHb(dfCi):
    return abs(fCp(dfCi)-fOp(dfCi))

def iId(dfS):
    return dfS.iloc[0].name

def fTpBody(dfCi):
    return max(fOp(dfCi), fCp(dfCi))

def fBmBody(dfCi):
    return min(fOp(dfCi), fCp(dfCi))

def fUs(dfCi):
    return fHp(dfCi) - fTpBody(dfCi)

def fLs(dfCi):
    return fBmBody(dfCi) - fLp(dfCi)

def fHs(dfCi):
    return fUs(dfCi) + fLs(dfCi)

def fAp(dfSTC):
    return dfSTC["close"].mean()

def iPt(dfTC):
    if fAp(dfTC.iloc[0:5]) < fAp(dfTC.iloc[1:6]):
        if fAp(dfTC.iloc[2:7]) < fAp(dfTC.iloc[3:8]):
            return 1
    if fAp(dfTC.iloc[0:5]) > fAp(dfTC.iloc[1:6]):
        if fAp(dfTC.iloc[2:7]) > fAp(dfTC.iloc[3:8]):
            return -1
        
    return 0
    
def bSliGreater(fX, fY):
    fRatio = abs((fX-fY)/fY)
    if aLimits[0]<= fRatio and fRatio<aLimits[1]:
        return True
    else:
        return False
    
def bModGreater(fX, fY):
    fRatio = abs((fX-fY)/fY)
    if aLimits[1]<= fRatio and fRatio<aLimits[2]:
        return True
    else:
        return False

def bLarGreater(fX, fY):
    fRatio = abs((fX-fY)/fY)
    if aLimits[2]<= fRatio and fRatio<aLimits[3]:
        return True
    else:
        return False
    
def bExtGreater(fX, fY):
    fRatio = abs((fX-fY)/fY)
    if fRatio>=aLimits[3]:
        return True
    else:
        return False

    
def bSliLess(fX, fY):
    fRatio = abs((fY-fX)/fX)
    if aLimits[0]<= fRatio and fRatio<aLimits[1]:
        return True
    else:
        return False
    

def bModLess(fX, fY):
    fRatio = abs((fY-fX)/fX)
    if aLimits[1]<= fRatio and fRatio<aLimits[2]:
        return True
    else:
        return False

def bLarLess(fX, fY):
    fRatio = abs((fY-fX)/fX)
    if aLimits[2]<= fRatio and fRatio<aLimits[3]:
        return True
    else:
        return False
    
def bExtLess(fX, fY):
    fRatio = abs((fY-fX)/fX)
    if fRatio>=aLimits[3]:
        return True
    else:
        return False
    
def bExtNear(fX, fY):
    fRatio = abs((abs(fX-fY))/max(fX,fY))
    if fRatio<=aLimits[0]:
        return True
    else:
        return False
    
    
def bModNear(fX, fY):
    fRatio = abs((abs(fX-fY))/max(fX,fY))
    if aLimits[0]<= fRatio and fRatio<aLimits[1]:
        return True
    else:
        return False
    
def bNear(fX, fY):
    fRatio = abs((abs(fX-fY))/max(fX,fY))
    if 0<= fRatio and fRatio<aLimits[1]:
        return True
    else:
        return False
    
def bNearUp(fX, fY):
    fRatio = abs((fY-fX)/fY)
    if aNearLimits[0]<= fRatio and fRatio<aLimits[0]:
        return True
    else:
        return False
    
def bNearDown(fX, fY):
    fRatio = abs((fX-fY)/fX)
    if aNearLimits[0]<= fRatio and fRatio<aLimits[0]:
        return True
    else:
        return False
    
def dfGetElement(dfT, sEle):
    return dfT.apply(sEle, axis = 1)

def fAvgGetElement(dfT, sEle):
    return dfT.apply(sEle, axis = 1).mean()

def bDoji(dfCi):
    return bExtNear(fOp(dfCi), fCp(dfCi))

def bSmallBody(dfCi):
    return bSliLess(fBmBody(dfCi), fTpBody(dfCi))

def bNormalBody(dfCi):
    return bModLess(fBmBody(dfCi), fTpBody(dfCi))

def bLongBody(dfCi):
    return bLarLess(fBmBody(dfCi), fTpBody(dfCi))

def bElBody(dfCi):
    return bExtLess(fBmBody(dfCi), fTpBody(dfCi))

def bNoUs(dfCi):
    return bExtNear(fHp(dfCi), fTpBody(dfCi))

def bSmallUs(dfCi):
    return bSliGreater(fHp(dfCi), fTpBody(dfCi))

def bNormalUs(dfCi):
    return bModGreater(fHp(dfCi), fTpBody(dfCi))

def bLongUs(dfCi):
    return bLarGreater(fHp(dfCi), fTpBody(dfCi))

def bElUs(dfCi):
    return bExtGreater(fHp(dfCi), fTpBody(dfCi))

def bNoLs(dfCi):
    return bExtNear(fLp(dfCi), fBmBody(dfCi))

def bSmallLs(dfCi):
    return bSliLess(fLp(dfCi), fBmBody(dfCi))

def bNormalLs(dfCi):
    return bModLess(fLp(dfCi), fBmBody(dfCi))

def bLongLs(dfCi):
    return bLarLess(fLp(dfCi), fBmBody(dfCi))

def bElLs(dfCi):
    return bExtLess(fLp(dfCi), fBmBody(dfCi))

def bBlackBody(dfCi):
    return fOp(dfCi)> fCp(dfCi)

def bWhiteBody(dfCi):
    return fOp(dfCi)< fCp(dfCi)

def bSmallBlackBody(dfCi):
    return bSmallBody(dfCi) and bBlackBody(dfCi)

def bSmallWhiteBody(dfCi):
    return bSmallBody(dfCi) and bWhiteBody(dfCi)

def bNormalBlackBody(dfCi):
    return bNormalBody(dfCi) and bBlackBody(dfCi)

def bNormalWhiteBody(dfCi):
    return bNormalBody(dfCi) and bWhiteBody(dfCi)

def bLongBlackBody(dfCi):
    return bLongBody(dfCi) and bBlackBody(dfCi)

def bLongWhiteBody(dfCi):
    return bLongBody(dfCi) and bWhiteBody(dfCi)

def bElBlackBody(dfCi):
    return bElBody(dfCi) and bBlackBody(dfCi)

def bElWhiteBody(dfCi):
    return bElBody(dfCi) and bWhiteBody(dfCi)

def bDownShadowGap(dfCi, dfCj):
    return fLp(dfCi)>fHp(dfCj)

def bUpShadowGap(dfCi, dfCj):
    return fHp(dfCi)<fLp(dfCj)

def bDownBodyGap(dfCi, dfCj):
    return fBmBody(dfCi)>fTpBody(dfCj)

def bUpBodyGap(dfCi, dfCj):
    return fTpBody(dfCi)<fTpBody(dfCj)




In [42]:
def bMarubozuBlack(dfCi):
    return bNoUs(dfCi) and bLongBlackBody(dfCi) and bNoLs(dfCi)
    
def bMarubozuWhite(dfCi):
    return bNoUs(dfCi) and bLongWhiteBody(dfCi) and bNoLs(dfCi)

def bBeltHoldBullish(dfCi, dfTC):
    return iPt(dfTC) == -1 and bLongWhiteBody(dfCi) and bNoLs(dfCi) and bModNear(fCp(dfCi), fHp(dfCi))

def bMarubozuClosingBlack(dfCi):
    return bLongBlackBody(dfCi) and bNoUs(dfCi) == False and bNoLs(dfCi)

def bMarubozuOpeningWhite(dfCi):
    return bLongWhiteBody(dfCi) and bNoUs(dfCi) == False and bNoLs(dfCi)

def bShootingStarOneCandle(dfCi, dfTC):
    return iPt(dfTC) == 1 and bLongUs(dfCi) and (fUs(dfCi)> (2*fHb(dfCi))) and bSmallBody(dfCi) and bNoLs(dfCi)

def bDojiGravestone(dfCi):
    return bDoji(dfCi) and bNoLs(dfCi) and bLongUs(dfCi)

def bBeltHoldBearish(dfCi, dfTC):
    return iPt(dfTC) == 1 and bNoUs(dfCi) and bSmallLs(dfCi) and bLongBlackBody(dfCi)

def bDojiDragonfly(dfCi):
    return bDoji(dfCi) and bSmallUs(dfCi) and bLongLs(dfCi)

def bHammer(dfCi, dfTC):
    return iPt(dfTC) == -1 and bSmallBody(dfCi) and bNoLs(dfCi) == False and (2*fHb(dfCi))< fLs(dfCi) and fLs(dfCi) < (3*fHb(dfCi)) and ((bSmallUs(dfCi)) or (bNoUs(dfCi)))

def bHangingMan(dfCi, dfTC):
    return iPt(dfTC) == 1 and bNoUs(dfCi) and bLongLs(dfCi) and bSmallBody(dfCi)
    
def bMarubozuOpeningBlack(dfCi):
    return bLongBlackBody(dfCi) and bNoLs(dfCi) == False and bNoUs(dfCi)

def bMarubozuClosingWhite(dfCi):
    return bLongWhiteBody(dfCi) and bNoUs(dfCi) and bNoLs(dfCi) == False

def bTakuriLine(dfCi, dfTC):
    return iPt(dfTC) == -1 and bSmallBody(dfCi) and bNoUs(dfCi) and (fLs(dfCi) > (3*fHb(dfCi)))

def bCandleBlack(dfCi):
    return bNormalBlackBody(dfCi) and bNoUs(dfCi) == False and bNoLs(dfCi) == False and (fUs(dfCi)<fHb(dfCi)) and (fLs(dfCi)<fHb(dfCi))

def bCandleShortBlack(dfCi):
    return bSmallBlackBody(dfCi) and bNoUs(dfCi) == False and bNoLs(dfCi) == False and (fUs(dfCi)< fHb(dfCi)) and (fLs(dfCi)<fHb(dfCi))

def bCandleWhite(dfCi):
    return bNormalWhiteBody(dfCi) and bNoUs(dfCi) == False and bNoLs(dfCi) == False and (fUs(dfCi)< fHb(dfCi)) and (fLs(dfCi)<fHb(dfCi))

def bCandleShortWhite(dfCi):
    return bSmallWhiteBody(dfCi) and bNoUs(dfCi) == False and bNoLs(dfCi) == False and (fUs(dfCi)< fHb(dfCi)) and (fLs(dfCi)<fHb(dfCi))

def bDojiGappingDown(dfCi, dfTC):
    return iPt(dfTC) == -1 and bDoji(dfCi) and bDownShadowGap(dfTC.iloc[-1], dfCi)

def bDojiGappingUp(dfCi, dfTC):
    return iPt(dfTC) == 1 and bDoji(dfCi) and bUpShadowGap(dfTC.iloc[-1], dfCi)

def bDojiLongLegged(dfCi):
    return bDoji(dfCi) and bLongUs(dfCi) and bLongLs(dfCi)

def bDojiNorthern(dfCi, dfTC):
    return iPt(dfTC) == 1 and bDoji(dfCi)

def bDojiSouthern(dfCi, dfTC):
    return iPt(dfTC) == -1 and bDoji(dfCi)
    
def bHighWave(dfCi):
    return bElUs(dfCi) and bElLs(dfCi) and bSmallBody(dfCi)

def bLongBlackDay(dfCi, dfT):
    return bLongBlackBody(dfCi) and ((fHb(dfCi) > (3*fAvgGetElement(dfT.iloc[-7:],fHb))) or (fHb(dfCi) > (3*fAvgGetElement(dfT.iloc[-14:],fHb)))) and fUs(dfCi) < fHb(dfCi) and fLs(dfCi) < fHb(dfCi)

def bLongWhiteDay(dfCi, dfT):
    return bLongWhiteBody(dfCi) and ((fHb(dfCi) > (3*fAvgGetElement(dfT.iloc[-7:],fHb))) or (fHb(dfCi) > (3*fAvgGetElement(dfT.iloc[-14:],fHb)))) and fUs(dfCi) < fHb(dfCi) and fLs(dfCi) < fHb(dfCi)
    
    
def bRickshawMan(dfCi):
    return bDoji(dfCi) and bNear( 0.5 * (fOp(dfCi) + fCp(dfCi)) , 0.5 * (fHp(dfCi) + fLp(dfCi))) and bElLs(dfCi) and bElUs(dfCi)

def bSpinningTopBlack(dfCi):
    return bSmallBlackBody(dfCi) and fUs(dfCi) > fHb(dfCi) and fLs(dfCi) > fHb(dfCi) and bNoLs(dfCi) == False and bNoUs(dfCi) == False


def bSpinningTopWhite(dfCi):
    return bSmallWhiteBody(dfCi) and fUs(dfCi) > fHb(dfCi) and fLs(dfCi) > fHb(dfCi) and bNoLs(dfCi) == False and bNoUs(dfCi) == False





In [43]:
dfRates = dfGetMarketData("EURUSD")
dfRates = dfRates.loc[:, "EURUSD"]

In [63]:
dfOutput = pd.DataFrame(index = dfRates.index)
for i in g_aForwardTimeSteps:
    dfOutput[i] = dfRates["return"].shift(-i)

In [65]:
dfOutput

Unnamed: 0_level_0,0,1,2,3,4
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-02 06:00:00,0.000061,-0.002884,-0.000645,0.000766,0.002483
2019-01-02 07:00:00,-0.002884,-0.000645,0.000766,0.002483,0.001223
2019-01-02 08:00:00,-0.000645,0.000766,0.002483,0.001223,-0.000245
2019-01-02 09:00:00,0.000766,0.002483,0.001223,-0.000245,0.000271
2019-01-02 10:00:00,0.002483,0.001223,-0.000245,0.000271,0.002519
...,...,...,...,...,...
2021-11-09 20:00:00,0.000138,-0.000371,-0.000129,0.000267,-0.000026
2021-11-09 21:00:00,-0.000371,-0.000129,0.000267,-0.000026,
2021-11-09 22:00:00,-0.000129,0.000267,-0.000026,,
2021-11-09 23:00:00,0.000267,-0.000026,,,


In [47]:
# dfOhlc = dfRates.copy()

# iLoc = 0 
# for dtIndex, dfCi in dfOhlc.iterrows():
    
#     dfOhlc.loc[dtIndex, "BARUBOZU_BLACK"] = bMarubozuBlack(dfCi)
#     dfOhlc.loc[dtIndex, "BARUBOZU_WHITE"] = bMarubozuWhite(dfCi)
    
#     if iLoc > 7:
#         dfTC = dfOhlc.iloc[iLoc-7:iLoc+1]
        
#         dfOhlc.loc[dtIndex, "BELT_HOLD_BULLISH"] = bBeltHoldBullish(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "SHOOTING_STAR_ONE_CANDLE"] = bShootingStarOneCandle(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "BELT_HOLD_BEARISH"] = bBeltHoldBearish(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "HAMMER"] = bHammer(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "HANGING_BODY"] = bHangingMan(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "TAKURI_LINE"] = bTakuriLine(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "DOJI_GAPPING_DOWN"] = bDojiGappingDown(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "DOJI_GAPPING_UP"] = bDojiGappingUp(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "DOJI_NORTHERN"] = bDojiNorthern(dfCi, dfTC)
#         dfOhlc.loc[dtIndex, "DOJI_SOUTHERN"] = bDojiSouthern(dfCi, dfTC)
    
#         if iLoc > 13:
#             dfT = dfOhlc.iloc[iLoc-13:iLoc+1]
#             dfOhlc.loc[dtIndex, "LONG_BLACK_DAY"] = bLongBlackDay(dfCi, dfT)
#             dfOhlc.loc[dtIndex, "LONG_WHITE_DAY"] = bLongWhiteDay(dfCi, dfT)
            
        
        
#     dfOhlc.loc[dtIndex, "MARUBOZU_CLOSING_BLACK"] = bMarubozuClosingBlack(dfCi)
#     dfOhlc.loc[dtIndex, "MARUBOZU_OPENING_WHITE"] = bMarubozuOpeningWhite(dfCi)
#     dfOhlc.loc[dtIndex, "DOJI_GRAVESTONE"] = bDojiGravestone(dfCi)
#     dfOhlc.loc[dtIndex, "DOJI_DRAGONFLY"] = bDojiDragonfly(dfCi)
#     dfOhlc.loc[dtIndex, "MARUBOZU_OPENING_BLACK"] = bMarubozuOpeningBlack(dfCi)
#     dfOhlc.loc[dtIndex, "MARUBOZU_CLOSING_WHITE"] = bMarubozuClosingWhite(dfCi)
#     dfOhlc.loc[dtIndex, "CANDLE_BLACK"] = bCandleBlack(dfCi)
#     dfOhlc.loc[dtIndex, "CANDLE_SHORT_BLACK"] = bCandleShortBlack(dfCi)
#     dfOhlc.loc[dtIndex, "CANDLE_WHITE"] = bCandleWhite(dfCi)
#     dfOhlc.loc[dtIndex, "CANDLE_SHORT_WHITE"] = bCandleShortWhite(dfCi)
#     dfOhlc.loc[dtIndex, "DOJI_LONG_LEGGED"] = bDojiLongLegged(dfCi)
#     dfOhlc.loc[dtIndex, "HIGH_WAVE"] = bHighWave(dfCi)
#     dfOhlc.loc[dtIndex, "RICKSHAW_MAN"] = bRickshawMan(dfCi)
#     dfOhlc.loc[dtIndex, "SPINNING_TOP_BLACK"] = bSpinningTopBlack(dfCi)
#     dfOhlc.loc[dtIndex, "SPINNING_TOP_WHITE"] = bSpinningTopWhite(dfCi)
    
    
#     iLoc = iLoc + 1


In [48]:
for sColumn in dfOhlc.iloc[:, 12:].columns:
    print(sColumn)
    print(dfOhlc[dfOhlc[sColumn] == True].shape)


BARUBOZU_BLACK
(0, 29)
BARUBOZU_WHITE
(0, 29)
MARUBOZU_CLOSING_BLACK
(3, 29)
MARUBOZU_OPENING_WHITE
(10, 29)
DOJI_GRAVESTONE
(0, 29)
DOJI_DRAGONFLY
(1, 29)
MARUBOZU_OPENING_BLACK
(20, 29)
MARUBOZU_CLOSING_WHITE
(4, 29)
CANDLE_BLACK
(1934, 29)
CANDLE_SHORT_BLACK
(1112, 29)
CANDLE_WHITE
(2024, 29)
CANDLE_SHORT_WHITE
(1216, 29)
DOJI_LONG_LEGGED
(0, 29)
HIGH_WAVE
(0, 29)
RICKSHAW_MAN
(0, 29)
SPINNING_TOP_BLACK
(1460, 29)
SPINNING_TOP_WHITE
(1448, 29)


In [None]:
sSymbol = "USDCAD"
aRelevantSymbols = []

In [None]:
dfRates = dfGetMarketData(sSymbol)

for sRelevantSymbol in aRelevantSymbols:
    dfRelevantRates = dfGetMarketData(sRelevantSymbol)
    dfRates = dfRates.join(dfRelevantRates, how = "inner")

In [None]:
dfInput  = dfRates.loc[:, dfRates.columns.get_level_values(1).isin(g_aInputFeatures)]
dfInput = dfShiftTimeSteps(dfInput, g_aBackwardTimeSteps)

In [None]:
i = 0
j = 1

In [None]:
aColumns = pd.MultiIndex.from_tuples(
    [(i,j, "Regression")],
    names = ["From","To", "Model"])

In [None]:
dfOutput = pd.DataFrame(index = dfRates.index, 
                        columns = aColumns)

    
dfSpread = dfRates[sSymbol]["spread"]
dfOpen = dfRates[sSymbol]["open"].shift(-i)
dfClose = dfRates[sSymbol]["close"].shift(-j)

dfNetReturn = (abs(dfClose - dfOpen) - dfSpread)
dfReturn = (dfClose - dfOpen)/dfOpen

In [None]:
dfIndexInvestable = dfNetReturn[dfNetReturn>0].index
dfIndexNonInvestable = dfNetReturn[dfNetReturn<=0].index

dfIndexUpward = dfReturn[dfReturn>0].index
dfIndexDownward = dfReturn[dfReturn<=0].index

# DEEP LEARNING MODEL

In [None]:
sModelName = os.path.join(sSymbol , "__deep learning model__")

In [None]:
sSubModelName = os.path.join(sModelName , "__"+ str(i) +"  " + str(j) + "__")

## Preprocessing

In [None]:
dfOutput.loc[:,(i,j,"Regression")] = dfClose.loc[dfOutput.index]

# dfOutput.loc[dfIndexInvestable,(i,j,"Investability")] = 1
# dfOutput.loc[dfIndexNonInvestable,(i,j,"Investability")] = 0

# dfOutput.loc[dfIndexUpward,(i,j,"Directional")] = 1
# dfOutput.loc[dfIndexDownward,(i,j,"Directional")] = 0

In [None]:
dfOutput.head()

### Remove Missing Data

In [None]:
dfMerged =pd.merge(dfInput, dfOutput, left_index=True, right_index=True)
dfMerged.dropna(inplace = True)
dfInput = dfMerged[dfInput.columns]
dfOutput= dfMerged[dfOutput.columns]

dfOutput = dfOutput.astype("float64")

### Split Data

In [None]:
dfInputTrain, dfInputValidation, dfInputTest, dfOutputTrain, dfOutputValidation, dfOutputTest = dfSplitData(dfInput, 
                                                                                                            dfOutput)

### Scale Data

In [None]:
dfScaledInputTrain, dfScaledInputValidation, dfScaledInputTest = dfScaleData("input", 
                                                                             dfInputTrain,
                                                                             dfInputValidation, 
                                                                             dfInputTest)

dfScaledOutputTrain, dfScaledOutputValidation, dfScaledOutputTest = dfScaleData("output", 
                                                                                dfOutputTrain,
                                                                                dfOutputValidation, 
                                                                                dfOutputTest,
                                                                               False)

## Train

### Set Hyperparameters

In [None]:
iBatchSize = 32

oLrSchedule = keras.optimizers.schedules.ExponentialDecay(
    1e-05, decay_steps=100000, decay_rate=0.50, staircase=True
)


oOptimizer = tf.keras.optimizers.Adam(learning_rate=1e-05 , beta_1=0.9)

oEarlyStop = EarlyStopping(
    monitor = 'val_loss', 
    mode = 'min', 
    verbose = 0 , 
    patience = 20, 
    restore_best_weights = True)
iEpochSize = 10000




### Build Model

In [None]:
oInputRates = keras.Input(
    shape=(
        dfScaledInputTrain.shape[1]), 
    name="aRates")

aW = Dense((100))(oInputRates)
aOutputRegression = Dense((100))(aW)

aOutputRegression = Dense(1, name = "Regression", activation = "relu")(aOutputRegression)

oPredictiveModel = keras.Model(
    inputs=oInputRates, 
    outputs=aOutputRegression
)

### Loss Function

### Compile Model

In [None]:
oPredictiveModel.compile(optimizer=oOptimizer,
                         loss = tf.keras.losses.MeanSquaredError()
#                          loss ={
#                             "Regression": tf.keras.losses.MeanSquaredError(),
#                         }
                        )


### Fit Model

In [None]:
oPredictiveModel.fit(
    dfScaledInputTrain, 
    dfScaledOutputTrain, 
    epochs=iEpochSize, 
    batch_size=iBatchSize, 
    verbose=1, 
    validation_data= (dfScaledInputValidation, dfScaledOutputValidation),
    validation_batch_size= iBatchSize,
    callbacks=[oEarlyStop]
)

oPredictiveModel.save_weights(sSubModelName)

pd.DataFrame(oPredictiveModel.history.history).plot()

## Test

In [None]:
oPredictiveModel.load_weights(sSubModelName)

aPrediction = oPredictiveModel.predict(dfScaledInputTest)

# dfPrediction.iloc[:,0] = aPrediction[0]
# dfPrediction.loc[dfPrediction.iloc[:,0] <= 0.5] = 0
# dfPrediction.loc[dfPrediction.iloc[:,0] > 0.5] = 1

# dfPrediction.iloc[:,1] = aPrediction[1]
# dfPrediction.loc[dfPrediction.iloc[:,1] <= 0.5] = 0
# dfPrediction.loc[dfPrediction.iloc[:,1] > 0.5] = 1

# dfPrediction.iloc[:,2] = aPrediction[2]

sOutputScalerPath = os.path.join(sSubModelName , "__scalers__")
sOutputScalerPath = os.path.join(sOutputScalerPath , "output" + ".sav")
oScalerOutput = pickle.load(open(sOutputScalerPath, 'rb'))
aPrediction = oScalerOutput.inverse_transform(aPrediction)

dfPrediction = pd.DataFrame(aPrediction, index = dfScaledOutputTest.index)
dfPrediction.columns = aColumns

In [None]:
dfTestComparision =pd.DataFrame(dfPrediction.iloc[:,0])
dfTestComparision = dfTestComparision.join(dfOutputTest.iloc[:,0], how = "inner", lsuffix="prediction")
dfTestComparision.columns = ["Prediction", "Actual"]

sns.scatterplot(data = dfTestComparision, x = "Actual", y ="Prediction")

In [None]:
iFrom = 90
iTo = 100
print(r2_score(dfOutputTest.iloc[iFrom:iTo, 0], dfPrediction.iloc[iFrom:iTo, 0]))
sns.lineplot(data = dfTestComparision.iloc[iFrom:iTo])

In [None]:
y_pred = dfPrediction.iloc[:, 0]
y_true = dfOutputTest.iloc[:, 0]
aMetrics = [
        ('mean absolute error', mean_absolute_error(y_true, y_pred)),
        ('median absolute error', median_absolute_error(y_true, y_pred)),
        ('mean squared error', mean_squared_error(y_true, y_pred)),
        ('max error', max_error(y_true, y_pred)),
        ('r2 score', r2_score(y_true, y_pred)),
        ('explained variance score', explained_variance_score(y_true, y_pred))
    ]

print('Metrics for regression:')
for metric_name, metric_value in aMetrics:
    print(f'{metric_name:>25s}: {metric_value: >20.3f}')

In [None]:
y_pred = dfPrediction.iloc[:, 1]
y_true = dfOutputTest.iloc[:, 1]
print(classification_report(y_true, y_pred, zero_division = 0))

In [None]:
y_pred = dfPrediction.iloc[:, 0]
y_true = dfOutputTest.iloc[:, 0]
print(classification_report(y_true, y_pred, zero_division = 0))

# REFERENCES

https://www.tensorflow.org/guide/keras/train_and_evaluate#passing_data_to_multi-input_multi-output_models

https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch/

https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit/

https://towardsdatascience.com/customize-loss-function-to-make-lstm-model-more-applicable-in-stock-price-prediction-b1c50e50b16c