In [1]:
import os
import numpy as np
import pandas as pd
import warnings

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Flatten
from tensorflow.compat.v1.keras.layers import TimeDistributed
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import ConvLSTM2D

# This is not recommended but I am doing this to suppress warnings from SARIMAX
warnings.simplefilter('ignore')

countryName = 'US'

nFeatures = 1

nDaysMin = 10
k = 3

nValid = 10
nTest = 10

dataDir = os.path.join('data', 'JHU', 'upto07082020_forPublication')


# confirmedFilename = 'time_series_covid19_confirmed_global.csv'
# deathsFilename = 'time_series_covid19_deaths_global.csv'
# recoveredFilename = 'time_series_covid19_recovered_global.csv'

confirmedFilename = 'https://raw.githubusercontent.com/arkobarman/covid-19_timeSeriesAnalysis/master/data/JHU/upto07082020_forPublication/time_series_covid19_confirmed_global.csv'
deathsFilename = 'https://raw.githubusercontent.com/arkobarman/covid-19_timeSeriesAnalysis/master/data/JHU/upto07082020_forPublication/time_series_covid19_deaths_global.csv'
recoveredFilename = 'https://raw.githubusercontent.com/arkobarman/covid-19_timeSeriesAnalysis/master/data/JHU/upto07082020_forPublication/time_series_covid19_recovered_global.csv'

In [2]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps, k):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix + k >= len(sequence):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix:end_ix+k]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [3]:
def meanAbsolutePercentageError(yTrueList, yPredList):
    absErrorList = [np.abs(yTrue - yPred) for yTrue, yPred in zip(yTrueList, yPredList)]
    absPcErrorList = [absError/yTrue for absError, yTrue in zip(absErrorList, yTrueList)]
    MAPE = 100*np.mean(absPcErrorList)
    return MAPE

def meanAbsolutePercentageError_kDay(yTrueListList, yPredListList):
    # Store true and predictions for day 1 in a list, day 2 in a list and so on
    # Keep each list of these lists in a respective dict with key as day #
    yTrueForDayK = {}
    yPredForDayK = {}
    for i in range(len(yTrueListList[0])):
        yTrueForDayK[i] = []
        yPredForDayK[i] = []
    for yTrueList, yPredList in zip(yTrueListList, yPredListList):
        for i in range(len(yTrueList)):
            yTrueForDayK[i].append(yTrueList[i])
            yPredForDayK[i].append(yPredList[i])
            
    # Get MAPE for each day in a list
    MAPEList = []
    for i in yTrueForDayK.keys():
        MAPEList.append(meanAbsolutePercentageError(yTrueForDayK[i], yPredForDayK[i]))
    return np.mean(MAPEList)

def meanForecastError(yTrueList, yPredList):
    forecastErrors = [yTrue - yPred for yTrue, yPred in zip(yTrueList, yPredList)]
    MFE = np.mean(forecastErrors)
    return MFE

def meanAbsoluteError(yTrueList, yPredList):
    absErrorList = [np.abs(yTrue - yPred) for yTrue, yPred in zip(yTrueList, yPredList)]
    return np.mean(absErrorList)

def meanSquaredError(yTrueList, yPredList):
    sqErrorList = [np.square(yTrue - yPred) for yTrue, yPred in zip(yTrueList, yPredList)]
    return np.mean(sqErrorList)

def rootMeanSquaredError(yTrueList, yPredList):
    return np.sqrt(meanSquaredError(yTrueList, yPredList))

def medianSymmetricAccuracy(yTrueList, yPredList):
    '''https://helda.helsinki.fi//bitstream/handle/10138/312261/2017SW001669.pdf?sequence=1'''
    logAccRatioList = [np.abs(np.log(yPred/yTrue)) for yTrue, yPred in zip(yTrueList, yPredList)]
    MdSA = 100*(np.exp(np.median(logAccRatioList))-1)
    return MdSA

def medianSymmetricAccuracy_kDay(yTrueListList, yPredListList):
    # Store true and predictions for day 1 in a list, day 2 in a list and so on
    # Keep each list of these lists in a respective dict with key as day #
    yTrueForDayK = {}
    yPredForDayK = {}
    for i in range(len(yTrueListList[0])):
        yTrueForDayK[i] = []
        yPredForDayK[i] = []
    for yTrueList, yPredList in zip(yTrueListList, yPredListList):
        for i in range(len(yTrueList)):
            yTrueForDayK[i].append(yTrueList[i])
            yPredForDayK[i].append(yPredList[i])
    # Get MdSA for each day in a list
    MdSAList = []
    for i in yTrueForDayK.keys():
        MdSAList.append(medianSymmetricAccuracy(yTrueForDayK[i], yPredForDayK[i]))
    return(np.mean(MdSAList))

In [4]:
# Function to get all three frames for a given country
def getCountryCovidFrDict(countryName):
    countryCovidFrDict = {}
    for key in covidFrDict.keys():
        dataFr = covidFrDict[key]
        countryCovidFrDict[key] = dataFr[dataFr['Country/Region'] == countryName]
    return countryCovidFrDict

In [5]:
# Load all 3 csv files
covidFrDict = {}
# covidFrDict['confirmed'] = pd.read_csv(os.path.join(dataDir, confirmedFilename))
# covidFrDict['deaths'] = pd.read_csv(os.path.join(dataDir, deathsFilename))
covidFrDict['confirmed'] = pd.read_csv(confirmedFilename)
covidFrDict['deaths'] = pd.read_csv(deathsFilename)

# Recovered is back again!
covidFrDict['recovered'] = pd.read_csv(recoveredFilename)

countryCovidFrDict = getCountryCovidFrDict(countryName)

# Get list of dates
colNamesList = list(countryCovidFrDict['confirmed'])
dateList = [colName for colName in colNamesList if '/20' in colName]
dataList = [countryCovidFrDict['confirmed'][date].iloc[0] for date in dateList]
dataDict = dict(zip(dateList, dataList))

# Get time series for cases > 100 only
daysSince = 100
nCasesGreaterDaysSinceList = []
datesGreaterDaysSinceList = []

for key in dataDict.keys():
    if dataDict[key] > daysSince:
        datesGreaterDaysSinceList.append(key)
        nCasesGreaterDaysSinceList.append(dataDict[key])
        
XList, yList = split_sequence(nCasesGreaterDaysSinceList, nDaysMin, k)

XTrainList = XList[0:len(XList)-(nValid + nTest)]
XValidList = XList[len(XList)-(nValid+nTest):len(XList)-(nTest)]
XTestList = XList[-nTest:]

yTrain = yList[0:len(XList)-(nValid + nTest)]
yValid = yList[len(XList)-(nValid+nTest):len(XList)-(nTest)]
yTest = yList[-nTest:]

print('Total size of data points for LSTM:', len(yList))
print('Size of training set:', len(yTrain))
print('Size of validation set:', len(yValid))
print('Size of test set:', len(yTest))

# Convert from list to matrix
XTrain = XTrainList.reshape((XTrainList.shape[0], XTrainList.shape[1], nFeatures))
XValid = XValidList.reshape((XValidList.shape[0], XValidList.shape[1], nFeatures))
XTest = XTestList.reshape((XTestList.shape[0], XTestList.shape[1], nFeatures))

Total size of data points for LSTM: 114
Size of training set: 94
Size of validation set: 10
Size of test set: 10


# Vanilla LSTM

In [7]:
nNeurons = 100
nFeatures = 1

bestValidMAPE = 100
bestSeed = -1
for seed in range(100):
    tf.random.set_seed(seed=seed)
    
    # define model
    model = Sequential()
    model.add(LSTM(nNeurons, activation='relu', input_shape=(nDaysMin, nFeatures)))
    model.add(Dense(1))
    opt = Adam(learning_rate=0.1)
    model.compile(optimizer=opt, loss='mse')

    # fit model
    model.fit(XTrain, yTrain[:,0], epochs=1000, verbose=0)

    yPredListList = []
    for day in range(nTest):
        yPredListList.append([])
    XValidNew = XValid.copy()
    for day in range(k):
        yPred = model.predict(XValidNew, verbose=0)
        for i in range(len(yPred)):
            yPredListList[i].append(yPred[i][0])
        XValidNew = np.delete(XValidNew, 0, axis=1)
        yPred = np.expand_dims(yPred, 2)
        XValidNew = np.append(XValidNew, yPred, axis=1)

#     for yTrue, yPred in zip(yTest, yPredList):
#         print(yTrue, yPred)

    MAPE = meanAbsolutePercentageError_kDay(yValid, yPredListList)
    print(seed, MAPE)
    if MAPE < bestValidMAPE:
        print('Updating best MAPE to {}...'.format(MAPE))
        bestValidMAPE = MAPE
        print('Updating best seed to {}...'.format(seed))
        bestSeed = seed

# define model
print('Training model with best seed...')
tf.random.set_seed(seed=bestSeed)
model = Sequential()
model.add(LSTM(nNeurons, activation='relu', input_shape=(nDaysMin, nFeatures)))
model.add(Dense(1))
opt = Adam(learning_rate=0.1)
model.compile(optimizer=opt, loss='mse')

# fit model
model.fit(XTrain, yTrain[:,0], epochs=1000, verbose=0)

yPredListList = []
for day in range(nTest):
    yPredListList.append([])
XTestNew = XTest.copy()
for day in range(k):
    yPred = model.predict(XTestNew, verbose=0)
    for i in range(len(yPred)):
        yPredListList[i].append(yPred[i][0])
    XTestNew = np.delete(XTestNew, 0, axis=1)
    yPred = np.expand_dims(yPred, 2)
    XTestNew = np.append(XTestNew, yPred, axis=1)
    
MAPE = meanAbsolutePercentageError_kDay(yTest, yPredListList)
print('Test MAPE:', MAPE)
MdSA = medianSymmetricAccuracy_kDay(yTest, yPredListList)
print('Test MdSA:', MdSA)
yPredVanillaLSTM = yPredListList

0 0.23483468430909257
Updating best MAPE to 0.23483468430909257...
Updating best seed to 0...
1 0.9047488446919676
2 99.98896429447086
3 2.127100840163092
4 99.98985602224748
5 99.98888410096284
6 0.25812058307987357
7 99.98819769043962
8 0.3823427106732428
9 1.3273987672378569
10 0.6157565958648269
11 99.98831443838378
12 1.2316472406549581
13 0.4027173835289588
14 99.99079510402966
15 99.99388197813794
16 1.01741004045782
17 0.9980496257618926
18 3.257532576667142
19 0.7502670726274605
20 0.6510495141099608
21 99.84148048076493
22 0.3892681129886511
23 0.6084756267693666
24 0.7638660659247
25 0.3229913885585036
26 0.4079342002104139
27 1.4155174766740342
28 99.98748149278607
29 99.99083528666243
30 5.975865072971494
31 99.98742878909826
32 99.98881217436185
33 0.2800764663423337
34 0.36945830528708523
35 0.7052585236624976
36 0.8611378927821861
37 0.296137877230424
38 1.798916946085691
39 2.912711104241351
40 99.98808513748604
41 99.98794485073893
42 1.129988105244522
43 1.3942977914

# Stacked LSTM

In [8]:
# define model
nNeurons = 50
nFeatures = 1

bestValidMAPE = 100
bestSeed = -1
for seed in range(100):
    tf.random.set_seed(seed=seed)
    model = Sequential()
    model.add(LSTM(nNeurons, activation='relu', return_sequences=True, input_shape=(nDaysMin, nFeatures)))
    model.add(LSTM(nNeurons, activation='relu'))
    model.add(Dense(1))
    opt = Adam(learning_rate=0.1)
    model.compile(optimizer=opt, loss='mse')

    # fit model
    model.fit(XTrain, yTrain[:,0], epochs=1000, verbose=0)

    yPredListList = []
    for day in range(nTest):
        yPredListList.append([])
    XValidNew = XValid.copy()
    for day in range(k):
        yPred = model.predict(XValidNew, verbose=0)
        for i in range(len(yPred)):
            yPredListList[i].append(yPred[i][0])
        XValidNew = np.delete(XValidNew, 0, axis=1)
        yPred = np.expand_dims(yPred, 2)
        XValidNew = np.append(XValidNew, yPred, axis=1)

#     for yTrue, yPred in zip(yTest, yPredList):
#         print(yTrue, yPred)

    MAPE = meanAbsolutePercentageError_kDay(yValid, yPredListList)
    print(seed, MAPE)
    if MAPE < bestValidMAPE:
        print('Updating best MAPE to {}...'.format(MAPE))
        bestValidMAPE = MAPE
        print('Updating best seed to {}...'.format(seed))
        bestSeed = seed

# define model
print('Training model with best seed...')
tf.random.set_seed(seed=bestSeed)
model = Sequential()
model.add(LSTM(nNeurons, activation='relu', return_sequences=True, input_shape=(nDaysMin, nFeatures)))
model.add(LSTM(nNeurons, activation='relu'))
model.add(Dense(1))
opt = Adam(learning_rate=0.1)
model.compile(optimizer=opt, loss='mse')

# fit model
model.fit(XTrain, yTrain[:,0], epochs=1000, verbose=0)

yPredListList = []
for day in range(nTest):
    yPredListList.append([])
XTestNew = XTest.copy()
for day in range(k):
    yPred = model.predict(XTestNew, verbose=0)
    for i in range(len(yPred)):
        yPredListList[i].append(yPred[i][0])
    XTestNew = np.delete(XTestNew, 0, axis=1)
    yPred = np.expand_dims(yPred, 2)
    XTestNew = np.append(XTestNew, yPred, axis=1)
    
MAPE = meanAbsolutePercentageError_kDay(yTest, yPredListList)
print('Test MAPE:', MAPE)
MdSA = medianSymmetricAccuracy_kDay(yTest, yPredListList)
print('Test MdSA:', MdSA)
yPredStackedLSTM = yPredListList

0 1.1570940465428279
Updating best MAPE to 1.1570940465428279...
Updating best seed to 0...
1 59.24458048123329
2 55.49201988963472
3 99.98771196614427
4 1.3040251310632291
5 99.99498978513257
6 1.2914078777667115
7 0.5200346170313883
Updating best MAPE to 0.5200346170313883...
Updating best seed to 7...
8 0.30548870571324865
Updating best MAPE to 0.30548870571324865...
Updating best seed to 8...
9 2.1742985949116913
10 0.3098071081225156
11 1.603497408993742
12 55.96346705548208
13 99.9875475471376
14 57.29064557413536
15 0.7605627584391805
16 59.656679208113964
17 50.5172490288197
18 99.99141424404088
19 0.583410556776924
20 nan
21 56.285143510806506
22 0.6711087033956397
23 99.99943139081729
24 99.99765370309393
25 53.55231018495632
26 61.226485010121195
27 99.99755991222806
28 nan
29 99.99146086459552
30 107.05623047580737
31 99.98867045254444
32 59.815503561346155
33 56.59070151068744
34 59.90325054224376
35 52.42284486731764
36 0.9320636258214611
37 62.219934901761
38 55.54593006

# Bidirectional LSTM

In [9]:
# define model
nNeurons = 50
nFeatures = 1

bestValidMAPE = 100
bestSeed = -1
for seed in range(100):
    tf.random.set_seed(seed=seed)
    model = Sequential()
    model.add(Bidirectional(LSTM(nNeurons, activation='relu'), input_shape=(nDaysMin, nFeatures)))
    model.add(Dense(1))
    opt = Adam(learning_rate=0.1)
    model.compile(optimizer=opt, loss='mse')

    # fit model
    model.fit(XTrain, yTrain[:,0], epochs=1000, verbose=0)

    yPredListList = []
    for day in range(nTest):
        yPredListList.append([])
    XValidNew = XValid.copy()
    for day in range(k):
        yPred = model.predict(XValidNew, verbose=0)
        for i in range(len(yPred)):
            yPredListList[i].append(yPred[i][0])
        XValidNew = np.delete(XValidNew, 0, axis=1)
        yPred = np.expand_dims(yPred, 2)
        XValidNew = np.append(XValidNew, yPred, axis=1)

#     for yTrue, yPred in zip(yTest, yPredList):
#         print(yTrue, yPred)

    MAPE = meanAbsolutePercentageError_kDay(yValid, yPredListList)
    print(seed, MAPE)
    if MAPE < bestValidMAPE:
        print('Updating best MAPE to {}...'.format(MAPE))
        bestValidMAPE = MAPE
        print('Updating best seed to {}...'.format(seed))
        bestSeed = seed

# define model
print('Training model with best seed...')
tf.random.set_seed(seed=bestSeed)
model = Sequential()
model.add(Bidirectional(LSTM(nNeurons, activation='relu'), input_shape=(nDaysMin, nFeatures)))
model.add(Dense(1))
opt = Adam(learning_rate=0.1)
model.compile(optimizer=opt, loss='mse')

# fit model
model.fit(XTrain, yTrain[:,0], epochs=1000, verbose=0)

yPredListList = []
for day in range(nTest):
    yPredListList.append([])
XTestNew = XTest.copy()
for day in range(k):
    yPred = model.predict(XTestNew, verbose=0)
    for i in range(len(yPred)):
        yPredListList[i].append(yPred[i][0])
    XTestNew = np.delete(XTestNew, 0, axis=1)
    yPred = np.expand_dims(yPred, 2)
    XTestNew = np.append(XTestNew, yPred, axis=1)
    
MAPE = meanAbsolutePercentageError_kDay(yTest, yPredListList)
print('Test MAPE:', MAPE)
MdSA = medianSymmetricAccuracy_kDay(yTest, yPredListList)
print('Test MdSA:', MdSA)

0 8.603495204047773
Updating best MAPE to 8.603495204047773...
Updating best seed to 0...
1 0.5243558620108356
Updating best MAPE to 0.5243558620108356...
Updating best seed to 1...
2 3.873307472946348
3 99.98791281232933
4 0.9854503868670186
5 129.33653804232674
6 2.4918591396930356
7 0.423293065971358
Updating best MAPE to 0.423293065971358...
Updating best seed to 7...
8 nan
9 0.843133047915598
10 0.254922164504914
Updating best MAPE to 0.254922164504914...
Updating best seed to 10...
11 0.4049146422930226
12 5.0050106961644145
13 0.2662139284258271
14 1.2320117472783778
15 0.25385083516314316
Updating best MAPE to 0.25385083516314316...
Updating best seed to 15...
16 0.5040228231530929
17 99.98719370282451
18 1.8449687930921534
19 99.98904906641036
20 0.9426962471209858
21 0.7150727890721441
22 0.5914565578759925
23 1.5312934570049606
24 2.8318534657267787
25 1.0998411898625104
26 0.7017746926316909
27 1.918701847963937
28 0.6198757452844487
29 1.2703057419871566
30 1.4630977454550

# CNN LSTM

In [11]:
# Number of subsequences to break X into (we do 15 = 5x3, 5 subsequences of size 3 each)
nSeq = 5
nSteps = 2

# define model
nNeurons = 50
nFeatures = 1
nFilters = 64

bestValidMAPE = 100
bestSeed = -1

# Reshape input
XTrainCNN = XTrainList.reshape((XTrainList.shape[0], nSeq, nSteps, nFeatures))

# print(XTrainCNN.shape)
# print(XValidCNN.shape)
# print(XTestCNN.shape)

for seed in range(100):
    tf.random.set_seed(seed=seed)
    model = Sequential()
    model.add(TimeDistributed(Conv1D(filters=nFilters, kernel_size=1, activation='relu'), input_shape=(None, nSteps, nFeatures)))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(nNeurons, activation='relu'))
    model.add(Dense(1))
    opt = Adam(learning_rate=0.1)
    model.compile(optimizer=opt, loss='mse')

    # fit model
    model.fit(XTrainCNN, yTrain[:,0], epochs=1000, verbose=0)
    
    yPredListList = []
    for day in range(nTest):
        yPredListList.append([])
    XValidListNew = XValidList.copy()
    for day in range(k):
        XValidCNN = XValidListNew.reshape((XValidListNew.shape[0], nSeq, nSteps, nFeatures))
        yPred = model.predict(XValidCNN, verbose=0)
        for i in range(len(yPred)):
            yPredListList[i].append(yPred[i][0])
        XValidListNew = np.delete(XValidListNew, 0, axis=1)
        XValidListNew = np.append(XValidListNew, yPred, axis=1)

#     for yTrue, yPred in zip(yTest, yPredList):
#         print(yTrue, yPred)

    MAPE = meanAbsolutePercentageError_kDay(yValid, yPredListList)
    print(seed, MAPE)
    if MAPE < bestValidMAPE:
        print('Updating best MAPE to {}...'.format(MAPE))
        bestValidMAPE = MAPE
        print('Updating best seed to {}...'.format(seed))
        bestSeed = seed

# define model
print('Training model with best seed...')
tf.random.set_seed(seed=bestSeed)
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=nFilters, kernel_size=1, activation='relu'), input_shape=(None, nSteps, nFeatures)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(nNeurons, activation='relu'))
model.add(Dense(1))
opt = Adam(learning_rate=0.1)
model.compile(optimizer=opt, loss='mse')
# fit model
model.fit(XTrainCNN, yTrain[:,0], epochs=1000, verbose=0)

yPredListList = []
for day in range(nTest):
    yPredListList.append([])
XTestListNew = XTestList.copy()
for day in range(k):
    XTestCNN = XTestListNew.reshape((XTestListNew.shape[0], nSeq, nSteps, nFeatures))
    yPred = model.predict(XTestCNN, verbose=0)
    for i in range(len(yPred)):
        yPredListList[i].append(yPred[i][0])
    XTestListNew = np.delete(XTestListNew, 0, axis=1)
    XTestListNew = np.append(XTestListNew, yPred, axis=1)
    
MAPE = meanAbsolutePercentageError_kDay(yTest, yPredListList)
print('Test MAPE:', MAPE)
MdSA = medianSymmetricAccuracy_kDay(yTest, yPredListList)
print('Test MdSA:', MdSA)

0 2.050561301724997
Updating best MAPE to 2.050561301724997...
Updating best seed to 0...
1 51.725649687743804
2 0.42666380846942137
Updating best MAPE to 0.42666380846942137...
Updating best seed to 2...
3 57.112878078938344
4 0.7944393543846759
5 0.9083961235676328
6 1.4419178148387752
7 56.254609712754565
8 0.3003760366987085
Updating best MAPE to 0.3003760366987085...
Updating best seed to 8...
9 0.8233233935266369
10 55.11992640507426
11 1.2618288140958613
12 55.6576156964525
13 0.5873733464189749
14 0.38397365291387214
15 1.929334195244583
16 2.0809207043919113
17 48.92583540591435
18 56.67703873789865
19 3.4293514864399484
20 53.20925382353219
21 0.44257394213646145
22 55.86800038291435
23 60.60526985003931
24 51.137279887153454
25 53.90665544102541
26 53.659196134012824
27 56.55128001694775
28 58.214490575188826
29 1.0075351904967422
30 50.872588446447985
31 57.89108656043365
32 0.4107239030257217
33 5.199892427753288
34 57.760549888164064
35 53.76758147427336
36 0.433858502394

# ConvLSTM

In [12]:
# Number of subsequences to break X into (we do 15 = 5x3, 5 subsequences of size 3 each)
nSeq = 5
nSteps = 2
# Each input is rows x columns, we have rows=1 and columns=nSteps

# define model
nNeurons = 50
nFeatures = 1
nFilters = 64

bestValidMAPE = 100
bestSeed = -1

# Reshape input
XTrainConv = XTrainList.reshape((XTrainList.shape[0], nSeq, 1, nSteps, nFeatures))
# XValidConv = XValidList.reshape((XValidList.shape[0], nSeq, 1, nSteps, nFeatures))
# XTestConv = XTestList.reshape((XTestList.shape[0], nSeq, 1, nSteps, nFeatures))

for seed in range(100):
    tf.random.set_seed(seed=seed)
    model = Sequential()
    model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='relu', input_shape=(nSeq, 1, nSteps, nFeatures)))
    model.add(Flatten())
    model.add(Dense(1))
    opt = Adam(learning_rate=0.1)
    model.compile(optimizer=opt, loss='mse')
    
    # fit model
    model.fit(XTrainConv, yTrain[:,0], epochs=1000, verbose=0)

    yPredListList = []
    for day in range(nTest):
        yPredListList.append([])
    XValidListNew = XValidList.copy()
    for day in range(k):
        XValidConv = XValidListNew.reshape((XValidListNew.shape[0], nSeq, 1, nSteps, nFeatures))
        yPred = model.predict(XValidConv, verbose=0)
        for i in range(len(yPred)):
            yPredListList[i].append(yPred[i][0])
        XValidListNew = np.delete(XValidListNew, 0, axis=1)
        XValidListNew = np.append(XValidListNew, yPred, axis=1)

#     for yTrue, yPred in zip(yTest, yPredList):
#         print(yTrue, yPred)

    MAPE = meanAbsolutePercentageError_kDay(yValid, yPredListList)
    print(seed, MAPE)
    if MAPE < bestValidMAPE:
        print('Updating best MAPE to {}...'.format(MAPE))
        bestValidMAPE = MAPE
        print('Updating best seed to {}...'.format(seed))
        bestSeed = seed

# define model
print('Training model with best seed...')
tf.random.set_seed(seed=bestSeed)
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='relu', input_shape=(nSeq, 1, nSteps, nFeatures)))
model.add(Flatten())
model.add(Dense(1))
opt = Adam(learning_rate=0.1)
model.compile(optimizer=opt, loss='mse')
# fit model
model.fit(XTrainConv, yTrain[:,0], epochs=1000, verbose=0)

yPredListList = []
for day in range(nTest):
    yPredListList.append([])
XTestListNew = XTestList.copy()
for day in range(k):
    XTestConv = XTestListNew.reshape((XTestListNew.shape[0], nSeq, 1, nSteps, nFeatures))
    yPred = model.predict(XTestConv, verbose=0)
    for i in range(len(yPred)):
        yPredListList[i].append(yPred[i][0])
    XTestListNew = np.delete(XTestListNew, 0, axis=1)
    XTestListNew = np.append(XTestListNew, yPred, axis=1)
    
MAPE = meanAbsolutePercentageError_kDay(yTest, yPredListList)
print('Test MAPE:', MAPE)
MdSA = medianSymmetricAccuracy_kDay(yTest, yPredListList)
print('Test MdSA:', MdSA)

0 0.670041590043498
Updating best MAPE to 0.670041590043498...
Updating best seed to 0...
1 2.988982938190708
2 99.62492075635477
3 0.3086672359120057
Updating best MAPE to 0.3086672359120057...
Updating best seed to 3...
4 0.9325827503649372
5 0.30124953407751215
Updating best MAPE to 0.30124953407751215...
Updating best seed to 5...
6 0.37551462073376624
7 0.8969308594929885
8 1.0789918906282105
9 99.99185758221581
10 0.7800021374974099
11 0.3462554400873135
12 0.8399750862085194
13 95.14421156694806
14 99.91404849231627
15 0.22788934740481215
Updating best MAPE to 0.22788934740481215...
Updating best seed to 15...
16 0.27763380104781227
17 0.6865163831488168
18 0.2738661309554351
19 0.5985240561001616
20 99.64326366497664
21 99.3887546959496
22 81.97876874910703
23 0.9549783626659479
24 1.24416773309569
25 1.7987899183804206
26 101.10823673816321
27 0.4556424064338446
28 97.89745698542716
29 0.24510404082056225
30 1.9416711225329113
31 0.3093109297364475
32 2.1136216035291198
33 122