# CNN-LSTM Model

In [186]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras

In [187]:
np.random.seed(42)
tf.random.set_seed(42)

Import the `initial_variables.csv` dataset

In [188]:
X = pd.read_csv('../csv/X.csv', index_col='date', parse_dates=True, infer_datetime_format=True)

### Split data into train and test sets

In [189]:
y = X['target'].copy()
X = X.drop(columns='target')


In [190]:
X

Unnamed: 0_level_0,open,high,low,close,volume,trend,rsi,rsi_fast_k,rsi_fast_d,williams_r,...,stk_visa,stk_wmt,stk_xom,usd_aud,usd_cad,usd_cny,usd_eur,usd_hkd,usd_jpy,day_of_week
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-08-01,1379.319946,1385.030029,1373.349976,1375.319946,4440920000,1363.075664,55.682374,39.427848,73.266820,-26.272070,...,29.716949,58.796188,57.188221,0.953740,1.00320,6.3618,0.81340,7.75376,78.120003,2
2012-08-02,1375.130005,1375.130005,1354.650024,1365.000000,4193740000,1364.794109,52.843782,0.000000,40.518757,-42.783984,...,30.138939,59.139595,56.510475,0.955110,1.00472,6.3688,0.81690,7.75500,78.379997,3
2012-08-03,1365.449951,1394.160034,1365.449951,1390.989990,3751170000,1366.193092,58.457868,93.721587,44.383145,-4.882997,...,30.537630,59.538914,57.609375,0.956130,1.00705,6.3676,0.82100,7.75426,78.220001,4
2012-08-06,1391.040039,1399.630005,1391.040039,1394.229980,3122050000,1367.441839,59.096882,100.000000,64.573862,-7.671577,...,30.549278,59.323273,57.543545,0.947060,1.00057,6.3715,0.80460,7.75430,78.610001,0
2012-08-07,1394.459961,1407.140015,1394.459961,1401.349976,3682490000,1369.489085,60.502301,100.000000,97.907196,-7.432654,...,30.402399,59.091679,57.852829,0.946700,1.00060,6.3740,0.80710,7.75500,78.199997,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-26,3953.219971,3953.219971,3910.739990,3921.050049,3083420000,3871.253815,49.880434,0.000000,32.941861,-31.418430,...,212.490005,121.980003,89.629997,1.438600,1.28507,6.7495,0.97800,7.84913,136.431000,1
2022-07-27,3951.429932,4039.560059,3951.429932,4023.610107,3584170000,3880.207597,55.308438,100.000000,42.153586,-5.015708,...,210.470001,126.589996,91.570000,1.439390,1.28737,6.7623,0.98721,7.84926,136.970001,2
2022-07-28,4026.129883,4078.949951,3992.969971,4072.429932,3882850000,3893.329737,57.608834,100.000000,66.666667,-1.824344,...,211.350006,129.750000,92.639999,1.429613,1.28171,6.7574,0.97950,7.84910,136.110992,3
2022-07-29,4087.330078,4140.149902,4079.219971,4130.290039,3817740000,3908.661969,60.166728,100.000000,100.000000,-2.355495,...,212.110001,132.050003,96.930000,1.428400,1.28070,6.7458,0.98113,7.84950,134.397003,4


In [191]:

# Split the data into 60% train, 20% validation, 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.25, shuffle=False)

Normalize the data with `StandardScaler()`

In [192]:
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

In [193]:
pd.DataFrame(X_train)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,64,65,66,67,68,69,70,71,72,73
0,-1.843907,-1.848833,-1.840132,-1.857611,1.381047,-1.866031,-0.027444,-0.376548,0.556563,0.272185,...,-1.516832,-0.783624,-1.413559,-1.807790,-1.525976,-0.098979,-0.298034,-0.559884,-2.655255,-0.016571
1,-1.855262,-1.875613,-1.890951,-1.885600,1.007202,-1.861343,-0.356020,-1.326575,-0.446912,-0.276939,...,-1.500770,-0.745236,-1.585104,-1.798300,-1.514151,-0.071636,-0.248964,-0.516774,-2.631405,0.697890
2,-1.881494,-1.824135,-1.861601,-1.815113,0.337840,-1.857527,0.293827,0.931678,-0.328498,0.983503,...,-1.485595,-0.700599,-1.306960,-1.791234,-1.496023,-0.076324,-0.191483,-0.542497,-2.646082,1.412351
3,-1.812146,-1.809338,-1.792057,-1.806326,-0.613669,-1.854121,0.367795,1.082958,0.290191,0.890766,...,-1.485151,-0.724704,-1.323622,-1.854063,-1.546437,-0.061091,-0.421408,-0.541105,-2.610306,-1.445494
4,-1.802878,-1.789022,-1.782763,-1.787016,0.233965,-1.848537,0.530476,1.082958,1.311600,0.898711,...,-1.490742,-0.750593,-1.245339,-1.856557,-1.546204,-0.051325,-0.386358,-0.516774,-2.647917,-0.731032
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1504,2.054123,2.108758,2.085140,2.131144,0.038134,1.968588,1.114133,1.082958,0.491239,1.088972,...,2.644714,1.795317,1.008564,0.913435,0.900031,1.575545,0.295845,2.649700,0.383037,-0.016571
1505,2.102252,2.102103,2.132780,2.107738,0.189862,1.982003,0.860802,0.207259,0.883784,0.690284,...,2.639519,1.829674,1.139961,0.871941,0.811963,1.490393,0.242711,2.618424,0.346711,0.697890
1506,2.120843,2.095611,2.059622,2.057240,-0.169525,1.992628,0.343873,-1.326575,-0.016893,-0.111435,...,2.573102,1.819263,0.670985,0.971760,0.836781,1.545469,0.339728,2.698361,0.374323,1.412351
1507,2.057565,2.037640,2.031821,2.013251,-0.426549,2.003855,-0.069970,-1.326575,-1.038301,-0.809615,...,2.416147,1.897348,0.634597,0.945437,0.830557,1.662258,0.321642,2.710874,0.354875,-1.445494


In [205]:
# pd.DataFrame(X_train, columns=X.columns).hist(figsize=(30,30))
# plt.show()

X_20 = X_train[:20]
y_20 = y_train[:20]

In [229]:
def cnn_lstm(seq_len=20, n_features=75, droprate=0.2):


    lstm_512  = keras.layers.LSTM(512, return_sequences=True)
    lstm_256  = keras.layers.LSTM(256, return_sequences=True)
    lstm_128  = keras.layers.LSTM(128, return_sequences=True)
    lstm_64   = keras.layers.LSTM(64, return_sequences=True)
    lstm_32   = keras.layers.LSTM(32, return_sequences=True)
    lstm_16   = keras.layers.LSTM(16, return_sequences=True)
    tdl_512   = keras.layers.TimeDistributed(keras.layers.Dense(512))
    dropout_1 = keras.layers.Dropout(0.2)
    tdl_256   = keras.layers.TimeDistributed(keras.layers.Dense(256))
    dropout_2 = keras.layers.Dropout(0.2)
    tdl_128   = keras.layers.TimeDistributed(keras.layers.Dense(128))
    tdl_64    = keras.layers.TimeDistributed(keras.layers.Dense(64))
    # tdl_32    = keras.layers.TimeDistributed(keras.layers.Dense(32))
    # tdl_16    = keras.layers.TimeDistributed(keras.layers.Dense(16))
    dense     = keras.layers.Dense(1, activation='sigmoid')

    conv_2d   = keras.layers.Conv2D(
        filters=1,
        kernel_size=1,
        input_shape=(seq_len, n_features, 1),
        activation='relu',
        data_format='channels_last',
        padding='same'
    )

    model = keras.models.Sequential([
        conv_2d,
        # lstm_512,
        # lstm_128,
        # lstm_64,
        # tdl_512,
        # dropout_1,
        # tdl_256,
        # dropout_2,
        # tdl_128,
        # tdl_64,
        # tdl_32,
        # tdl_16,
        dense
    ])

    return model

model = cnn_lstm()
model.summary()

model.compile(loss='mae', optimizer='adam', metrics=[tf.keras.metrics.MeanAbsoluteError()])
history = model.fit(X_20, y_20, epochs=10, batch_size=10)


Model: "sequential_48"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_51 (Conv2D)          (None, 20, 75, 1)         2         
                                                                 
 dense_279 (Dense)           (None, 20, 75, 1)         2         
                                                                 
Total params: 4
Trainable params: 4
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10


ValueError: in user code:

    File "/Users/adam/miniforge3/envs/cnn/lib/python3.10/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/Users/adam/miniforge3/envs/cnn/lib/python3.10/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/adam/miniforge3/envs/cnn/lib/python3.10/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/Users/adam/miniforge3/envs/cnn/lib/python3.10/site-packages/keras/engine/training.py", line 889, in train_step
        y_pred = self(x, training=True)
    File "/Users/adam/miniforge3/envs/cnn/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/adam/miniforge3/envs/cnn/lib/python3.10/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_48" is incompatible with the layer: expected shape=(None, 20, 75, 1), found shape=(10, 74)


In [224]:
X_20[20]

IndexError: index 20 is out of bounds for axis 0 with size 20

In [216]:
    # model = keras.models.Sequential([
    #     keras.layers.Conv2D(filters=1, kernel_size=(1, 74), activation='relu', input_shape=(20, 74, 1)),
    #     keras.layers.LSTM(512),
    #     keras.layers.LSTM(128),
    #     keras.layers.LSTM(64),
    #     keras.layers.TimeDistributed(keras.layers.Dense(512)),
    #     keras.layers.Dropout(0.2),
    #     keras.layers.TimeDistributed(keras.layers.Dense(256)),
    #     keras.layers.Dropout(0.2),
    #     keras.layers.TimeDistributed(keras.layers.Dense(128)),
    #     keras.layers.TimeDistributed(keras.layers.Dense(64)),
    #     keras.layers.TimeDistributed(keras.layers.Dense(32)),
    #     keras.layers.TimeDistributed(keras.layers.Dense(16)),
    #     keras.layers.Dense(1)
    # ])

   
    
    model.compile(loss='mse', optimizer='adam', metrics=[tf.keras.metrics.MeanAbsoluteError()])
    history = model.fit(X_20, y_20, epochs=10)