Here, we would like to implement a multivariate time series LSTM model that utilizes historical prices along with technical indicators to predict prices.

<img src="https://api.wandb.ai/files/ayush-thakur/images/projects/103390/4fc355be.png" width=500 height=300>

In [172]:
import os
import numpy as np
from Functions.sp500.sp500 import get_sp500_tickers

In [213]:
def load_data(tickers):
    final_X, final_y = None, None
    for ticker in tickers:
        print(f"Loading {ticker}")
        a = f"../Functions/csv_files/sampling_data/{ticker}/X.npy"
        b = f"../Functions/csv_files/sampling_data/{ticker}/y.npy"
        if not os.path.exists(a) or not os.path.exists(b):
            print(f"No {ticker} data")
            continue
        data_X = np.load(a, allow_pickle=True)
        data_y = np.load(b, allow_pickle=True)
        if len(data_X.shape) != 3 or data_X.shape[1] != 14 or data_X.shape[2] != 7:
            print(f"{ticker} is corrupted: Training Data is incorrect")
            continue
        if len(data_y.shape) != 1 or data_y.shape[0] != data_X.shape[0]:
            print(f"{ticker} is corrupted: Label is incorrect")
            continue
        if final_X is None and final_y is None:
            final_X = data_X
            final_y = data_y
        else:
            final_X = np.append(final_X, data_X, axis=0)
            final_y = np.append(final_y, data_y, axis=0)
    return final_X, final_y

In [204]:
tickers = get_sp500_tickers(False)
end = tickers.index("WTW.US")
tickers = tickers[:end]
print(tickers)

['AAPL.US', 'MSFT.US', 'AMZN.US', 'TSLA.US', 'GOOGL.US', 'GOOG.US', 'BRKB.US', 'UNH.US', 'JNJ.US', 'XOM.US', 'META.US', 'JPM.US', 'NVDA.US', 'PG.US', 'V.US', 'HD.US', 'CVX.US', 'LLY.US', 'PFE.US', 'MA.US', 'ABBV.US', 'PEP.US', 'MRK.US', 'KO.US', 'BAC.US', 'COST.US', 'TMO.US', 'WMT.US', 'AVGO.US', 'DIS.US', 'MCD.US', 'ABT.US', 'DHR.US', 'CSCO.US', 'ACN.US', 'VZ.US', 'NEE.US', 'WFC.US', 'BMY.US', 'CRM.US', 'TXN.US', 'LIN.US', 'COP.US', 'CMCSA.US', 'ADBE.US', 'PM.US', 'QCOM.US', 'CVS.US', 'UNP.US', 'RTX.US', 'AMGN.US', 'LOW.US', 'UPS.US', 'HON.US', 'SCHW.US', 'T.US', 'ELV.US', 'INTU.US', 'IBM.US', 'MDT.US', 'INTC.US', 'MS.US', 'NKE.US', 'NFLX.US', 'SPGI.US', 'AMD.US', 'GS.US', 'AMT.US', 'PYPL.US', 'SBUX.US', 'ADP.US', 'DE.US', 'ORCL.US', 'LMT.US', 'CAT.US', 'CI.US', 'BLK.US', 'AXP.US', 'TMUS.US', 'C.US', 'GILD.US', 'NOW.US', 'CB.US', 'PLD.US', 'MDLZ.US', 'MMC.US', 'VRTX.US', 'REGN.US', 'ADI.US', 'TJX.US', 'MO.US', 'SO.US', 'DUK.US', 'AMAT.US', 'ZTS.US', 'TGT.US', 'NOC.US', 'SYK.US', 'PGR.

In [214]:
X, y = load_data(tickers)

Loading AAPL.US
Loading MSFT.US
Loading AMZN.US
Loading TSLA.US
Loading GOOGL.US
Loading GOOG.US
Loading BRKB.US
Loading UNH.US
Loading JNJ.US
Loading XOM.US
Loading META.US
Loading JPM.US
Loading NVDA.US
Loading PG.US
Loading V.US
Loading HD.US
Loading CVX.US
Loading LLY.US
Loading PFE.US
Loading MA.US
Loading ABBV.US
Loading PEP.US
Loading MRK.US
Loading KO.US
Loading BAC.US
Loading COST.US
Loading TMO.US
Loading WMT.US
Loading AVGO.US
Loading DIS.US
Loading MCD.US
Loading ABT.US
Loading DHR.US
Loading CSCO.US
Loading ACN.US
Loading VZ.US
Loading NEE.US
Loading WFC.US
Loading BMY.US
Loading CRM.US
Loading TXN.US
Loading LIN.US
Loading COP.US
Loading CMCSA.US
Loading ADBE.US
Loading PM.US
Loading QCOM.US
Loading CVS.US
Loading UNP.US
Loading RTX.US
Loading AMGN.US
Loading LOW.US
Loading UPS.US
Loading HON.US
Loading SCHW.US
Loading T.US
Loading ELV.US
Loading INTU.US
Loading IBM.US
Loading MDT.US
Loading INTC.US
Loading MS.US
Loading NKE.US
Loading NFLX.US
Loading SPGI.US
Loading AMD.

In [216]:
X[0]

array([['2017-05-08', 35.9085, 195009600, 34.4564, 0.1639, 75.8309,
        0.5258],
       ['2017-05-09', 36.1385, 156521600, 34.728, 0.1863, 77.2823,
        0.6881],
       ['2017-05-10', 35.9672, 103222800, 34.947, 0.2037, 73.731, 0.7906],
       ['2017-05-11', 36.2782, 109020400, 35.1834, 0.2175, 75.8966,
        0.8831],
       ['2017-05-12', 36.7849, 130108000, 35.4597, 0.2436, 78.9424,
        1.0699],
       ['2017-05-15', 36.6906, 104038800, 35.6942, 0.2605, 76.9925,
        1.1783],
       ['2017-05-16', 36.6364, 80194000, 35.8951, 0.257, 75.833, 1.1548],
       ['2017-05-17', 35.4063, 203070800, 35.9042, 0.2064, 55.431,
        0.9596],
       ['2017-05-18', 35.946, 134272800, 35.9694, 0.1628, 60.4576,
        0.7337],
       ['2017-05-19', 36.0685, 107843200, 36.0296, 0.1356, 61.5185,
        0.6283],
       ['2017-05-22', 36.2876, 91865600, 36.1032, 0.1145, 63.4094,
        0.5465],
       ['2017-05-23', 36.2429, 79675600, 36.1549, 0.0813, 62.7321,
        0.4132],
      

In [217]:
def remove_date(X):
    X_train = np.delete(X, 0, 2)
    X_train = np.asarray(X_train).astype(np.float32)
    return X_train
X_train = remove_date(X)

In [219]:
y[0]

36.2122

In [158]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

In [163]:
from ".Fun"

array([['2019-11-29', 267.25, 64.7021, 0.0627, 73.6282, 0.2749],
       ['2019-12-02', 264.16, 64.7364, 0.0416, 65.8583, 0.2515],
       ['2019-12-03', 259.45, 64.6925, -0.0165, 56.1351, 0.3342],
       ['2019-12-04', 261.74, 64.6448, -0.0356, 59.2829, 0.3638],
       ['2019-12-05', 265.58, 64.6962, -0.0397, 63.9539, 0.357],
       ['2019-12-06', 270.71, 64.7827, 0.0141, 69.0601, 0.5081],
       ['2019-12-09', 266.92, 64.7795, 0.0469, 62.0652, 0.5048],
       ['2019-12-10', 268.48, 64.8178, 0.0842, 63.6954, 0.5512],
       ['2019-12-11', 270.77, 64.9502, 0.1102, 66.0045, 0.663],
       ['2019-12-12', 271.46, 65.1153, 0.1233, 66.6921, 0.7335],
       ['2019-12-13', 275.15, 65.3489, 0.1502, 70.1668, 0.9377],
       ['2019-12-16', 279.86, 65.5846, 0.2327, 73.9083, 1.5694],
       ['2019-12-17', 280.41, 65.8663, 0.2877, 74.3137, 2.0732],
       ['2019-12-18', 279.74, 66.0742, 0.3493, 72.8301, 2.4897]],
      dtype=object)

In [142]:
X_train[0][0]

array([[ 2.02496e+01,  1.44200e-01, -2.00000e-04,  4.56746e+01,
         1.00000e-04],
       [ 2.04960e+01,  1.43900e-01,  6.00000e-04,  4.70359e+01,
         1.00000e-04],
       [ 2.08096e+01,  1.44700e-01,  1.10000e-03,  4.87819e+01,
         1.00000e-04],
       [ 2.10000e+01,  1.45500e-01,  1.70000e-03,  4.98210e+01,
         1.00000e-04],
       [ 2.19968e+01,  1.47000e-01,  2.30000e-03,  5.51416e+01,
         2.00000e-04],
       [ 2.22544e+01,  1.48800e-01,  2.90000e-03,  5.64502e+01,
         2.00000e-04],
       [ 2.13696e+01,  1.50200e-01,  3.00000e-03,  5.10023e+01,
         2.00000e-04],
       [ 2.19968e+01,  1.52500e-01,  3.00000e-03,  5.43513e+01,
         2.00000e-04],
       [ 2.12464e+01,  1.54200e-01,  2.80000e-03,  4.99824e+01,
         2.00000e-04],
       [ 2.16272e+01,  1.56600e-01,  2.30000e-03,  5.20923e+01,
         2.00000e-04],
       [ 2.10000e+01,  1.58800e-01,  1.50000e-03,  4.84493e+01,
         1.00000e-04],
       [ 2.15040e+01,  1.61400e-01,  6.0000

In [141]:
control_model = Sequential()
control_model.add(LSTM(32, activation='relu', input_shape=(14, 1)))
control_model.add(Dense(1))
control_model.compile(optimizer='adam', loss='mse')

control_history = control_model.fit(X_train, y, epochs=1000, validation_split=0.2, verbose=1)

Epoch 1/1000


ValueError: in user code:

    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_7" "                 f"(type Sequential).
    
    Input 0 of layer "lstm_7" is incompatible with the layer: expected shape=(None, None, 1), found shape=(None, 14, 5)
    
    Call arguments received by layer "sequential_7" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 14, 5), dtype=float32)
      • training=True
      • mask=None


In [35]:
NUM_OF_TIMESTEPS = 14
NUM_OF_FEATURES = 5

model = Sequential()
model.add(LSTM(32, activation='relu', input_shape=(NUM_OF_TIMESTEPS, NUM_OF_FEATURES)))
model.add(Dense(1))
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 32)                4864      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 4,897
Trainable params: 4,897
Non-trainable params: 0
_________________________________________________________________


In [36]:
model.compile(optimizer='adam', loss='mse')

In [31]:
X_train = np.asarray(X_train).astype(np.float32)

In [38]:
model.fit(X_train, y)



<keras.callbacks.History at 0x2c35e4ceaf0>

In [39]:
print(y)

[ 23.0048 190.2488  19.5272  59.5112  40.8912 279.44    70.     210.24
  26.4992  14.6384 115.36   136.96    39.7488 169.5792 268.2988  58.2512
 102.5024  20.8768  26.6224 166.23    33.7008 131.7708  19.0904 113.2544
  20.44    91.8752 247.86    74.49    28.2528  37.29    27.13    36.48
  48.      25.41    46.752   21.73    26.1    100.85   293.47    66.7504
 178.6     28.49    72.26   119.      73.35    26.44    31.39    67.32
  27.283   64.41  ]


In [41]:
model.predict([X_train[0]])

ValueError: in user code:

    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\training.py", line 2041, in predict_function  *
        return step_function(self, iterator)
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\training.py", line 2027, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\training.py", line 2015, in run_step  **
        outputs = model.predict_step(data)
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
        return self(x, training=False)
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\Admin\Desktop\Equities_Price_Prediction_XAI\venv\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 14, 5), found shape=(None, 5)
