In [1]:
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
#import helper
import time
import os
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

In [54]:
#helper functions

import pprint


def normalize_windows(win_data):
    """ Rescale every window relative to its own first value
    Input: Window Data (iterable of windows; each value must be accepted
           by float())
    Output: Normalized Window (list of lists of floats)

    Note: Run from load_data()

    Note: Each value becomes n_i = (p_i / p_0) - 1, so the first entry of
    every window is 0. Invert with p_i = p_0(n_i + 1).
    """
    def _relative_to_first(window):
        # window[0] is looked up per element, so an empty window simply
        # produces an empty list instead of failing on the index.
        rescaled = []
        for price in window:
            rescaled.append((float(price) / float(window[0])) - 1)
        return rescaled

    return list(map(_relative_to_first, win_data))


def load_data(filename, seq_len, norm_win):
    """
    Loads the data from a csv file into arrays

    The file is read as one numeric value per line. Consecutive values are
    cut into sliding windows of seq_len + 1 entries: the first seq_len
    entries of a window are the model input and the last entry is the
    target.

    Input: Filename, sequence Length, normalization window(True, False)
    Output: X_tr, Y_tr, X_te, Y_te
        X_* have shape (n_windows, seq_len, 1), Y_* have shape (n_windows,).
        The first 90% of the windows form the training set (row order
        shuffled with np.random.shuffle), the remainder the test set
        (kept in file order).
    Raises: ValueError if the file holds fewer than seq_len + 1 values or
        a non-blank line cannot be parsed as a float.

    Note: Normalization data using n_i = (p_i / p_0) - 1,
    denormalization using p_i = p_0(n_i + 1)

    Note: Run from timeSeriesPredict.py
    """
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(filename, 'r') as fid:
        raw_lines = fid.read().split('\n')
    # Skip blank lines (e.g. the empty string left behind by a trailing
    # newline) and parse eagerly so the arrays are numeric whether or not
    # normalisation is requested.
    data = [float(line) for line in raw_lines if line.strip()]

    sequence_length = seq_len + 1
    # "+ 1" so the final window ends on the last value in the file.
    n_windows = len(data) - sequence_length + 1
    if n_windows < 1:
        raise ValueError(
            "%r holds %d values; at least %d are needed for seq_len=%d"
            % (filename, len(data), sequence_length, seq_len))

    out = [data[i: i + sequence_length] for i in range(n_windows)]
    if norm_win:
        out = normalize_windows(out)
    out = np.array(out)

    split_ratio = 0.9
    split = int(round(split_ratio * out.shape[0]))
    # `train` is a view on `out`; the shuffle reorders only the training
    # rows in place and leaves the test rows untouched.
    train = out[:split, :]
    np.random.shuffle(train)
    X_tr = train[:, :-1]
    Y_tr = train[:, -1]
    X_te = out[split:, :-1]
    Y_te = out[split:, -1]
    # Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1).
    X_tr = np.reshape(X_tr, (X_tr.shape[0], X_tr.shape[1], 1))
    X_te = np.reshape(X_te, (X_te.shape[0], X_te.shape[1], 1))
    return [X_tr, Y_tr, X_te, Y_te]


def predict_seq_mul(model, data, win_size, pred_len):
    """
    Predicts multiple sequences, each rolled forward on its own outputs

    Every pred_len-th window of `data` seeds one sequence. The model is
    asked for one value, the oldest entry of the window is dropped, the
    prediction is inserted at index win_size - 1, and the process repeats
    pred_len times.

    Input: keras model, testing data, window size, prediction length
    Output: Predicted sequence (list of lists, one inner list of pred_len
            predictions per seed window)

    Note: Run from timeSeriesPredict.py
    """
    n_segments = len(data) // pred_len
    all_forecasts = []
    for seg in range(n_segments):
        window = data[seg * pred_len]
        forecast = []
        for _ in range(pred_len):
            # Prepend a batch axis for predict() and take the single scalar.
            next_val = model.predict(window[None, :, :])[0, 0]
            forecast.append(next_val)
            # Slide the window: drop the oldest step, insert the new value.
            window = np.insert(window[1:], [win_size - 1], next_val, axis=0)
        all_forecasts.append(forecast)
    return all_forecasts


def predict_pt_pt(model, data):
    """
    Predicts only one timestep ahead for every input window
    Input: keras model, testing data
    Output: Predicted sequence, flattened to a 1-D array

    Note: Run from timeSeriesPredict.py
    """
    raw_output = model.predict(data)
    # Collapse whatever shape predict() returned into a flat vector.
    flat_shape = (raw_output.size, )
    return np.reshape(raw_output, flat_shape)


def plot_mul(Y_hat, Y, pred_len):
    """
    Plots the predicted data versus true data

    Input: Predicted data (sequence of prediction segments), True Data,
           Length of prediction (x-offset between consecutive segments)
    Output: None; the figure is displayed with plt.show()

    Note: Run from timeSeriesPredict.py
    """
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(Y, label='Y')
    # Print the predictions in its respective series-length
    for i, j in enumerate(Y_hat):
        # Left-pad with None so segment i starts at x = i * pred_len.
        shift = [None] * (i * pred_len)
        # Only the first segment carries the legend label; a label starting
        # with an underscore keeps the others out of the legend.
        # list(j) lets array segments be concatenated with the padding too.
        ax.plot(shift + list(j), label='Y_hat' if i == 0 else '_nolegend_')
    # Build the legend once, after all lines exist, so it is also drawn
    # when there are no prediction segments.
    ax.legend()
    plt.show()

In [55]:
# Load Data
# seq_len: number of timesteps per input window.
# norm_win: rescale each window relative to its first value when True.
seq_len, norm_win = 50, True
print("For debugging purposes, the current directory is", os.getcwd())
# Path is relative to the working directory printed above.
filename = './../../sp500_prices.csv'
X_tr, Y_tr, X_te, Y_te = load_data(filename, seq_len, norm_win)

For debugging purposes, the current directory is c:\Users\jlsim\OneDrive\Documents\codingfolder\Tradingtools\Stock-Price-Prediction\src


In [49]:
# Model Build
# Start the clock before any layer is created so the figure printed below
# covers construction as well as compilation.
timer_start = time.time()
model = Sequential()
# Input windows are (seq_len timesteps, 1 feature), matching the arrays
# returned by load_data(). The full input_shape is given instead of the
# legacy `input_dim` shortcut, which newer Keras releases do not interpret
# as a (timesteps, features) shape for recurrent layers.
model.add(LSTM(units=seq_len,
               input_shape=(seq_len, 1),
               return_sequences=True))
model.add(Dropout(0.2))
# Convolution + pooling over the LSTM output sequence; pooling halves the
# number of timesteps passed to the second LSTM.
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
# return_sequences=False: only the final state is passed on.
model.add(LSTM(100,
               return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1))  # Linear dense layer to aggregate into 1 val
model.add(Activation('linear'))
model.compile(loss='mse', optimizer='rmsprop')
print('Model built in: ', time.time()-timer_start)

Model built in:  0.016638517379760742


In [58]:
# Training model
# fit() stays the last expression of the cell so the returned History
# object is still echoed beneath the epoch log.
model.fit(
    x=X_tr,
    y=Y_tr,
    epochs=200,
    batch_size=512,
    validation_split=0.05,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x145885a1510>

In [None]:
# Predictions
win_size = seq_len
pred_len = seq_len
# True: plot rolled-forward multi-step forecasts.
# False: score one-step-ahead predictions against two naive baselines.
plot = True


if plot:
    pred = predict_seq_mul(model, X_te, win_size, pred_len)
    plot_mul(pred, Y_te, pred_len)
else:
    pred = predict_pt_pt(model, X_te)
    mse_model = mean_squared_error(Y_te, pred)
    print("MSE of DL model ", mse_model)
    # Stupid Model: predict the mean of each input window
    y_bar = np.mean(X_te, axis=1)
    y_bar = np.reshape(y_bar, (y_bar.shape[0]))
    mse_base = mean_squared_error(Y_te, y_bar)
    print("MSE of y_bar Model", mse_base)
    # t-1 Model: predict the last value of each input window
    y_t_1 = X_te[:, -1]
    y_t_1 = np.reshape(y_t_1, (y_t_1.shape[0]))
    mse_t_1 = mean_squared_error(Y_te, y_t_1)
    print("MSE of t-1 Model", mse_t_1)
    # Comparisons: percentage reduction in MSE relative to each baseline.
    # Baseline minus model, so a positive number means the DL model has the
    # lower error (the previous model-minus-baseline form had the opposite
    # sign and was never scaled to a percentage).
    improv = 100 * (mse_base - mse_model) / mse_base
    improv_t_1 = 100 * (mse_t_1 - mse_model) / mse_t_1
    print("{:.2f}% improvement over naive model".format(improv))
    print("{:.2f}% improvement over t-1 model".format(improv_t_1))
    corr_model = np.corrcoef(Y_te, pred)
    corr_base = np.corrcoef(Y_te, y_bar)
    corr_t_1 = np.corrcoef(Y_te, y_t_1)
    print("Correlation of y_bar \n ", corr_base, "\n t-1 model \n", corr_t_1,
          "\n DL model\n", corr_model)