# Stock Prediction Example

This example uses a 2-layer LSTM model to predict a stock's next closing price from its previous 50 closing prices. The model is adapted from [this article](https://neptune.ai/blog/predicting-stock-prices-using-machine-learning), and the complete example can be found in [this repository](https://github.com/george-kuanli-peng/stock-pred).

⚠️ This example is for demonstration only. Never use it to make investment decisions.

## Configure program

In [None]:
# Append import path
# Adds this directory to the kernel's module search path for the rest of the
# session.
# NOTE(review): the path is hard-coded to root's user site-packages for
# Python 3.9 -- confirm it matches the interpreter this notebook runs on.
import sys
sys.path.append('/root/.local/lib/python3.9/site-packages')

# Install dependencies
# The leading `!` runs the line as a shell command; PYTHONPATH is set only for
# this one pip invocation.
# NOTE(review): train.py imports tensorflow, which is not installed here --
# presumably it is already present in the environment; confirm.
!PYTHONPATH=/root/.local/lib/python3.9/site-packages pip install "matplotlib>=3.5.2" "scikit-learn>=1.0.2"

In [None]:
# Global settings
# WINDOW: number of preceding closing prices in each input sample; the
# training cell also passes it as the LSTM unit count.
WINDOW = 50
# EPOCHS / BATCH: epoch count and batch size passed to train_model().
EPOCHS = 15
BATCH = 20
# TEST_RATIO: fraction of the series, taken from the tail, held out for testing.
TEST_RATIO = .2

In [None]:
%%writefile train.py
import os
import pickle
from pathlib import Path

import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input, Model
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import load_model as tf_load_model


def transform_data(data):
    """Standardize a series with a newly fitted ``StandardScaler``.

    Args:
        data: Array supporting NumPy-style indexing. A new axis is inserted
            after the first one before fitting, so a 1-D series becomes a
            single feature column.

    Returns:
        A ``(scaler, scaled_data)`` tuple: the fitted scaler, which callers
        can keep to invert the scaling later, and the transformed data.
    """
    # StandardScaler works on 2-D input, hence the added axis.
    column = data[:, np.newaxis]
    fitted = StandardScaler()
    return fitted, fitted.fit_transform(column)


def extract_x_y(data, window, offset):
    """Build sliding-window samples and their next-step targets.

    For every index ``i`` in ``range(offset, len(data))`` the sample is
    ``data[i - window:i]`` and the target is ``data[i]``.

    Ref: https://neptune.ai/blog/predicting-stock-prices-using-machine-learning

    Args:
        data: Sequence (list or NumPy array) of observations.
        window: Number of preceding observations in each sample.
        offset: Index of the first target; must be at least ``window`` so
            that every sample has a full look-back window.

    Returns:
        A ``(x, y)`` tuple of NumPy arrays holding the samples and the
        targets. Both are empty when ``offset >= len(data)``.

    Raises:
        ValueError: If ``offset`` is smaller than ``window``.
    """
    # A negative slice start would wrap around to the end of the sequence and
    # silently produce empty or misaligned windows.
    if offset < window:
        raise ValueError(
            f'offset ({offset}) must be >= window ({window})')

    target_positions = range(offset, len(data))
    x = [data[i - window:i] for i in target_positions]
    y = [data[i] for i in target_positions]
    return np.array(x), np.array(y)


def build_LSTM(x_train, units):
    """Create and compile a two-layer LSTM regression model.

    Args:
        x_train: Training samples; only ``x_train.shape[1]`` is read, to set
            the number of time steps of the input layer.
        units: Number of units in each of the two LSTM layers.

    Returns:
        A compiled Keras ``Model`` with one linear output unit, using mean
        squared error loss and the Adam optimizer.
    """
    timesteps = x_train.shape[1]
    inputs = Input(shape=(timesteps, 1))

    # The first LSTM returns the whole sequence so the second LSTM can
    # consume it; the second returns only its final output.
    sequence = LSTM(units=units, return_sequences=True)(inputs)
    final_state = LSTM(units=units)(sequence)
    outputs = Dense(units=1, activation='linear')(final_state)

    network = Model(inputs, outputs)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network


def train_model(model, x_train, y_train, epochs, batch,
                interactive_progress=True, tensorboard_path=None,
                mlsteam_track=False):
    """Fit ``model`` on the training data and return the fit result.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        x_train: Training samples.
        y_train: Training targets.
        epochs: Number of training epochs.
        batch: Batch size.
        interactive_progress: When true ``fit`` runs with ``verbose=1``,
            otherwise with ``verbose=2``.
        tensorboard_path: If truthy, a ``TensorBoard`` callback logging to
            this directory is attached.
        mlsteam_track: If true, the hyper-parameters and training data are
            stored in the object returned by ``get_mlsteam_track()``.

    Returns:
        Whatever ``model.fit`` returns (the training history for Keras).
    """
    if mlsteam_track:
        # NOTE(review): get_mlsteam_track is neither defined nor imported in
        # the visible part of this module -- confirm where it comes from.
        track = get_mlsteam_track()
        tracked_items = (
            ('epochs', epochs),
            ('batch', batch),
            ('x_train', x_train),
            ('y_train', y_train),
        )
        for key, value in tracked_items:
            track[key] = value

    if tensorboard_path:
        callbacks = [TensorBoard(log_dir=tensorboard_path, histogram_freq=1)]
    else:
        callbacks = []

    verbosity = 1 if interactive_progress else 2
    # A tenth of the training data is held out by fit() for validation.
    return model.fit(x_train, y_train,
                     epochs=epochs, batch_size=batch,
                     verbose=verbosity,
                     validation_split=.1, shuffle=True,
                     callbacks=callbacks)


def predict(model, x_test):
    """Return ``model``'s predictions for ``x_test``.

    Args:
        model: Object exposing a ``predict`` method.
        x_test: Samples to run through the model.

    Returns:
        The value returned by ``model.predict(x_test)``, unchanged.
    """
    predictions = model.predict(x_test)
    return predictions


def get_rmse(pred, actual):
    """Return the root-mean-square error between ``pred`` and ``actual``.

    Args:
        pred: Predicted values (NumPy array or scalar).
        actual: Reference values; combined with ``pred`` under NumPy
            broadcasting rules, so pass matching shapes for an
            element-wise comparison.

    Returns:
        The square root of the mean squared difference.
    """
    residual = pred - actual
    mean_squared = np.mean(residual ** 2)
    return np.sqrt(mean_squared)


def get_mape(pred, actual):
    """Return the mean absolute percentage error as a fraction.

    Entries whose actual value is zero are left out, since their relative
    error is undefined. The result is a fraction (0.05 means 5 %), not a
    percentage.

    Args:
        pred: Predicted values (array-like).
        actual: Reference values (array-like); combined with ``pred`` under
            NumPy broadcasting rules, so pass matching shapes for an
            element-wise comparison.

    Returns:
        The mean of ``|actual - pred| / |actual|`` over the entries with a
        non-zero actual value, or ``nan`` if there are none.
    """
    pred = np.asarray(pred, dtype=float)
    actual = np.asarray(actual, dtype=float)

    abs_error = np.abs(actual - pred)
    # Divide by the magnitude so negative actual values still give a
    # non-negative error.
    denom = np.broadcast_to(np.abs(actual), abs_error.shape)

    # Mask before dividing so zero actual values never reach the division.
    usable = denom != 0
    if not usable.any():
        return np.nan
    return (abs_error[usable] / denom[usable]).mean()


## Prepare historical data for training

In [None]:
# Historical closing prices (at least 90 records are needed; the more, the better)
import numpy as np

stock_prices = np.array([
        589.89001465,  571.69000244,  589.73999023,  576.83001709,
        577.86999512,  563.46002197,  586.7800293 ,  580.88000488,
        606.44000244,  604.69000244,  619.13000488,  630.84997559,
        625.2199707 ,  623.90002441,  605.11999512,  572.84002686,
        599.04998779,  605.13000488,  603.59002686,  598.7800293 ,
        610.11999512,  609.89001465,  617.69000244,  599.35998535,
        604.86999512,  616.59997559,  623.30999756,  620.83001709,
        623.71002197,  656.57000732,  679.82000732,  671.86999512,
        688.7199707 ,  680.76000977,  679.70001221,  677.91998291,
        678.90002441,  659.58001709,  644.65002441,  652.80999756,
        656.95001221,  685.70001221,  668.53997803,  653.38000488,
        650.59997559,  644.2199707 ,  646.2199707 ,  660.5       ,
        655.28997803,  649.26000977,  643.38000488,  657.61999512,
        644.7800293 ,  646.97998047,  677.34997559,  687.20001221,
        709.66998291,  709.73999023,  710.91998291,  714.63000488,
        699.09997559,  713.76000977,  709.98999023,  707.82000732,
        722.25      ,  717.16998291,  686.16998291,  665.71002197,
        688.98999023,  673.4699707 ,  680.26000977,  706.29998779,
        708.48999023,  711.20001221,  701.15997314,  711.91998291,
        730.90997314,  735.7199707 ,  734.09002686,  732.39001465,
        733.57000732,  752.91998291,  753.86999512,  754.85998535,
        736.27001953,  743.        ,  744.48999023,  755.83001709,
        756.98999023,  759.48999023,  730.16998291,  739.38000488,
        751.94000244,  753.64001465,  774.39001465,  791.35998535,
        777.55999756,  781.30999756,  775.47998047,  775.2199707 ,
        781.5300293 ,  780.59002686,  782.75      ,  793.60998535,
        785.48999023,  791.94000244,  805.7199707 ,  811.08001709,
        818.32000732,  843.0300293 ,  870.10998535,  864.27001953,
        865.79998779,  894.        ,  909.67999268, 1024.85998535,
       1018.42999268, 1037.85998535, 1077.04003906, 1114.        ,
       1208.58996582, 1172.        , 1213.85998535, 1229.91003418,
       1222.08996582, 1162.93994141, 1023.5       , 1067.94995117,
       1063.51000977, 1033.42004395, 1013.39001465, 1054.72998047,
       1089.01000977, 1096.38000488, 1137.06005859, 1156.86999512,
       1109.0300293 , 1116.        , 1081.92004395, 1136.98999023,
       1144.76000977, 1095.        , 1084.59997559, 1014.9699707 ,
       1009.01000977, 1051.75      , 1068.95996094, 1003.79998779,
       1017.0300293 ,  966.40997314,  958.51000977,  975.98999023,
        926.91998291,  932.57000732,  899.94000244,  938.5300293 ,
       1008.86999512, 1067.        , 1093.93994141, 1088.4699707 ,
       1086.18994141, 1070.33996582, 1056.7800293 , 1199.7800293 ,
       1149.58996582, 1088.11999512, 1064.69995117, 1026.95996094,
       1058.11999512, 1064.40002441, 1106.2199707 , 1031.56005859,
       1049.60998535, 1030.51000977,  995.65002441,  996.27001953,
        943.90002441,  930.        ,  918.40002441,  937.40997314,
        829.09997559,  846.34997559,  936.7199707 ,  931.25      ,
        905.65997314,  891.14001465,  923.32000732,  907.34002686,
        922.        ,  932.        ,  904.54998779,  860.        ,
        875.76000977,  922.42999268,  923.39001465,  876.34997559,
        856.97998047,  821.5300293 ,  764.03997803,  800.77001953,
        809.86999512,  870.42999268,  864.36999512,  879.89001465,
        839.28997803,  838.28997803,  804.58001709,  824.40002441,
        858.9699707 ,  838.29998779,  795.34997559,  766.36999512,
        801.89001465,  840.22998047,  871.59997559,  905.39001465,
        921.15997314,  993.97998047,  999.10998535, 1013.91998291,
       1010.64001465, 1091.83996582, 1099.56994629, 1093.98999023,
       1077.59997559, 1084.58996582, 1145.44995117, 1091.26000977,
       1045.76000977, 1057.26000977, 1025.48999023,  975.92999268,
        986.95001221, 1022.36999512,  985.        , 1004.28997803,
       1028.15002441,  977.20001221, 1008.7800293 , 1005.04998779,
        998.02001953,  876.41998291,  881.51000977,  877.51000977,
        870.76000977,  902.94000244,  909.25      ,  952.61999512,
        873.2800293 ,  865.65002441,  787.10998535,  800.03997803,
        734.        ])

In [None]:
from train import transform_data, extract_x_y

# Standardize the whole price series and keep the fitted scaler for later use.
# NOTE(review): the scaler is fit on the full series, including the tail that
# is later used for testing -- confirm this look-ahead is acceptable for the
# demo.
scaler, scaled_data = transform_data(stock_prices)

# The first (1 - TEST_RATIO) share of the series is used for training.
train_split = int(len(scaled_data) * (1.0 - TEST_RATIO))
scaled_data_train = scaled_data[:train_split]
# offset=WINDOW: the first target is the first point with WINDOW predecessors.
x_train, y_train = extract_x_y(scaled_data_train, window=WINDOW, offset=WINDOW)

## Train model

In [None]:
from train import build_LSTM, train_model

# units=WINDOW: the LSTM layer width reuses the look-back window size.
model = build_LSTM(x_train, units=WINDOW)
# The value returned by train_model is not assigned to a name here.
train_model(model, x_train, y_train, EPOCHS, BATCH, interactive_progress=True)

In [None]:
from train import predict, get_rmse, get_mape

# Build the test windows over the full scaled series so the first test samples
# can look back into the training portion.
x_test, _ = extract_x_y(scaled_data, window=WINDOW, offset=train_split)
y_lstm_scaled = predict(model, x_test)
# The model emits one column per sample, i.e. shape (n, 1), while y_actual is
# 1-D. Flatten after undoing the scaling; otherwise `pred - actual` broadcasts
# to an (n, n) matrix and the metrics compare every prediction with every
# actual value.
y_lstm = scaler.inverse_transform(y_lstm_scaled).ravel()
y_actual = stock_prices[train_split:]

rmse_lstm = get_rmse(pred=y_lstm, actual=y_actual)
mape_lstm = get_mape(pred=y_lstm, actual=y_actual)
print(f'RMSE: {rmse_lstm}, MAPE: {mape_lstm}')

In [None]:
import matplotlib.pyplot as plt

def plot_stock(dates, y_train, y_pred, y_actual, train_pos):
    """Plot the training, test and predicted closing prices on one figure.

    Args:
        dates: X-axis values for the whole series.
        y_train: Closing prices plotted against ``dates[:train_pos]``.
        y_pred: Predicted prices plotted against ``dates[train_pos:]``.
        y_actual: Actual prices plotted against ``dates[train_pos:]``.
        train_pos: Index at which the series is split into train and test.
    """
    plt.figure(figsize=(20, 10))

    test_dates = dates[train_pos:]
    curves = (
        (dates[:train_pos], y_train, 'Train Closing Price'),
        (test_dates, y_actual, 'Test Closing Price'),
        (test_dates, y_pred, 'Pred Closing Price'),
    )
    for x_values, y_values, caption in curves:
        plt.plot(x_values, y_values, label=caption)

    plt.title('Stock Price Prediction')
    plt.xlabel('Date')
    plt.ylabel('Stock Price')
    plt.legend(loc='upper left')

# Integer positions 0..len(stock_prices)-1 are passed as the x-axis "dates".
plot_stock(list(range(len(stock_prices))), stock_prices[:train_split], y_lstm, y_actual, train_split)