<a href="https://colab.research.google.com/github/cmcvista/MLHelloWorld/blob/main/LSTMExample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM Stock Price Prediction Example

Assuming the stock price should be dependent to history records, this example uses Long-Short-Term-Memory Neural Network to find the pattern in stock price, and then make some sensible predictions.

*Reference:*
- https://www.datacamp.com/community/tutorials/lstm-python-stock-market
- https://github.com/thushv89/datacamp_tutorials/blob/master/Reviewed/lstm_stock_market_prediction.ipynb
- https://www.tensorflow.org/guide/keras/rnn
- https://www.analyticsvidhya.com/blog/2017/12/fundamentals-of-deep-learning-introduction-to-lstm/


In [1]:
# Load Data
import pandas as pd
df = pd.read_csv("KO.csv")


In [2]:
# Display the CSV content
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-11-08,52.459999,52.599998,52.09,52.209999,50.501057,7141800
1,2019-11-11,52.330002,52.369999,51.779999,51.84,50.143169,8198300
2,2019-11-12,51.91,51.91,51.580002,51.709999,50.017422,12656900
3,2019-11-13,52.18,52.450001,51.959999,52.41,50.694511,12257900
4,2019-11-14,52.529999,52.669998,52.349998,52.630001,50.90731,8660300


In [3]:
# For simplicity, we only want Open price
open_prices = df.loc[:,'Open'].array
print("Size: ", open_prices.size)

# Set training and testing dataset size
train_data_len = open_prices.size*7//10   # 80%

# Separate training and testing dataset
train_data = open_prices[:train_data_len]
test_data = open_prices[train_data_len:]
print(train_data.size)
print(test_data.size)


Size:  252
176
76


In [4]:
import numpy as np

# Generate one batch sized with "batch_size" from a "data"
# For example
# input_dataset: x0, x1 ... x100,
# data: x0, x10, x20 ... x90
# label: x1, x11, x21 ... x91
def gen_one_batch(input_dataset, batch_size):
  batch_data = np.zeros(batch_size, dtype=np.float32)
  batch_labels = np.zeros(batch_size, dtype=np.float32)
  for i in range(batch_size):
    batch_data[i] = input_dataset[batch_size*i]
    batch_labels[i]= input_dataset[batch_size*i+1]
  return batch_data, batch_labels

# Generate many batches sized "batch_size"
# For example
# no_of_batches = 3, batch_size = 10
# input_dataset: x0, x1 ... x100,
# data: [x0, x10, x20 ... x90], [x1, x11, x21 ... x91], [x2, x12, x22 ... x92]
# labels: [x1, x11, x21 ... x91], [x2, x12, x22 ... x92], [x3, x13, x23 ... x93]
def gen_batches(input_dataset, no_of_batches, batch_size):
  data_batches, labels_batches = [], []
  for i in range(no_of_batches):
    data, labels = gen_one_batch(input_dataset[i:], batch_size)
    data_batches.append(data)
    labels_batches.append(labels)
  return data_batches, labels_batches

# One batch contains 6 days data
batch_size = 6
train_X, train_Y = gen_batches(train_data, train_data.size//batch_size, batch_size)
test_X, test_Y = gen_batches(test_data, test_data.size//batch_size, batch_size)

print("Training Set: ", len(train_X))
print("Testing Set: ", len(test_X))


Training Set:  29
Testing Set:  12


In [5]:
from tensorflow import keras
from tensorflow.keras import layers

input_dim = batch_size
units = 20
output_size = batch_size

# Build the RNN model
def build_model(allow_cudnn_kernel=True):
    # CuDNN is only available at the layer level, and not at the cell level.
    # This means `LSTM(units)` will use the CuDNN kernel,
    # while RNN(LSTMCell(units)) will run on non-CuDNN kernel.
    if allow_cudnn_kernel:
        # The LSTM layer with default options uses CuDNN.
        lstm_layer = keras.layers.LSTM(units, input_shape=(None, input_dim))
    else:
        # Wrapping a LSTMCell in a RNN layer will not use CuDNN.
        lstm_layer = keras.layers.RNN(
            keras.layers.LSTMCell(units), input_shape=(None, input_dim)
        )
    model = keras.models.Sequential(
        [
            lstm_layer,
            #keras.layers.BatchNormalization(),
            keras.layers.Dense(output_size),
        ]
    )
    return model

In [6]:
model = build_model(allow_cudnn_kernel=False)

model.compile(
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer="sgd",
    metrics=["accuracy"],
)

model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rnn (RNN)                    (None, 20)                2160      
_________________________________________________________________
dense (Dense)                (None, 6)                 126       
Total params: 2,286
Trainable params: 2,286
Non-trainable params: 0
_________________________________________________________________


In [7]:
train_X_reshaped = np.reshape(train_X, (len(train_X), 1, len(train_X[0])))
train_Y_reshaped = np.reshape(train_Y, (len(train_Y), len(train_Y[0])))

test_X_reshaped = np.reshape(test_X, (len(test_X), 1, len(test_X[0])))
test_Y_reshaped = np.reshape(test_Y, (len(test_Y), len(test_Y[0])))

model.fit(train_X_reshaped, train_Y_reshaped, batch_size=batch_size, epochs=5)
predictions = model(test_X_reshaped)
print(predictions)
print(test_Y_reshaped)

results = model.evaluate(test_X_reshaped, test_Y_reshaped, batch_size=batch_size)
print("test loss, test acc:", results)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
tf.Tensor(
[[74.67931  76.10727  76.3457   75.96924  76.654724 78.49681 ]
 [74.67931  76.107254 76.34567  75.9693   76.65476  78.49681 ]
 [74.679306 76.10724  76.34564  75.969345 76.65479  78.496796]
 [74.6793   76.107216 76.34558  75.96947  76.65486  78.49679 ]
 [74.679306 76.10723  76.34561  75.969406 76.65483  78.496796]
 [74.679306 76.10724  76.34563  75.969406 76.65482  78.4968  ]
 [74.67931  76.10722  76.34558  75.9695   76.654884 78.496796]
 [74.67931  76.10723  76.34561  75.96945  76.65486  78.4968  ]
 [74.67931  76.10723  76.34561  75.96947  76.65486  78.4968  ]
 [74.67931  76.10725  76.34564  75.96939  76.65481  78.49681 ]
 [74.67931  76.10725  76.34563  75.96942  76.65483  78.49681 ]
 [74.679306 76.10722  76.3456   75.96948  76.65487  78.4968  ]], shape=(12, 6), dtype=float32)
[[48.4  47.14 48.46 48.12 48.25 50.94]
 [48.18 46.29 48.3  47.27 48.75 50.93]
 [48.34 46.9  48.26 47.34 49.8  49.6 ]
 [48.14 46.66 48.34 47.43 49.31 50