# LSTM

Predict the next day closing price of google using google and apple stock features.

👇 Load google 

In [29]:
import pandas as pd
apple_data = pd.read_csv("AAPL.csv")
apple_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-03-08,7.8575,7.860357,7.794643,7.824286,6.659394,429889600
1,2010-03-09,7.796786,8.035714,7.781786,7.965,6.779158,920259200
2,2010-03-10,7.993929,8.052857,7.971429,8.03,6.834479,596218000
3,2010-03-11,7.996786,8.053571,7.975714,8.053571,6.854542,405700400
4,2010-03-12,8.120357,8.133214,8.0625,8.092857,6.887981,416323600


In [30]:
data_train = apple_data[:2583]
data_test = apple_data[2583:]

## Preprocessing

👇 Making subsamples

In [31]:
import numpy as np
def subsample_sequence(sequence, length, horizon):

    # Bounds of sampling
    last_possible = len(sequence) - length - horizon
    random_start = np.random.randint(0, last_possible)

    # Sample
    X = sequence[random_start:random_start +length].drop(columns=['Close','Date']) # Remove target closing price & date
    y = sequence.loc[random_start+length+horizon-1]['Close'] # Target is closing price

    return X, y


subsample_sequence(apple_data, 3, 1)


(          Open       High        Low  Adj Close     Volume
 234  12.631429  12.697143  12.576786  10.797042  381040800
 235  12.685357  12.821429  12.673929  10.887021  482745200
 236  12.763929  12.857143  12.428571  10.776980  928550000,
 12.744643)

In [32]:
def get_X_y(sequence, length, horizon, number_of_samples):
    
    X, y = [], []
    
    # Do as many samples as specified
    for sample in range(1,number_of_samples + 1):
        
        # Record sample X & y
        xi, yi = subsample_sequence(sequence, length, horizon)
        X.append(np.array(xi.values.T.tolist()).T) # Getting the right shape (sequences, observations,features)
        y.append(yi)

    return np.array(X), np.array(y)

In [33]:
X_train, y_train = get_X_y(sequence=data_train,
               length=500,
               horizon=1,
               number_of_samples=10000)


In [34]:
# 1000 subsamples, each of lenght 100 , each containg 5 features
X_train.shape

(10000, 500, 5)

In [36]:
y_train = y_train.reshape(10000,1)

In [37]:
from tensorflow.keras import models
from tensorflow.keras import layers

def init_model():

    model = models.Sequential()

    model.add(layers.LSTM(10, return_sequences=True, activation='tanh', input_shape=(500,5)))
    model.add(layers.LSTM(20, return_sequences=True, activation='tanh'))
    model.add(layers.LSTM(10, return_sequences=True, activation='tanh'))
    model.add(layers.Dense(5, activation='relu'))
    model.add(layers.Dense(1, activation='linear'))

    model.compile(loss='mse',
                  optimizer='rmsprop',
                  metrics=['mae'])


    return model

init_model().summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_10 (LSTM)              (None, 500, 10)           640       
                                                                 
 lstm_11 (LSTM)              (None, 500, 20)           2480      
                                                                 
 lstm_12 (LSTM)              (None, 500, 10)           1240      
                                                                 
 dense_10 (Dense)            (None, 500, 5)            55        
                                                                 
 dense_11 (Dense)            (None, 500, 1)            6         
                                                                 
Total params: 4,421
Trainable params: 4,421
Non-trainable params: 0
_________________________________________________________________


In [38]:
from tensorflow.keras.callbacks import EarlyStopping
model = init_model()

es = EarlyStopping(patience=20)

model.fit(X_train, y_train, 
          epochs=50, 
          batch_size=32, 
          verbose=1, 
          callbacks = [es],
          validation_split=0.2)

Epoch 1/50