In [1]:
#!pip install tensorflow
#!pip install keras

In [2]:
# keras.layers.LSTM(units = 50, return_sequences = True, input_shape = (batch_size=32,timesteps=1,features=1)))
# keras.layers.GRU(units = 50, return_sequences = True, input_shape = (batch_size=32,timesteps=1,features=1)))
# only 2 arguments in input shape indicate any batch size 

In [3]:
import pandas as pd
import numpy as np

In [4]:
df=pd.read_csv("MSFT.csv", na_values=['null'], index_col='Date', parse_dates=True, infer_datetime_format=True)
df.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1986-03-13,0.088542,0.101563,0.088542,0.097222,0.061751,1031788800
1986-03-14,0.097222,0.102431,0.097222,0.100694,0.063956,308160000
1986-03-17,0.100694,0.103299,0.100694,0.102431,0.065059,133171200
1986-03-18,0.102431,0.103299,0.098958,0.099826,0.063405,67766400
1986-03-19,0.099826,0.100694,0.097222,0.09809,0.062302,47894400


In [5]:
df.tail(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-27,261.579987,263.190002,260.119995,261.970001,261.970001,31014200
2021-04-28,256.079987,256.540009,252.949997,254.559998,254.559998,46903100
2021-04-29,255.460007,256.100006,249.0,252.509995,252.509995,40589000
2021-04-30,249.740005,253.080002,249.600006,252.179993,252.179993,30929200
2021-05-03,253.399994,254.350006,251.119995,251.860001,251.860001,19598900


In [6]:
df.shape

(8857, 6)

In [7]:
#Set Target Variable
high = df['High']

In [8]:
# the following example is for 1 feature, 60 timesteps
X = []
y = []
for i in range(60, df.shape[0]):
    X.append(high.iloc[i-60:i])    # 0-59 rows
    y.append(high[i])              # 60th row
X = np.array(X)
y = np.array(y)
print(X[0], y[0])

[0.101563 0.102431 0.103299 0.103299 0.100694 0.09809  0.097222 0.092882
 0.092014 0.095486 0.096354 0.096354 0.095486 0.097222 0.098958 0.097222
 0.097222 0.097222 0.09809  0.098958 0.101563 0.101563 0.100694 0.105035
 0.105035 0.105035 0.102431 0.101563 0.100694 0.111979 0.121962 0.118924
 0.118056 0.115451 0.111979 0.111979 0.110243 0.111979 0.111111 0.111111
 0.111111 0.113715 0.112847 0.111979 0.112847 0.114583 0.111979 0.110243
 0.110243 0.108507 0.109375 0.111111 0.114583 0.118924 0.123264 0.121528
 0.118056 0.118924 0.118924 0.118924] 0.1189239993691444


In [9]:
X.shape

(8797, 60)

In [10]:
y.shape

(8797,)

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

In [12]:
X_train.shape

(7037, 60)

In [13]:
X_test.shape

(1760, 60)

In [14]:
# add one more dimension to X_train i.e. (9023, 60) to (9023,60,1)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [15]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

model = Sequential()
# LSTM input_shape(60,1) means number_of_timesteps=60, features=1)
# input_shape(batch_size,timesteps,features), only 2 arguments indicate any batch size 
model.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
model.add(LSTM(units = 50))
model.add(Dropout(0.2))
model.add(Dense(units = 1))     # as we are dealing with one feature at a time


model.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [16]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

# ModelCheckpoint saves the model while monitoring a specific parameter of the model. 
# In this case we are monitoring loss. 
# The model will only be saved if the loss in current epoch is less than in the last epoch.
checkpoint = ModelCheckpoint("lstm_stock.h5", monitor='loss', verbose=1, 
                             save_best_only=True, save_weights_only=False, mode='auto')

# EarlyStopping stops training of the model early if there is no increase in the monitored parameter. 
# In this case we are monitoring loss. 
# patience=20 means that model will stop to train if it doesn’t see any decrease in loss in 20 epochs.
early = EarlyStopping(monitor='loss', min_delta=0, patience=20, verbose=1, mode='auto')

model.fit(X_train, y_train, epochs = 100, batch_size = 32, callbacks=[checkpoint,early])      

Epoch 1/100
Epoch 1: loss improved from inf to 2444.24780, saving model to lstm_stock.h5
Epoch 2/100
Epoch 2: loss improved from 2444.24780 to 1973.85339, saving model to lstm_stock.h5
Epoch 3/100
Epoch 3: loss improved from 1973.85339 to 1697.73889, saving model to lstm_stock.h5
Epoch 4/100
Epoch 4: loss improved from 1697.73889 to 1486.42944, saving model to lstm_stock.h5
Epoch 5/100
Epoch 5: loss improved from 1486.42944 to 1307.54614, saving model to lstm_stock.h5
Epoch 6/100
Epoch 6: loss improved from 1307.54614 to 1169.60315, saving model to lstm_stock.h5
Epoch 7/100
Epoch 7: loss improved from 1169.60315 to 1038.64832, saving model to lstm_stock.h5
Epoch 8/100
Epoch 8: loss improved from 1038.64832 to 927.32330, saving model to lstm_stock.h5
Epoch 9/100
Epoch 9: loss improved from 927.32330 to 832.39392, saving model to lstm_stock.h5
Epoch 10/100
Epoch 10: loss improved from 832.39392 to 736.54779, saving model to lstm_stock.h5
Epoch 11/100
Epoch 11: loss improved from 736.5477

Epoch 36/100
Epoch 36: loss did not improve from 44.85764
Epoch 37/100
Epoch 37: loss improved from 44.85764 to 39.64232, saving model to lstm_stock.h5
Epoch 38/100
Epoch 38: loss improved from 39.64232 to 38.40149, saving model to lstm_stock.h5
Epoch 39/100
Epoch 39: loss improved from 38.40149 to 37.44020, saving model to lstm_stock.h5
Epoch 40/100
Epoch 40: loss improved from 37.44020 to 31.65425, saving model to lstm_stock.h5
Epoch 41/100
Epoch 41: loss improved from 31.65425 to 29.75344, saving model to lstm_stock.h5
Epoch 42/100
Epoch 42: loss did not improve from 29.75344
Epoch 43/100
Epoch 43: loss improved from 29.75344 to 27.27531, saving model to lstm_stock.h5
Epoch 44/100
Epoch 44: loss did not improve from 27.27531
Epoch 45/100
Epoch 45: loss improved from 27.27531 to 26.37043, saving model to lstm_stock.h5
Epoch 46/100
Epoch 46: loss did not improve from 26.37043
Epoch 47/100
Epoch 47: loss improved from 26.37043 to 25.94625, saving model to lstm_stock.h5
Epoch 48/100
Epo

Epoch 75/100
Epoch 75: loss improved from 22.31797 to 22.30469, saving model to lstm_stock.h5
Epoch 76/100
Epoch 76: loss improved from 22.30469 to 22.24800, saving model to lstm_stock.h5
Epoch 77/100
Epoch 77: loss did not improve from 22.24800
Epoch 78/100
Epoch 78: loss did not improve from 22.24800
Epoch 79/100
Epoch 79: loss improved from 22.24800 to 22.15068, saving model to lstm_stock.h5
Epoch 80/100
Epoch 80: loss improved from 22.15068 to 21.97063, saving model to lstm_stock.h5
Epoch 81/100
Epoch 81: loss did not improve from 21.97063
Epoch 82/100
Epoch 82: loss did not improve from 21.97063
Epoch 83/100
Epoch 83: loss improved from 21.97063 to 21.95504, saving model to lstm_stock.h5
Epoch 84/100
Epoch 84: loss did not improve from 21.95504
Epoch 85/100
Epoch 85: loss improved from 21.95504 to 21.32822, saving model to lstm_stock.h5
Epoch 86/100
Epoch 86: loss did not improve from 21.32822
Epoch 87/100
Epoch 87: loss did not improve from 21.32822
Epoch 88/100
Epoch 88: loss di

<keras.callbacks.History at 0x23e49b136a0>

In [17]:
y_pred = model.predict(X_test)
y_pred



array([[ 0.0618515],
       [ 1.9811678],
       [27.031998 ],
       ...,
       [17.78685  ],
       [ 8.598194 ],
       [23.149738 ]], dtype=float32)