##Importing libraries and data

In [30]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler                   
from tensorflow.keras import Sequential                             
from tensorflow.keras.layers import LSTM,Dense,Dropout,Activation
import numpy as np
import plotly.graph_objects as go

In [31]:
df = pd.read_csv("data.csv")
print(len(df))
df.head() 

554


Unnamed: 0,Symbol,Series,Date,Prev Close,Open Price,High Price,Low Price,Last Price,Close Price,Average Price,Total Traded Quantity,Turnover,No. of Trades,Deliverable Qty,% Dly Qt to Traded Qty
0,SBIN,EQ,01-Mar-2018,268.0,266.3,270.0,261.25,262.2,262.15,264.62,22859175,6049017000.0,173004,7121896,31.16
1,SBIN,EQ,05-Mar-2018,262.15,260.45,265.0,258.75,263.15,263.5,262.51,18484565,4852294000.0,146913,4941066,26.73
2,SBIN,EQ,06-Mar-2018,263.5,265.85,266.0,255.0,255.35,256.55,261.43,20778164,5431996000.0,147230,6144299,29.57
3,SBIN,EQ,07-Mar-2018,256.55,255.05,255.35,246.05,246.7,246.65,248.97,31262500,7783271000.0,180424,11540402,36.91
4,SBIN,EQ,08-Mar-2018,246.65,247.0,257.95,244.3,256.65,256.8,251.48,33133336,8332437000.0,171219,7379164,22.27


##Parameters of the model

In [32]:
time_step = 30
train_test = 0.82
predict_next_days = 7
no_of_epochs = 50
batch_size = 8

##Model data preprocessing


In [33]:
df.columns

Index(['Symbol', 'Series', 'Date', 'Prev Close', 'Open Price', 'High Price',
       'Low Price', 'Last Price', 'Close Price', 'Average Price',
       'Total Traded Quantity', 'Turnover', 'No. of Trades', 'Deliverable Qty',
       '% Dly Qt to Traded Qty'],
      dtype='object')

In [34]:
len(df)

554

In [35]:
#Initalising empty list for testing and training data
X_train = []
y_train = []
X_test = []
y_test = []

In [36]:
#We are going to predict the closing price of the stock, so in the columns of stock we have the 'Close Price'. We can use this.
closed_data = np.asarray(df['Close Price'])
#Reshaping to automatically make it a 2d-array which is the input for sequential model
closed_data = closed_data.reshape(-1,1)

In [37]:
#We scale the data to make the data modelling easier
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(closed_data)

In [38]:
#We find the index for train_test_split as spefied ratio by the user above in the parameters
training_data_len = int(len(df)*train_test)
train_data = scaled_data[0:training_data_len,:]
test_data = scaled_data[training_data_len-time_step:]

print(len(train_data))
print(len(test_data))

454
130


In [59]:
def create_dataset(dataset, time_step=1):
  dataX, dataY = [], []
  for i in range(len(dataset)-time_step) :
    a = dataset[i: (i+time_step), 0]
    dataX.append(a)
    dataY. append (dataset[i + time_step, 0])
  return np.array(dataX), np.array(dataY)

In [60]:
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(424, 30) (424,)
(100, 30) (100,)


In [61]:
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [62]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(424, 30, 1) (424,)
(100, 30, 1) (100,)


## Model processing


In [63]:
#Building the LSTM model
model = Sequential()
model.add(LSTM(units=32, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=15, return_sequences=False))
# model.add(LSTM(units=16, return_sequences=True))
# model.add(LSTM(units=8, return_sequences=False))
# model.add(Dense(64))
# model.add(Dense(32))
# model.add(Dense(16))
model.add(Dense(1))
model.compile(optimizer='rmsprop', loss='mean_squared_error')
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 30, 32)            4352      
                                                                 
 lstm_5 (LSTM)               (None, 15)                2880      
                                                                 
 dense_2 (Dense)             (None, 1)                 16        
                                                                 
Total params: 7,248
Trainable params: 7,248
Non-trainable params: 0
_________________________________________________________________


In [64]:
# Fitting the training data and fitting the data to LSTM.
# It uses a mse loss, we usually use MSE for regression. We can use Adam or Rmsprop optimizer
model.fit(X_train, y_train, batch_size=batch_size, epochs=no_of_epochs)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f57f0883710>

## Loss values

In [65]:
#Predict the data for test data and checked the mse for scalled data
predictions = model.predict(X_test)
mse = np.mean(predictions-y_test)**2
predictions = scaler.inverse_transform(predictions)
actual_data = scaler.inverse_transform(y_test.reshape(-1,1))
mse

6.650140948793848e-05

In [66]:
#MSE for actual data
np.mean(actual_data-predictions)**2

6.281977749436374

## Printing Predictions

In [67]:
#Just printing and showing the predicted and actual data - this is a size of time_step
for i in range(len(predictions)):
  print(predictions[i],actual_data[i])

[496.66794] [491.25]
[499.23645] [503.65]
[516.9477] [505.95]
[515.831] [510.25]
[520.07587] [511.35]
[519.388] [508.35]
[514.1335] [514.]
[521.75854] [506.8]
[508.789] [515.8]
[522.59265] [511.4]
[512.08636] [502.7]
[501.80698] [494.15]
[493.65768] [514.65]
[523.1648] [528.95]
[533.188] [523.45]
[519.74695] [538.3]
[544.31006] [532.3]
[529.80414] [539.8]
[544.4025] [540.1]
[541.7382] [530.3]
[528.7641] [533.25]
[538.03894] [531.35]
[533.21027] [535.25]
[539.06934] [540.55]
[544.1044] [529.6]
[526.39496] [501.4]
[494.1726] [524.8]
[537.14526] [516.7]
[513.6895] [512.95]
[512.40027] [515.3]
[517.3561] [511.85]
[511.10657] [498.4]
[494.66647] [498.7]
[500.6038] [472.65]
[465.6453] [482.95]
[487.81125] [483.2]
[484.02066] [474.25]
[471.44568] [467.4]
[465.8326] [461.95]
[461.07095] [440.3]
[435.23737] [440.3]
[440.67535] [451.7]
[455.08237] [468.7]
[473.40765] [470.35]
[470.67065] [485.15]
[490.048] [485.85]
[487.5035] [492.75]
[497.37408] [501.9]
[508.2505] [490.6]
[490.6752] [493.7]
[49

In [68]:
predict = []
for i in predictions:
  predict.append(i[0])
print(predict)

[496.66794, 499.23645, 516.9477, 515.831, 520.07587, 519.388, 514.1335, 521.75854, 508.789, 522.59265, 512.08636, 501.80698, 493.65768, 523.1648, 533.188, 519.74695, 544.31006, 529.80414, 544.4025, 541.7382, 528.7641, 538.03894, 533.21027, 539.06934, 544.1044, 526.39496, 494.1726, 537.14526, 513.6895, 512.40027, 517.3561, 511.10657, 494.66647, 500.6038, 465.6453, 487.81125, 484.02066, 471.44568, 465.8326, 461.07095, 435.23737, 440.67535, 455.08237, 473.40765, 470.67065, 490.048, 487.5035, 497.37408, 508.2505, 490.6752, 499.6948, 494.78616, 490.71857, 496.2299, 503.146, 496.76077, 496.6821, 495.9174, 515.30206, 515.0195, 509.7503, 517.9358, 517.63, 518.9734, 517.1221, 514.9555, 522.2361, 508.81693, 515.3954, 510.6874, 520.7342, 496.49893, 495.3947, 511.4865, 495.29724, 512.23047, 493.25912, 490.93427, 478.1943, 482.19595, 486.73447, 474.3399, 476.11322, 478.1828, 459.71765, 441.30426, 459.12646, 471.92624, 455.85556, 445.50287, 467.73532, 462.1748, 463.90137, 454.14893, 475.365, 471.627

In [69]:
#Predict for the next x days gice. Here x is the predict_next_days. Input the parameter in parameters section
count = predict_next_days
next_days = []
closed_data_list = list(closed_data)
while(count!=0):
  last = closed_data_list[-time_step:]
  print(len(last), last)
  last_data = np.asarray(last, dtype='float32')
  last_data = last_data.reshape(-1,1)
  last_scalled = scaler.transform(last_data)
  x_test = []
  x_test.append(last_scalled)
  x_test = np.array(x_test)
  x_test = np.reshape(x_test,(x_test.shape[0],x_test.shape[1], 1))
  prediction = model.predict(x_test)
  prediction = scaler.inverse_transform(prediction)
  next_days.append(prediction[0][0])
  closed_data_list.append(prediction[0])
  count-=1

print(next_days)


30 [array([500.6]), array([494.75]), array([505.5]), array([497.25]), array([507.5]), array([496.3]), array([491.]), array([479.65]), array([480.]), array([483.95]), array([475.9]), array([475.2]), array([476.55]), array([462.65]), array([444.65]), array([455.]), array([467.3]), array([458.]), array([447.5]), array([462.4]), array([460.9]), array([462.05]), array([454.1]), array([468.9]), array([468.95]), array([474.6]), array([468.1]), array([468.3]), array([469.85]), array([464.5])]
30 [array([494.75]), array([505.5]), array([497.25]), array([507.5]), array([496.3]), array([491.]), array([479.65]), array([480.]), array([483.95]), array([475.9]), array([475.2]), array([476.55]), array([462.65]), array([444.65]), array([455.]), array([467.3]), array([458.]), array([447.5]), array([462.4]), array([460.9]), array([462.05]), array([454.1]), array([468.9]), array([468.95]), array([474.6]), array([468.1]), array([468.3]), array([469.85]), array([464.5]), array([466.34515], dtype=float32)]
3

In [70]:
train = list(df['Close Price'])[:training_data_len]
print(train)
test = list(df['Close Price'])[training_data_len:]
print(test)

date_train = list(df['Date'])[:training_data_len]
date_test = list(df['Date'])[training_data_len:]

date_next = ['1st Day','2nd Day','3rd Day','4th Day','5th Day','6th Day','7th Day']

[262.15, 263.5, 256.55, 246.65, 256.8, 253.25, 252.85, 254.7, 257.05, 253.7, 252.3, 247.95, 249.1, 248.0, 241.55, 234.8, 246.5, 254.35, 249.9, 246.15, 250.5, 247.3, 259.3, 259.7, 260.65, 263.3, 257.05, 253.8, 250.95, 249.0, 248.15, 246.45, 246.15, 241.2, 242.45, 240.6, 237.2, 233.2, 242.6, 246.4, 241.0, 241.9, 241.95, 246.5, 250.05, 248.0, 247.95, 250.35, 253.6, 248.0, 243.1, 242.7, 238.85, 244.45, 253.9, 263.2, 268.5, 266.95, 272.05, 264.3, 267.15, 269.55, 266.7, 263.0, 263.55, 270.15, 269.65, 273.0, 274.0, 282.85, 287.7, 282.6, 277.45, 276.85, 272.6, 273.3, 268.65, 273.05, 268.45, 267.4, 261.7, 257.0, 259.35, 258.85, 257.5, 257.45, 256.45, 257.2, 261.55, 263.75, 259.1, 262.75, 257.75, 251.6, 259.05, 259.55, 260.2, 261.5, 266.35, 266.85, 271.85, 287.7, 286.75, 297.4, 293.5, 295.1, 293.9, 299.25, 308.5, 304.05, 308.7, 317.4, 304.45, 294.15, 294.9, 292.7, 302.1, 307.65, 307.0, 302.45, 300.45, 308.0, 305.35, 309.7, 308.6, 309.6, 306.35, 296.4, 296.55, 296.45, 291.65, 284.85, 282.6, 285.3

## Graph

In [71]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=date_train, y=train,mode='lines',name='Train Data'))
fig.add_trace(go.Scatter(x=date_test, y=test,mode='lines',name='Test Data'))
fig.add_trace(go.Scatter(x=date_test, y=predict,mode='lines',name='Test Predicted Data'))
fig.add_trace(go.Scatter(x=date_next, y=next_days,mode='lines+markers',name='Predicted Data'))
fig.update_xaxes(
  rangeslider_visible=True,
  rangeselector=dict(
      buttons=list([
                    dict(count=1,
                          label='1y',
                          step='year',
                          stepmode='todate'),
                    dict(count=2,
                          label='2y',
                          step='year',
                          stepmode='todate'),
                    dict(count=3,
                          label='3y',
                          step='year',
                          stepmode='todate'),
                    dict(step='all')
      ])
  )
)
fig.update_layout(
  title={'text': 'Stock Data',
          'x':0.5,
          'xanchor': 'center',
          'yanchor': 'top'},
  xaxis_title="Date",
  yaxis_title="Close Values",
)
fig.show()