In [28]:
import streamlit as st
from plotly import graph_objs as go
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Dropout
import numpy as np
import math
import matplotlib.pyplot as plt
import os
from keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

# Data

In [29]:
data = pd.read_csv("BRITANNIA.csv", index_col="Date", parse_dates=True)     # import data from .csv file
data = data[['Open']]       # truncate the data to keep only the required feature
data_arr = np.array(data)

In [30]:
# Normalize data

sc = MinMaxScaler(feature_range=(0,1))      # scaling the data to values between 0 and 1
scaled_data = sc.fit_transform(data)

df_X_train = scaled_data[:math.ceil(len(data) * .8)]

In [31]:
# create training data
# such that 50 records are stored for training and 51st record is used for validation while training the model

X_train = []
y_train = []

for i in range(50, df_X_train.shape[0]):
    X_train.append(df_X_train[i-50:i, 0])
    y_train.append(df_X_train[i, 0])

X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

In [32]:
# create testing data
# testing data is created with the same strategy as training data

test_data = scaled_data[len(df_X_train) - 50:, :]

X_test = []
y_test = data_arr[len(df_X_train):, :]

for i in range(50, len(test_data)):
    X_test.append(test_data[i-50:i, 0])

X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Modeling

In [33]:
# designing the LSTM model

model = Sequential()
model.add(LSTM(128, input_shape=(X_train.shape[1], 1), return_sequences=True))
model.add(LSTM(64, input_shape=(X_train.shape[1], 1), return_sequences=False))
model.add(Dense(25,activation='relu'))
model.add(Dense(1,activation='relu'))

model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 50, 128)           66560     
                                                                 
 lstm_3 (LSTM)               (None, 64)                49408     
                                                                 
 dense_2 (Dense)             (None, 25)                1625      
                                                                 
 dense_3 (Dense)             (None, 1)                 26        
                                                                 
Total params: 117,619
Trainable params: 117,619
Non-trainable params: 0
_________________________________________________________________


In [34]:
callback = EarlyStopping(monitor='loss', patience=3)    # this will monitor the model training for overfitting
model.fit(X_train, y_train, epochs=100, batch_size=32, callbacks=[callback], verbose=1)    

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


<keras.callbacks.History at 0x24eae21b940>

# Prediction

In [35]:
pred = model.predict(X_test)



In [36]:
pred = sc.inverse_transform(pred)   # predicted values are transformed back to original values from the normalized values

In [38]:
# create dataframes for visualization

train = data[:len(df_X_train)]
valid = data[len(df_X_train):]
valid['Predictions'] = pred



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Visualization

In [39]:
# plotting graph of the actual values and the predicted values for analysis

fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=train.index, y=train['Open'], name='Open'))
fig1.add_trace(go.Scatter(x=valid.index, y=valid['Open'], name=f'Actual Open'))
fig1.add_trace(go.Scatter(x=valid.index, y=valid['Predictions'], name=f'Predicted Open'))
fig1.layout.update(title_text='Predictions', xaxis_rangeslider_visible=True)
fig1

# Evaluation

In [51]:
MSE = mean_squared_error(y_test, pred)
print('MSE:', MSE)

RMSE = math.sqrt(MSE)
print('RMSE:', RMSE)

mape = mean_absolute_percentage_error(y_test, pred)
print('MAPE:', mape)

forecast_errors = [valid['Open'][i]-valid['Predictions'][i] for i in range(len(valid))]
bias = sum(forecast_errors) * 1.0/len(valid)
print('Bias:', bias)

MSE: 14593.185200192138
RMSE: 120.80225660223462
MAPE: 0.014578548691516131
Bias: 19.51441193633661


In [52]:
y_test.mean()

3879.7471253534404

In [None]:
# The model learns from the time-series data and performs predictions as expected.
# Further hyperparameter tuning can be performed to achieve better evaluations.