# Introduction

In this notebook, I demonstrate Recurrent Neural Networks for time-series and predictive analysis. I use the Gated Recurrent Unit (GRU), Long Short-Term Memory (LSTM), and Bidirectional Long Short-Term Memory (BiLSTM) architectures and compare their performance on NSE Tata stock market data. Then, using the best model, I make predictions for the next 5 years.

# Data

**NSE Stock Market** data is used for this study. The dataset contains daily stock prices from 2010 to 2018, including the company's opening, closing, high, and low prices, total traded quantity, and total turnover for each day.

# Implementation

### Import Libraries

In [None]:
from IPython.core.pylabtools import figsize
from pandas.plotting import register_matplotlib_converters
from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer, QuantileTransformer
from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU, Bidirectional, Activation, Dropout
from keras import callbacks
from keras.regularizers import l2
from pandas import DataFrame
import random
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import tensorflow as tf

### Data Reading & Exploration

In [None]:
# Load the stock records; the 'Date' column is parsed and used as the index.
Dataset = pd.read_csv('Stock_Data.csv', index_col='Date', parse_dates=['Date'])
Dataset.head()
# Explore on an independent copy so the loaded frame stays untouched.
df = Dataset.copy()

In [None]:
def timeseries(x_axis, y_axis, x_label, y_label):
    """Draw ``y_axis`` against ``x_axis`` as a black line on a new 12x7 figure.

    Args:
        x_axis: Values for the horizontal axis.
        y_axis: Values for the vertical axis.
        x_label: Caption for the horizontal axis (font size 12).
        y_label: Caption for the vertical axis (font size 12).
    """
    plt.figure(figsize=(12, 7))
    plt.plot(x_axis, y_axis, color='black')
    plt.xlabel(x_label, fontdict={'fontsize': 12})
    plt.ylabel(y_label, fontdict={'fontsize': 12})
    # Optional export: plt.savefig('E:/Forecast/1.jpg', format='jpg', dpi=1000)


timeseries(df.index, df['Turnover_Lacs'], 'Time (day)', 'Turnover(Lacs)')

## Different plots to better understand the data and its distribution

In [None]:
# One histogram-plus-density figure per column, shown one after another.
for column in ('Open', 'High', 'Low', 'Last', 'Total_Trade_Quantity', 'Turnover_Lacs'):
    sns.distplot(df[column], hist=True, color='r')
    plt.show()

# Annotated correlation heatmap across the columns.
sns_heat = sns.heatmap(df.corr(), annot=True)
# plt.savefig('E:/Forecast/2.jpg', format='jpg', dpi=1000)

# Pairwise scatter/distribution grid for every column pair.
sns_pair = sns.pairplot(df)
# plt.savefig('E:/Forecast/3.jpg', format='jpg', dpi=1000)

## Data Preprocessing

In [None]:
# Checking data distribution: one panel per column on a shared 4x2 grid.
fig, ax = plt.subplots(4, 2, figsize=(16, 16))
distribution_columns = [
    'Open', 'High', 'Low', 'Last', 'Close',
    'Total_Trade_Quantity', 'Turnover_Lacs',
]
# ax.flat walks the grid row by row, so the panels fill left-to-right, top-to-bottom.
for column, axis in zip(distribution_columns, ax.flat):
    sns.distplot(Dataset[column], bins=20, ax=axis)

In [None]:
# Checking outliers: a box plot for every column except the last one.
columns = Dataset.columns.tolist()[:-1]
fig, ax = plt.subplots(4, 2, figsize=(16, 16))

for position, col in enumerate(columns):
    # Two panels per row: the quotient picks the row, the remainder the column.
    grid_row, grid_col = divmod(position, 2)
    sns.boxplot(data=Dataset, x=col, ax=ax[grid_row, grid_col])
plt.show()


In [None]:
# Transform every column with a quantile transformation targeting a normal
# output distribution.
# NOTE(review): the transformer is fitted on all rows, i.e. before the
# train/test split further down - confirm this is intended.
X = Dataset.iloc[:, :]
qt = QuantileTransformer(output_distribution='normal', n_quantiles=1000)
qt.fit(X)
X_qt = qt.transform(X)
# Re-attach the original column names; the result gets a fresh default index.
X_qt_df = pd.DataFrame(data=X_qt, columns=X.columns)

In [None]:
# Checking data distribution after the quantile transformation.
# Seven columns are plotted, so a 4x2 grid is required; each column gets its
# own axes (previously Turnover_Lacs was drawn on top of Total_Trade_Quantity).
fig, ax = plt.subplots(4, 2, figsize=(16, 16))
sns.distplot(X_qt_df.Open, bins=20, ax=ax[0, 0])
sns.distplot(X_qt_df.High, bins=20, ax=ax[0, 1])
sns.distplot(X_qt_df.Low, bins=20, ax=ax[1, 0])
sns.distplot(X_qt_df.Last, bins=20, ax=ax[1, 1])
sns.distplot(X_qt_df.Close, bins=20, ax=ax[2, 0])
sns.distplot(X_qt_df.Total_Trade_Quantity, bins=20, ax=ax[2, 1])
sns.distplot(X_qt_df.Turnover_Lacs, bins=20, ax=ax[3, 0])

In [None]:
#+++++++++++++++++++++++++++++Splitting Data+++++++++++++++++++++++++++
# Positional 75/25 split: the first rows train, the remaining rows test.
train_size = int(len(X_qt_df) * 0.75)
print(train_size)
training_data = X_qt_df.iloc[:train_size]
testing_data = X_qt_df.iloc[train_size:]

#+++++++++++++++++++++++Plotting Train and Test Data+++++++++++++++++++
plt.figure(figsize=(15, 9))
for subset in (training_data, testing_data):
    plt.plot(subset.Turnover_Lacs)
plt.xlabel('Time (day)')
plt.ylabel('Turnover(Lacs)')
plt.legend(['Train set', 'Test set'], loc='upper right')

print('Dimension of Training Data: ', training_data.shape)
print('Dimension of Testing Data: ', testing_data.shape)

# 'Turnover_Lacs' is the target; every other column is a feature.
# Targets keep a trailing column axis because they are selected with a list.
Train_X = training_data.drop(columns='Turnover_Lacs').to_numpy()
Train_Y = training_data[['Turnover_Lacs']].to_numpy()

Test_X = testing_data.drop(columns='Turnover_Lacs').to_numpy()
Test_Y = testing_data[['Turnover_Lacs']].to_numpy()

#+++++++++++++++++++++++Creating Dataset++++++++++++++++++++++++++++
def create_dataset(X, y, time_steps=1):
    """Slice ``X`` into overlapping windows paired with the following target.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X`` and is paired
    with ``y[i + time_steps]``, the entry right after the window.

    Args:
        X: 2-D array indexed as ``X[rows, columns]``.
        y: Array of targets aligned row-by-row with ``X``.
        time_steps: Number of consecutive rows per window.

    Returns:
        A tuple ``(windows, targets)`` of NumPy arrays; both are empty when
        ``X`` has no more than ``time_steps`` rows.
    """
    window_count = len(X) - time_steps
    windows = [X[start:start + time_steps, :] for start in range(window_count)]
    targets = [y[start + time_steps] for start in range(window_count)]
    return np.array(windows), np.array(targets)

# Each sample is a window of 10 consecutive rows.
Time_steps = 10

X_train, y_train = create_dataset(Train_X, Train_Y, Time_steps)
X_test, y_test = create_dataset(Test_X, Test_Y, Time_steps)

# Report the shapes of the windowed arrays.
for label, array in (
    ('X_train.shape: ', X_train),
    ('y_train.shape: ', y_train),
    ('X_test.shape: ', X_test),
    ('y_test.shape: ', y_test),
):
    print(label, array.shape)

## Model Building and Fitting

In [None]:
def create_GRU_LSTM_model(m, units):
    """Assemble and compile a two-layer recurrent regressor.

    The input shape is read from the module-level ``X_train`` (its second and
    third dimensions), so ``X_train`` must exist before this is called.

    Args:
        m: Recurrent layer class to stack twice (called with ``GRU`` and
            ``LSTM`` in this notebook).
        units: Number of units in each recurrent layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, mean absolute error
        loss) ending in a single-unit ``Dense`` output.
    """
    window_shape = [X_train.shape[1], X_train.shape[2]]
    model = Sequential([
        # First recurrent layer returns the full sequence for the next one.
        m(
            units=units,
            input_shape=window_shape,
            kernel_regularizer=l2(0.0001),
            return_sequences=True,
        ),
        Activation('relu'),
        Dropout(0.25),
        m(units=units),
        Dense(units=1),
    ])
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model

def create_BiLSTM_model(units):
    """Assemble and compile a two-layer bidirectional LSTM regressor.

    The input shape is read from the module-level ``X_train`` (its second and
    third dimensions), so ``X_train`` must exist before this is called.

    Args:
        units: Number of units in each wrapped ``LSTM`` layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, mean absolute error
        loss) ending in a single-unit ``Dense`` output.
    """
    window_shape = (X_train.shape[1], X_train.shape[2])
    # The first wrapped LSTM returns the full sequence for the second one.
    first_lstm = LSTM(units=units, kernel_regularizer=l2(0.0001), return_sequences=True)
    model = Sequential([
        Bidirectional(first_lstm, input_shape=window_shape),
        Activation('relu'),
        Dropout(0.25),
        Bidirectional(LSTM(units=units)),
        Dense(units=1),
    ])
    model.compile(optimizer='adam', loss='mean_absolute_error')
    return model

# Build one model per architecture, passing 64 as the unit count for each.
model_gru = create_GRU_LSTM_model(GRU, 64)
model_lstm = create_GRU_LSTM_model(LSTM, 64)
model_bilstm = create_BiLSTM_model(64)

def fit_model(model):
    """Train ``model`` on the module-level ``X_train``/``y_train`` arrays.

    Runs for up to 400 epochs in batches of 32 without shuffling, using a
    validation split of 0.2. Training stops early once ``val_loss`` has not
    improved for 10 epochs.

    Args:
        model: A compiled Keras model.

    Returns:
        The object returned by ``model.fit`` (its ``history`` dict holds the
        per-epoch ``loss`` and ``val_loss``).
    """
    stop_when_stalled = callbacks.EarlyStopping(monitor='val_loss', patience=10)
    return model.fit(
        X_train,
        y_train,
        epochs=400,
        batch_size=32,
        validation_split=0.2,
        shuffle=False,
        callbacks=[stop_when_stalled],
    )


history_gru = fit_model(model_gru)
history_lstm = fit_model(model_lstm)
history_bilstm = fit_model(model_bilstm)

## Plotting and Predictions

In [None]:
# Training-loss and validation-loss curves
def plot_loss(history):
    """Plot the per-epoch training and validation loss on a new figure.

    Args:
        history: Object whose ``history`` dict contains ``'loss'`` and
            ``'val_loss'`` sequences (as returned by ``fit_model``).
    """
    plt.figure(figsize=(15, 9))
    for series_key in ('loss', 'val_loss'):
        plt.plot(history.history[series_key])
    plt.xlabel('epoch')
    plt.ylabel('Loss')
    plt.legend(['Train loss', 'Validation loss'], loc='upper right')


plot_loss(history_bilstm)
plot_loss(history_lstm)
plot_loss(history_gru)

def prediction(model):
    """Return ``model``'s predictions for the module-level ``X_test`` windows."""
    return model.predict(X_test)


prediction_bilstm = prediction(model_bilstm)
prediction_lstm = prediction(model_lstm)
prediction_gru = prediction(model_gru)


# True test targets versus model predictions
def plot_future(prediction, y_test):
    """Overlay predicted values on the true test targets in a new figure.

    Both series share one x-axis of ``len(prediction)`` consecutive integers.

    Args:
        prediction: Predicted values, one per test window.
        y_test: True target values aligned with ``prediction``.
    """
    steps = np.arange(len(prediction))
    plt.figure(figsize=(10, 6))
    plt.plot(steps, np.array(y_test), label='True Future')
    plt.plot(steps, np.array(prediction), label='Prediction')
    plt.xlabel('Time (day)')
    plt.ylabel('Turnover(Lacs)')
    plt.legend(loc='upper right')
    # Optional export: plt.savefig('E:/Forecast/4.jpg', format='jpg', dpi=1000)


plot_future(prediction_bilstm, y_test)
plot_future(prediction_lstm, y_test)
plot_future(prediction_gru, y_test)

def evaluate_prediction(predictions, actual, model_name):
    """Print the MAE and RMSE of ``predictions`` against ``actual``.

    Args:
        predictions: Predicted values.
        actual: True values, compatible with ``predictions`` for subtraction.
        model_name: Heading printed above the metrics.
    """
    residuals = predictions - actual
    mae = np.mean(np.abs(residuals))
    rmse = np.sqrt(np.mean(np.square(residuals)))
    report = [
        model_name + ':',
        'Mean Absolute Error: {:.4f}'.format(mae),
        'Root Mean Square Error: {:.4f}'.format(rmse),
        '',  # trailing blank line separates consecutive reports
    ]
    for line in report:
        print(line)
# Report the test-set errors of each model in turn.
for model_prediction, model_label in (
    (prediction_bilstm, 'Bidirectional LSTM'),
    (prediction_lstm, 'LSTM'),
    (prediction_gru, 'GRU'),
):
    evaluate_prediction(model_prediction, y_test, model_label)

## Making Future Predictions on Unseen Data

In [None]:
# Load the unseen data with 'Date' parsed and used as the index, then order
# the rows by that index so they stay in date order.
newinput = pd.read_csv(
    'Test_Data.csv', index_col='Date', parse_dates=['Date']
).sort_index()
newinput.head()


In [None]:
# Plot history and future data
def plot_history_future(y_train, prediction, model_name):
    """Plot past targets followed by forecast values on one shared axis.

    The history occupies x positions ``0 .. len(y_train) - 1``; the forecast
    continues from ``len(y_train)`` onwards.

    Args:
        y_train: Historical target values.
        prediction: Forecast values to append after the history.
        model_name: Model label appended to the figure title.
    """
    history_len = len(y_train)
    history_positions = np.arange(history_len)
    future_positions = list(range(history_len, history_len + len(prediction)))

    plt.figure(figsize=(10, 6))
    plt.plot(history_positions, np.array(y_train), label='History')
    plt.plot(future_positions, np.array(prediction), label='Prediction')

    plt.title('History and prediction for ' + model_name)
    plt.xlabel('Time (day)')
    plt.ylabel('TurnOver Lacs')
    plt.legend(loc='upper right')
    # Optional export:
    # plt.savefig('C:/Users/nious/Documents/Medium/LSTM&GRU/3.jpg', format='jpg', dpi=1000)

In [None]:
Time_steps = 10


def forecast(X_input, time_steps):
    """Predict with the trained LSTM model on unseen data.

    The input is scaled with the module-level ``qt`` transformer exactly as it
    was fitted during preprocessing. The transformer is NOT refitted here:
    refitting on unseen data would map the inputs onto a different scale from
    the one the model was trained on, and would overwrite the shared ``qt``.

    Args:
        X_input: DataFrame with the same columns (names and order) that ``qt``
            was fitted on, including ``'Turnover_Lacs'``.
        time_steps: Number of consecutive rows per input window; must match
            the window length the model was trained with.

    Returns:
        The output of ``model_lstm.predict`` for the
        ``len(X_input) - time_steps`` windows, in the transformed scale.
    """
    # Scale the unseen input with the already-fitted scaler (transform only).
    scaled = pd.DataFrame(qt.transform(X_input), columns=X_input.columns)
    # The target column is not a model input.
    features = scaled.drop('Turnover_Lacs', axis=1).to_numpy()

    # Reshape unseen data to a 3D input of sliding windows.
    windows = np.array(
        [features[i:i + time_steps, :] for i in range(len(features) - time_steps)]
    )

    # Make prediction for unseen data using LSTM model
    return model_lstm.predict(windows)


# NOTE: this rebinds the module-level name `prediction`, which earlier in the
# notebook refers to the helper function of the same name.
prediction = forecast(newinput, Time_steps)
plot_history_future(y_train, prediction, 'LSTM')