## Forecasting Stocks using RNN (LSTM)

### To start with the implementation of a basic LSTM on time series forecasting, we import the necessary libraries and load the data set:

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import keras
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense, Activation, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

In [None]:
# Read the price history CSV into a DataFrame and print pandas' summary of it.
csv_path = 'INFY20002008.csv'
data = pd.read_csv(csv_path)
data.info()

### Next, we subset the data to only the Date and Average Price attributes. We are going to focus on Average Price; Date is kept just for labels.

In [None]:
# Keep just the two columns used below: 'Date' and 'Average Price'.
selected_columns = ['Date', 'Average Price']
data = data[selected_columns]

### Let us now proceed with scaling the values and splitting the data set into train and test portions. Remember not to shuffle the dataset while splitting: it must be split in chronological sequence.

In [None]:
# Rescale 'Average Price' into the range [0, 1]. The column is reshaped into a
# single-feature column vector (n_rows, 1) before being handed to the scaler.
# NOTE(review): the scaler is fitted on the full series, i.e. including the rows
# that are later used as the test set; consider fitting on the training rows only.
price_column = data['Average Price'].values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_price = scaler.fit_transform(price_column)

In [None]:
# Sequential 75:25 split: the first 75% of the rows become the training set and
# the remaining rows become the test set (row order is preserved, no shuffling).
train_size = int(data.shape[0] * 0.75)
train = scaled_price[:train_size, :]
test = scaled_price[train_size:data.shape[0], :]
print("Number of entries (training set, test set): " + str((len(train), len(test))))

### Next, we need to construct a data set from the array of Average Price values along with defining a window size. 
#### Window is used to define how many values need to be taken while forecasting the new value. By default, in the function, we set the window size as 1, however, while constructing the data set, we set the window size to 3. You can change it and observe the corresponding effect on the forecasted value.

In [None]:
def create_dataset(scaled_price, window_size=1):
    """Turn a price series into sliding-window samples for one-step-ahead regression.

    Each sample consists of ``window_size`` consecutive values taken from
    column 0 of ``scaled_price``; its target is the value that immediately
    follows the window.

    Args:
        scaled_price: 2-D array indexed as ``scaled_price[row, 0]``.
        window_size: Number of consecutive values used as input features for
            each sample.

    Returns:
        tuple: ``(data_X, data_Y)`` where ``data_X`` has shape
        ``(n_samples, window_size)`` and ``data_Y`` has shape ``(n_samples,)``,
        with ``n_samples = len(scaled_price) - window_size - 1``. When the
        series is too short to produce any sample, empty arrays of shape
        ``(0, window_size)`` and ``(0,)`` are returned so that callers can
        still read ``data_X.shape[1]``.
    """
    # NOTE: the "- 1" means the last possible window/target pair is not
    # emitted. It is kept as-is because the plotting offsets used later in
    # this file are derived from exactly these sample counts.
    n_samples = len(scaled_price) - window_size - 1
    if n_samples <= 0:
        # np.array([]) would be 1-D here; keep the result 2-D instead.
        return (np.empty((0, window_size)), np.empty((0,)))

    data_X = np.array(
        [scaled_price[start:start + window_size, 0] for start in range(n_samples)]
    )
    # The targets are the contiguous run of values that follow each window.
    data_Y = np.array(scaled_price[window_size:window_size + n_samples, 0])
    return (data_X, data_Y)

### Next, we call the function and reshape the dataset to make it fit for Keras:

In [None]:
# Build the one-step-ahead regression samples for both splits, using the
# previous `window_size` values as the features of each sample.
window_size = 3
train_X, train_Y = create_dataset(train, window_size)
test_X, test_Y = create_dataset(test, window_size)
print("Original training data shape:")
print(train_X.shape)

# Insert a middle axis of length 1 so that every sample has the shape
# (1, window_size), which is the input_shape given to the LSTM layer.
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print("New training data shape:")
print(train_X.shape)

#### Now, we design our LSTM network with a single LSTM layer of four blocks, using MSE as the loss function, and train it for three epochs.


The LSTM architecture here consists of:  

* One input layer.  
* One LSTM layer of 4 blocks.  
* One Dense layer to produce a single output.  
* MSE as loss function.  

In [None]:
# Network definition: one LSTM layer with 4 units fed with samples of shape
# (1, window_size), followed by a Dense layer producing a single output.
model = Sequential([
    LSTM(4, input_shape=(1, window_size)),
    Dense(1),
])

# Mean squared error as the loss, Adam as the optimizer.
model.compile(optimizer="adam", loss="mean_squared_error")

# Fit on the training samples: 3 epochs, one sample per batch.
model.fit(train_X, train_Y, batch_size=1, epochs=3)

### Forecasting and visualization
#### Let us now check the RMSE on the train and test data and perform the corresponding visualization:

In [None]:
def predict_and_score(model, X, Y):
    """Predict with ``model`` on ``X`` and compute the RMSE against ``Y``.

    Predictions and targets are both mapped back to the original price scale
    through the module-level ``scaler`` before the error is computed, so the
    score is expressed in the units of the original data.

    Args:
        model: Fitted model exposing ``predict(X)``; its output is indexed as
            ``pred[:, 0]`` after inverse scaling.
        X: Input samples passed unchanged to ``model.predict``.
        Y: 1-D sequence of scaled target values, one per sample in ``X``.

    Returns:
        tuple: ``(score, pred)`` where ``score`` is the root mean squared
        error on the original scale and ``pred`` holds the inverse-scaled
        predictions.
    """
    # Make predictions on the original scale of the data.
    pred = scaler.inverse_transform(model.predict(X))
    # Bring the targets back to the original scale as well. The scaler was
    # fitted on a single feature, so pass the targets as an (n_samples, 1)
    # column rather than as one row of n_samples "features".
    orig_data = scaler.inverse_transform(np.asarray(Y).reshape(-1, 1))
    # Calculate RMSE.
    score = np.sqrt(mean_squared_error(orig_data[:, 0], pred[:, 0]))
    return (score, pred)

# Score the model on both splits; the inverse-scaled predictions are kept for
# the plot below.
rmse_train, train_predict = predict_and_score(model, train_X, train_Y)
rmse_test, test_predict = predict_and_score(model, test_X, test_Y)

for score_template, rmse_value in (
    ("Training data score: %.2f RMSE", rmse_train),
    ("Test data score: %.2f RMSE", rmse_test),
):
    print(score_template % rmse_value)

In [None]:
# Training predictions: a NaN-filled array shaped like the full series, with
# the predictions written in starting at index `window_size` (the position of
# the first training target).
train_predict_plot = np.full_like(scaled_price, np.nan)
train_predict_plot[window_size:window_size + len(train_predict), :] = train_predict

# Test predictions: same idea, offset past the training predictions plus the
# rows consumed by windowing in both splits.
test_predict_plot = np.full_like(scaled_price, np.nan)
test_start = len(train_predict) + (window_size * 2) + 1
test_predict_plot[test_start:len(scaled_price) - 1, :] = test_predict

# Draw the true series (on the original scale) and both prediction overlays.
plt.figure(figsize=(15, 5))
for series, legend_label in (
    (scaler.inverse_transform(scaled_price), "True value"),
    (train_predict_plot, "Training set prediction"),
    (test_predict_plot, "Test set prediction"),
):
    plt.plot(series, label=legend_label)
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted training / test")
plt.legend()
plt.show()