# Predicting stock prices using a Recurrent Neural Network (RNN) - LSTM

## Step 1 - Data Preprocessing

### Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Importing the training set

In [2]:
# Load the historical prices used for training.
dataset_train = pd.read_csv('Google_Stock_Price_Train.csv')
# Train on a single price series. The column is selected by name, matching how
# the prediction cell reads it, so a change in CSV column order cannot make the
# scaler and the test inputs refer to different columns. The double brackets
# keep the result 2-D (n_rows, 1) rather than a flat Series.
training_set = dataset_train[['Open']].values

### Feature Scaling

In [3]:
from sklearn.preprocessing import MinMaxScaler
# Rescale the training prices into the [0, 1] range. The fitted scaler `sc` is
# kept because the prediction cell reuses it to transform the test inputs and
# to map the network's outputs back to price units.
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)

### Creating the training samples: 60 past timesteps as input, the next value as output

In [4]:
# Each training sample is a window of `n_timesteps` consecutive scaled prices;
# its target is the scaled price that immediately follows the window.
n_timesteps = 60
# Take the series length from the data instead of hard-coding the row count,
# so the cell keeps working (and uses every row) if the CSV changes size.
n_observations = len(training_set_scaled)
if n_observations <= n_timesteps:
    raise ValueError(
        f"Need more than {n_timesteps} rows to build a training window, "
        f"got {n_observations}."
    )
X_train = np.array([
    training_set_scaled[i - n_timesteps:i, 0]
    for i in range(n_timesteps, n_observations)
])
y_train = np.array([
    training_set_scaled[i, 0]
    for i in range(n_timesteps, n_observations)
])

### Reshaping

In [5]:
# Append a trailing feature axis of size 1 so the array is laid out as
# (samples, timesteps, features), matching the LSTM's input_shape below.
sample_count, window_length = X_train.shape[0], X_train.shape[1]
X_train = np.reshape(X_train, (sample_count, window_length, 1))

## Step 2 - Building and Training the RNN

### Importing the Keras libraries and packages

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout

### Initialising the RNN

In [7]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling are
# repeatable from one Restart & Run All to the next (on the same setup).
from tensorflow.keras.utils import set_random_seed
set_random_seed(42)

# Empty sequential container; the layers are stacked onto it in the next cells.
rnn = Sequential()

### Layer 1: first LSTM layer and Dropout regularisation (to avoid overfitting)

In [8]:
# First recurrent layer. It declares the input layout (timesteps, 1 feature)
# and returns the full output sequence so another LSTM can be stacked on top.
# The dropout that follows discards 20% of the activations during training.
for layer in (
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
):
    rnn.add(layer)

### Layer 2: second LSTM layer and Dropout regularisation

In [9]:
# Second recurrent layer; still emits the whole sequence for the next LSTM.
second_lstm = LSTM(units=50, return_sequences=True)
rnn.add(second_lstm)
rnn.add(Dropout(rate=0.2))

### Layer 3: third LSTM layer and Dropout regularisation

In [10]:
# Third recurrent layer; still emits the whole sequence for the next LSTM.
rnn.add(
    LSTM(
        units=50,
        return_sequences=True,
    )
)
rnn.add(Dropout(rate=0.2))

### Layer 4: fourth LSTM layer and Dropout regularisation

In [11]:
# Last recurrent layer: return_sequences is not set to True here because no
# further LSTM follows it; its output goes to the Dense output layer.
final_lstm = LSTM(units=50)
rnn.add(final_lstm)
rnn.add(Dropout(rate=0.2))

### Layer 5: the output layer

In [12]:
# Regression head: a single unit, because the network predicts one value.
output_layer = Dense(units=1)
rnn.add(output_layer)

### Compiling the RNN

In [13]:
# Mean squared error is the loss because this is a regression problem;
# Adam is the optimiser.
rnn.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

### Training the RNN

In [14]:
# Stop automatically once the training loss has not improved for 10 epochs,
# instead of relying on a manual interrupt. 250 stays as the upper bound on the
# number of epochs, and the weights of the best epoch are restored at the end.
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Keep the History object so the loss curve can be inspected afterwards.
history = rnn.fit(
    X_train,
    y_train,
    epochs=250,
    batch_size=32,
    callbacks=[early_stopping],
)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
 3/38 [=>............................] - ETA: 1s - loss: 0.0034

KeyboardInterrupt: 

## Step 3 - Evaluation and visualising the results

### Getting the real stock price of 2017

In [None]:
# Load the held-out prices that the predictions are compared against.
dataset_test = pd.read_csv('Google_Stock_Price_Test.csv')
# Select the price column by name, matching how the prediction cell reads it,
# so the plotted "real" series is the same column the model's inputs come from.
# The double brackets keep the result 2-D (n_rows, 1).
real_stock_price = dataset_test[['Open']].values

### Getting the predicted stock price of 2017

In [None]:
# Window length the network was trained with; every prediction needs this many
# preceding prices as input.
n_timesteps = 60

# Join train and test prices so the first test day has a full window of
# preceding values available (they come from the end of the training period).
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)

# Keep the test period plus the `n_timesteps` values right before it.
inputs = dataset_total[len(dataset_total) - len(dataset_test) - n_timesteps:].values
# The scaler works on a 2-D column. Use transform (not fit_transform) so the
# test inputs are scaled with the statistics learned from the training set.
inputs = sc.transform(inputs.reshape(-1, 1))

# One window per test day. The bound comes from the data rather than a fixed
# 80, so the cell works for a test file with any number of rows.
X_test = np.array([
    inputs[i - n_timesteps:i, 0]
    for i in range(n_timesteps, len(inputs))
])
# Add the trailing feature axis: (samples, timesteps, 1).
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Predict in scaled units, then map the outputs back to price units.
predicted_stock_price = rnn.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

### Visualising the results

In [None]:
# Draw the actual and the predicted series on one set of axes for comparison.
fig, ax = plt.subplots()
ax.plot(real_stock_price, color='red', label='Real Google Stock Price')
ax.plot(predicted_stock_price, color='blue', label='Predicted Google Stock Price')
ax.set_title('Google Stock Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Google Stock Price')
ax.legend()
plt.show()