# PHASE 1 (Preprocessing)

## Importing the Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the training set

In [2]:
# Load the historical prices used for training.
dataset_train = pd.read_csv("Google_Stock_Price_Train.csv")
# Select the opening price by column name (the same way the test-preprocessing
# cell does) rather than by position, so a change in column order cannot make
# training and inference silently use different features.
# The double brackets keep the result 2-D, shape (n_rows, 1), which is what the
# scaler expects.
training_set = dataset_train[["Open"]].to_numpy()

In [3]:
# Inspect the extracted single-column array (one row per trading day).
training_set

array([[325.25],
       [331.27],
       [329.83],
       ...,
       [793.7 ],
       [783.33],
       [782.75]])

In [4]:
# Show the loaded training DataFrame (the notebook truncates long frames when rendering).
dataset_train

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800
...,...,...,...,...,...,...
1253,12/23/2016,790.90,792.74,787.28,789.91,623400
1254,12/27/2016,790.68,797.86,787.66,791.55,789100
1255,12/28/2016,793.70,794.23,783.20,785.05,1153800
1256,12/29/2016,783.33,785.93,778.92,782.79,744300


## Feature Scaling

In [5]:
# Normalization (min-max scaling) is recommended here rather than standardization.
from sklearn.preprocessing import MinMaxScaler

# Keep the fitted scaler in `sc`: it is reused later to scale the test inputs
# and to map predictions back to real prices.
sc = MinMaxScaler(feature_range=(0, 1))
scaled = sc.fit(training_set).transform(training_set)

In [6]:
# Inspect the min-max scaled training values.
scaled

array([[0.08581368],
       [0.09701243],
       [0.09433366],
       ...,
       [0.95725128],
       [0.93796041],
       [0.93688146]])

## Creating a data structure with 120 timesteps and 1 output

In [7]:
# Build supervised samples with a sliding window: each input row holds the
# previous `n_timesteps` scaled prices and its target is the price that follows.
n_timesteps = 120
n_rows = len(scaled)  # taken from the data instead of a hard-coded row count

# Fail early with a clear message; otherwise an empty array would only surface
# later as an obscure shape error in the reshape step.
if n_rows <= n_timesteps:
    raise ValueError(
        f"Need more than {n_timesteps} rows to build training windows, got {n_rows}"
    )

X_train = np.array(
    [scaled[i - n_timesteps: i, 0] for i in range(n_timesteps, n_rows)]
)
# Targets are simply every scaled price from position `n_timesteps` onward.
y_train = scaled[n_timesteps:, 0].copy()

In [8]:
# Inspect the 2-D window matrix: one row per sample, one column per timestep.
X_train

array([[0.08581368, 0.09701243, 0.09433366, ..., 0.0180445 , 0.0180817 ,
        0.00710618],
       [0.09701243, 0.09433366, 0.09156187, ..., 0.0180817 , 0.00710618,
        0.0064923 ],
       [0.09433366, 0.09156187, 0.07984225, ..., 0.00710618, 0.0064923 ,
        0.00225091],
       ...,
       [0.78201503, 0.79792023, 0.81851328, ..., 0.95475854, 0.95204256,
        0.95163331],
       [0.79792023, 0.81851328, 0.82688444, ..., 0.95204256, 0.95163331,
        0.95725128],
       [0.81851328, 0.82688444, 0.82308952, ..., 0.95163331, 0.95725128,
        0.93796041]])

In [9]:
# Inspect the targets: the scaled value that follows each window.
y_train

array([0.0064923 , 0.00225091, 0.00682714, ..., 0.95725128, 0.93796041,
       0.93688146])

## Reshaping

In [10]:
# Append a trailing axis of size 1 so the array is 3-D, matching the
# input_shape=(timesteps, 1) declared on the first LSTM layer.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

In [11]:
# Inspect the reshaped 3-D array (note the extra innermost dimension).
X_train

array([[[0.08581368],
        [0.09701243],
        [0.09433366],
        ...,
        [0.0180445 ],
        [0.0180817 ],
        [0.00710618]],

       [[0.09701243],
        [0.09433366],
        [0.09156187],
        ...,
        [0.0180817 ],
        [0.00710618],
        [0.0064923 ]],

       [[0.09433366],
        [0.09156187],
        [0.07984225],
        ...,
        [0.00710618],
        [0.0064923 ],
        [0.00225091]],

       ...,

       [[0.78201503],
        [0.79792023],
        [0.81851328],
        ...,
        [0.95475854],
        [0.95204256],
        [0.95163331]],

       [[0.79792023],
        [0.81851328],
        [0.82688444],
        ...,
        [0.95204256],
        [0.95163331],
        [0.95725128]],

       [[0.81851328],
        [0.82688444],
        [0.82308952],
        ...,
        [0.95163331],
        [0.95725128],
        [0.93796041]]])

# PHASE 2 (Build the RNN)

## Importing the libraries

In [12]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

## Initializing the RNN

In [13]:
# Start with an empty Sequential model; layers are appended one by one in the cells below.
regressor = Sequential()

## Adding the first LSTM layer

In [14]:
# First recurrent layer. The input shape is (timesteps, 1 feature), with the
# timestep count read from X_train. return_sequences=True because another LSTM
# layer is stacked on top of this one.
first_lstm = LSTM(
    units=70,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1),
)
regressor.add(first_lstm)
# rate is the fraction of units ignored during training (0.2 = 20%).
regressor.add(Dropout(rate=0.2))

## Adding a second LSTM layer

In [15]:
# Second recurrent layer; still returns sequences because a further LSTM follows.
regressor.add(
    LSTM(units=70, return_sequences=True)
)
# rate is the fraction of units ignored during training (0.2 = 20%).
regressor.add(
    Dropout(rate=0.2)
)

## Adding a third LSTM layer

In [16]:
# Third recurrent layer; still returns sequences because the final LSTM follows.
regressor.add(
    LSTM(units=70, return_sequences=True)
)
# rate is the fraction of units ignored during training (0.2 = 20%).
regressor.add(
    Dropout(rate=0.2)
)

## Adding a fourth LSTM layer

In [17]:
# Last recurrent layer: return_sequences=False because the next layer is the
# Dense output, not another LSTM.
regressor.add(
    LSTM(units=70, return_sequences=False)
)
# rate is the fraction of units ignored during training (0.2 = 20%).
regressor.add(
    Dropout(rate=0.2)
)

## Adding the output layer

In [18]:
# One output unit: the single value predicted for each input window.
output_layer = Dense(units=1)
regressor.add(output_layer)

## Compiling the RNN

In [19]:
# Regression task: minimize mean squared error using the Adam optimizer.
regressor.compile(
    optimizer='adam',
    loss='mean_squared_error',
)

## Fitting the RNN to the training set

In [None]:
# Train on the windowed data: 130 epochs, in batches of 32 samples.
regressor.fit(
    X_train,
    y_train,
    epochs=130,
    batch_size=32,
)

Epoch 1/130
Epoch 2/130
Epoch 3/130
Epoch 4/130
Epoch 5/130
Epoch 6/130
Epoch 7/130
Epoch 8/130
Epoch 9/130
Epoch 10/130
Epoch 11/130
Epoch 12/130
Epoch 13/130
Epoch 14/130
Epoch 15/130
Epoch 16/130
Epoch 17/130
Epoch 18/130
Epoch 19/130
Epoch 20/130
Epoch 21/130
Epoch 22/130
Epoch 23/130
Epoch 24/130
Epoch 25/130
Epoch 26/130
Epoch 27/130
Epoch 28/130
Epoch 29/130
Epoch 30/130
Epoch 31/130
Epoch 32/130
Epoch 33/130
Epoch 34/130
Epoch 35/130
Epoch 36/130
Epoch 37/130
Epoch 38/130
Epoch 39/130
Epoch 40/130
Epoch 41/130
Epoch 42/130
Epoch 43/130
Epoch 44/130
Epoch 45/130
Epoch 46/130
Epoch 47/130
Epoch 48/130
Epoch 49/130
Epoch 50/130
Epoch 51/130
Epoch 52/130
Epoch 53/130
Epoch 54/130
Epoch 55/130
Epoch 56/130
Epoch 57/130
Epoch 58/130
Epoch 59/130
Epoch 60/130
Epoch 61/130
Epoch 62/130
Epoch 63/130
Epoch 64/130
Epoch 65/130
Epoch 66/130
Epoch 67/130
Epoch 68/130
Epoch 69/130
Epoch 70/130
Epoch 71/130
Epoch 72/130
Epoch 73/130
Epoch 74/130
Epoch 75/130
Epoch 76/130
Epoch 77/130
Epoch 78

# PHASE 3 (Making Predictions & Visualizing results)

## Get the real stock price of 2017

In [None]:
# Load the held-out prices that the predictions will be compared against.
dataset_test = pd.read_csv("Google_Stock_Price_Test.csv")
# Select the opening price by column name (the same way the preprocessing cell
# below does) rather than by position; double brackets keep the result 2-D.
real_stock_price = dataset_test[["Open"]].to_numpy()
real_stock_price

## Getting the predicted stock price of 2017

### Preprocessing the test set

In [None]:
# The window length must equal what the network was trained on, so read it
# from the training array instead of repeating a literal.
n_timesteps = X_train.shape[1]
n_test = len(dataset_test)  # number of days to predict, taken from the data

# Predicting the first test day needs the last `n_timesteps` training prices,
# so stitch the two 'Open' series together and keep only the tail we need.
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
start = len(dataset_total) - n_test - n_timesteps
if start < 0:
    # A negative start would silently slice from the wrong end of the series.
    raise ValueError(
        f"Need at least {n_timesteps} training rows before the test set, "
        f"got {len(dataset_total) - n_test}"
    )
# .iloc makes the slice explicitly positional; the concatenated index contains
# duplicate integer labels, so label-based access would be ambiguous.
inputs = dataset_total.iloc[start:].values
inputs = inputs.reshape(-1, 1)
# Reuse the scaler fitted on the training data (transform only, never refit).
inputs = sc.transform(inputs)

# One window of the previous `n_timesteps` scaled prices per test day.
X_test = np.array(
    [inputs[i - n_timesteps: i, 0] for i in range(n_timesteps, n_timesteps + n_test)]
)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

predicted_stock_price = regressor.predict(X_test)
# Map the scaled predictions back to the original price scale.
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

In [None]:
# Inspect the predictions after they were mapped back to the original price scale.
predicted_stock_price

### Visualizing the predicted values

In [None]:
# Overlay the real and predicted series on one set of axes for visual comparison.
fig, ax = plt.subplots()
ax.plot(real_stock_price, color='green', label='Real Price')
ax.plot(predicted_stock_price, color='red', label='Predicted Price')
ax.set_title("Stock Price Predictions")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
plt.show()