In [None]:
#Importing important libraries
import tensorflow as tf
import numpy as np
import pandas as pd
import keras
import matplotlib as plt
import math

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt

from keras import layers
from keras import models

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM

In [None]:
#Load the historical GOOG price data from the CSV file into a DataFrame
csv_path = 'GOOG (10).csv'
training_dataset = pd.read_csv(csv_path)
#Leave the frame as the last expression so the notebook renders it
training_dataset

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-08-09,1249.900024,1255.541992,1246.010010,1249.099976,1249.099976,848600
1,2018-08-10,1243.000000,1245.694946,1232.000000,1237.609985,1237.609985,1108700
2,2018-08-13,1236.979980,1249.272949,1233.640991,1235.010010,1235.010010,997300
3,2018-08-14,1235.189941,1245.869995,1225.109985,1242.099976,1242.099976,1348100
4,2018-08-15,1229.260010,1235.239990,1209.510010,1214.380005,1214.380005,1828800
...,...,...,...,...,...,...,...
498,2020-08-03,1486.640015,1490.469971,1465.640015,1474.449951,1474.449951,2330200
499,2020-08-04,1476.569946,1485.560059,1458.650024,1464.969971,1464.969971,1903500
500,2020-08-05,1469.300049,1482.410034,1463.459961,1473.609985,1473.609985,1979500
501,2020-08-06,1471.750000,1502.390015,1466.000000,1500.099976,1500.099976,1995400


In [None]:
#Count the missing values in every column, then add the counts up for an overall total
null_counts = training_dataset.isna().sum()
print (null_counts)
total_null = null_counts.sum()
print ("The total number of null values is: " + str(total_null))

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64
The total number of null values is: 0


In [None]:
#Remove the columns that are not used below, leaving only Open, High and Low
#NOTE: the frame is modified in place, so this cell cannot be re-run without reloading the CSV
training_dataset.drop(columns=['Date', 'Close', 'Volume', 'Adj Close'], inplace=True)

In [None]:
#Plot the remaining price columns twice:
#first with one subplot per column, then with all columns sharing a single set of axes
plot_title = 'Alphabet Stock Prices'
training_dataset.plot(kind='line', subplots=True, title=plot_title)
training_dataset.plot.line(title=plot_title)

In [None]:
#Keep only the Open price by removing the High and Low columns
training_dataset.drop(columns=['High', 'Low'], inplace=True)
#Remove the last 2 rows of the frame
last_two_rows = training_dataset.tail(2).index
training_dataset.drop(index=last_two_rows, inplace=True)
#Show the resulting single-column frame and its dimensions
print(training_dataset)
print(training_dataset.shape)

In [None]:
#Turn the DataFrame into a plain NumPy array (one row per day)
#NOTE: despite its name, train_data holds the whole series here; it is split into train/test later
train_data = np.asarray(training_dataset)
train_data

In [None]:
#Split the series chronologically: the first 80% of the rows are used for training
#and the remaining 20% for testing (no shuffling, so the test set is the most recent data)
split_index = int(len(train_data) * 0.8)
dataset_train = np.array(train_data[:split_index])
dataset_test = np.array(train_data[split_index:])
#Show the shape (rows, columns) of each subset
for subset in (dataset_train, dataset_test):
    print(subset.shape)

In [None]:
#Scale the training data to the range [0,1]
#MinMaxScaler subtracts each feature's minimum and divides by its range (max - min)
#The scaler is fitted on the training subset only; the same fitted scaler is reused for the test data
scalar = MinMaxScaler(feature_range=(0, 1)).fit(dataset_train)
#NOTE: train_data is rebound here; from now on it holds the scaled training subset
train_data = scalar.transform(dataset_train)
train_data.shape

In [None]:
#Build the supervised pairs for one-step-ahead prediction:
#each input is the scaled price of one day and its target is the scaled price of the next day
#Slicing relative to the array ends (instead of hard-coding 399/400) keeps the pairs
#aligned whatever the number of training rows is
x_train = train_data[:-1]
y_train = train_data[1:]

#The LSTM expects (samples, timesteps, features); -1 lets NumPy infer the sample count
x_train = np.reshape(x_train, (-1, 1, 1))
x_train.shape

In [None]:
#Fix the NumPy and TensorFlow global seeds before any layer is created so that
#weight initialisation and dropout are repeatable between runs
#(results may still differ slightly across hardware or library versions)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

#Instantiate the Sequential model class
model = Sequential()
#Set the number of neurons/nodes with the units parameter
#input_shape=(None, 1): sequences of any length with 1 feature per timestep
#return_sequences=True makes the layer output its state at every timestep,
#which is the 3D input the next stacked LSTM layer needs
model.add(LSTM(units=96, return_sequences=True, input_shape=(None, 1)))
#Add dropout to reduce over-fitting
model.add(Dropout(0.2))
model.add(LSTM(units=96, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=96, return_sequences=True))
model.add(Dropout(0.2))
#The last LSTM returns only its final output, i.e. one 96-value vector per sample
model.add(LSTM(units=96))
model.add(Dropout(0.2))
#Dense layer maps the 96-value vector to the single predicted (scaled) price
model.add(Dense(units=1))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 96)          37632     
_________________________________________________________________
dropout (Dropout)            (None, None, 96)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 96)          74112     
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 96)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, None, 96)          74112     
_________________________________________________________________
dropout_2 (Dropout)          (None, None, 96)          0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 96)                7

In [None]:
#Configure training: minimise the mean squared error using the Adam optimizer
model.compile(loss="mean_squared_error", optimizer="adam")

In [None]:
#Fit the network: 100 passes (epochs) over the training pairs,
#with the weights updated after every batch of 32 samples
model.fit(x=x_train, y=y_train, batch_size=32, epochs=100)

In [None]:
#Print the (unscaled) testing data followed by its shape
for item in (dataset_test, dataset_test.shape):
    print(item)

In [None]:
#Scale the TEST data with the scaler that was fitted on the training data, then reshape it
#to (samples, timesteps, features); -1 infers the sample count instead of hard-coding it
inputs = np.reshape(scalar.transform(dataset_test), (-1, 1, 1))
#Reverse the scaled predictions to their original values
#NOTE: the model was trained to map each day's price to the following day's price,
#so stock_prediction[i] is the forecast for the day after dataset_test[i]
stock_prediction = scalar.inverse_transform(model.predict(inputs))
stock_prediction

In [None]:
#Remove the length-1 axes from the predictions array
stock_prediction = stock_prediction.squeeze()
stock_prediction

In [None]:
#Arrange the predictions as a single column again (one row per prediction)
stock_prediction = np.reshape(stock_prediction, (-1, 1))
stock_prediction

In [None]:
#Graph the real stock prices against the model's prediction
#The model forecasts the NEXT day's price from the current one, so prediction i is
#lined up with the real price of day i+1; plotting it against day i would compare
#each forecast with its own input. The last prediction has no real counterpart and is left out.
plt.plot(stock_prediction[:-1], label = 'Predicted Alphabet Stock Price', linewidth = 1.5)
plt.plot(dataset_test[1:], label = 'Real Alphabet Stock Price', linewidth = 1.5)
plt.title("Alphabet Stock Price Prediction with LSTM's")
plt.xlabel('Time (measured in days)')
plt.ylabel('Alphabet Stock Price($)')
plt.legend()
plt.show()

In [None]:
#Calculate the root mean squared error (RMSE) of the one-step-ahead forecasts
#stock_prediction[i] forecasts the day after dataset_test[i], so each forecast is scored
#against the following day's real price instead of against the value it was computed from
rmse_value = math.sqrt(mean_squared_error(dataset_test[1:], stock_prediction[:-1]))
rmse_value = round(rmse_value, 3)
print(rmse_value)

In [None]:
#Extract the minimum and maximum real stock price values
#ndarray.max()/min() reduce over the whole array and return a scalar; the built-in max()/min()
#return a 1-element array here, and float() on such an array is deprecated in recent NumPy releases
maxValTest = round(float(dataset_test.max()), 2)
minValTest = round(float(dataset_test.min()), 2)
print ("The real maximum stock price is: " + str(maxValTest) +" dollars")
print ("The real minimum stock price is: " + str(minValTest) + " dollars")

In [None]:
#Extract the minimum and maximum PREDICTED stock price values
#ndarray.max()/min() reduce over the whole array and return a scalar; the built-in max()/min()
#return a 1-element array here, and float() on such an array is deprecated in recent NumPy releases
maxValPredict = round(float(stock_prediction.max()), 2)
minValPredict = round(float(stock_prediction.min()), 2)
print ("The predicted maximum stock price is: " + str(maxValPredict) +" dollars")
print ("The predicted minimum stock price is: " + str(minValPredict) +" dollars")

In [None]:
#Calculate error and model accuracy with MAPE (mean absolute percentage error)
#stock_prediction[i] forecasts the day after dataset_test[i], so each forecast is paired
#with the following day's real price instead of with the value it was computed from
actual = dataset_test[1:].ravel()
forecast = stock_prediction[:-1].ravel()
#Vectorized MAPE in percent; assumes no real price is 0 (it is the divisor)
error = float(np.mean(np.abs(actual - forecast) / np.abs(actual)) * 100)
mape = round(error, 1) #round to 1 decimal place
accuracy = round(100 - mape, 1) #accuracy is reported as 100% minus the MAPE
print ("The LSTM's accuracy in predicting the stock price is: " + str(accuracy) + "%")

The LSTM's accuracy in predicting the stock price is: 99.7%
