In [None]:
# Description: This program uses an artificial recurrent neural network called Long Short-Term Memory (LSTM)
#              to predict the closing stock price of a corporation from the closing prices of the past 60 days.

In [None]:
# Import the libraries
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
# Use matplotlib's 'fivethirtyeight' style sheet for the figures drawn in this notebook
plt.style.use('fivethirtyeight')

In [None]:
#Download the AAPL quote history from the 'yahoo' data source for the given date range
df = web.DataReader(
    'AAPL',
    data_source='yahoo',
    start='2012-01-01',
    end='2020-03-24',
)
#Display the downloaded data
df

In [None]:
#Get the number of rows and columns in the data set, as a (rows, columns) tuple
df.shape

In [None]:
#Visualize the closing price history
#Plots the 'Close' column against the frame's index (labelled as the date axis)
plt.figure(figsize=(16,8))
plt.title('Close Price History')
plt.plot(df['Close'])
plt.xlabel('Date', fontsize=18)
#Axis label corrected from 'Close Data USE($)' so it matches the label on the later prediction plot
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.show()

In [None]:
#Keep only the 'Close' column in a new dataframe
data = df.filter(items=['Close'])
#Pull the underlying values out as a numpy array
dataset = data.values
#Use 80% of the rows (rounded up) to train the model
training_data_len = math.ceil(0.8 * dataset.shape[0])

training_data_len

In [None]:
#Scale the data
#Good practice to do? Why? Because its always advantageous to normalize the import data before presenting it to the network
# and is very beneficial for the model
scaler = MinMaxScaler(feature_range=(0,1))
#Computes the MIN and Max values to be used for scaling then transforms the data based on these values
scaled_data = scaler.fit_transform(dataset)

scaled_data

In [None]:
#As you can see these values are between 0 and 1

In [None]:
#Build the training data set from the scaled data
#Keep only the rows reserved for training
train_data = scaled_data[:training_data_len, :]
#x_train collects the 60-value input windows, y_train the value that follows each window
x_train, y_train = [], []

for stop in range(60, len(train_data)):
    window = train_data[stop - 60:stop, 0]  #the 60 values at positions stop-60 .. stop-1
    target = train_data[stop, 0]            #the value right after the window
    x_train.append(window)
    y_train.append(target)
    if stop < 62:
        #Show both lists after each of the first two iterations
        print(x_train)
        print(y_train)
        print()

In [None]:
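# x_train holds the windows of 60 consecutive scaled closing prices (the values printed together above)
# y_train holds the single value that follows each window (e.g. 0.115444)

# The model is trained to predict each y_train value from its x_train window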

In [None]:
#Turn the x_train and y_train lists into numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
#Reshape the data
#Why? The LSTM model expects 3D input, and x_train is currently 2D
#Check the current shape before reshaping
x_train.shape

In [None]:
#Add a trailing axis of length 1 so that x_train becomes 3D
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_train.shape

In [None]:
#Now we have made it into 3D

In [None]:
#Build the LSTM model: two stacked 50-unit LSTM layers followed by two dense layers
model = Sequential([
    #First LSTM returns the full sequence so the second LSTM can consume it
    LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    #Second LSTM returns only its final output
    LSTM(50, return_sequences=False),
    Dense(25),
    #Single output unit: the predicted (scaled) closing price
    Dense(1),
])

In [None]:
#Compile the model
#The optimizer ('adam') is used to improve upon the loss function
#The loss function (mean squared error) is used to see how well the model did on training
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
#Train the model
#An epoch is one complete pass of the training data set through the model
model.fit(
    x_train,
    y_train,
    epochs=1,
    batch_size=1,
)

In [None]:
#Create the testing data set
#Take the scaled rows starting 60 before the train/test split, so that the first
# test window has a full 60 values of history
test_data = scaled_data[training_data_len - 60:, :]
#y_test holds the actual (non-scaled) values that follow each window
y_test = dataset[training_data_len:, :]
#x_test holds one window of the past 60 scaled values per test row
x_test = [test_data[stop - 60:stop, 0] for stop in range(60, len(test_data))]

In [None]:
#Turn the list of windows into a numpy array so that it can be fed to the LSTM model
x_test = np.asarray(x_test)

In [None]:
#x_test is 2D at this point; add a trailing axis of length 1 to make it 3D once again
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
#Get the model's predicted (still scaled) price values
scaled_predictions = model.predict(x_test)
#Undo the scaling so the predictions are on the same scale as the y_test data set
predictions = scaler.inverse_transform(scaled_predictions)

In [None]:
# Get the root mean squared error (RMSE)

#RMSE is a good measure of how accurately the model predicts the response. It is the standard deviation of the residuals

# The lower the value of the RMSE, the better the fit.

In [None]:
#Root mean squared error: square each residual first, then average, then take the root.
#(Squaring the mean of the residuals instead lets positive and negative errors cancel
# and yields only the absolute value of the mean error.)
rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
rmse

In [None]:
#An RMSE of 0 would be a perfect match. We didn't get a value of 0, but ours is still decent

In [None]:
#Plot the data
#Rows used for training
train = data[:training_data_len]
#Held-out rows; take an explicit copy so that adding a column below modifies an
# independent frame instead of a slice of `data` (avoids SettingWithCopyWarning)
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions
#Visualize the data
plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
#Two lines from the held-out rows: the actual close and the model's prediction
plt.plot(valid[['Close','Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

In [None]:
#As you can see they are really close, so our model is good!

In [None]:
#Show the held-out rows: the actual 'Close' values next to the model's 'Predictions'
valid

In [None]:
#Download the AAPL quote history again
apple_quote = web.DataReader(
    'AAPL',
    data_source='yahoo',
    start='2012-01-01',
    end='2020-03-24',
)

#Keep only the 'Close' column in a new dataframe
new_df = apple_quote.filter(items=['Close'])

#Take the closing prices of the last 60 rows as a numpy array
last_60_days = new_df.iloc[-60:].values

In [None]:
#Scale the last 60 closing prices with the already-fitted scaler
last_60_days_scaled = scaler.transform(last_60_days)

#Wrap the single window in a list and convert it to a numpy array (a batch of one)
X_test = np.array([last_60_days_scaled])

#Reshape the data to (samples, time steps, 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

#Get the predicted scaled price, then undo the scaling
pred_price = scaler.inverse_transform(model.predict(X_test))
print(pred_price)

In [None]:
#Download the AAPL quote for the single day 2020-03-25 and print its closing price
apple_quote2 = web.DataReader(
    'AAPL',
    data_source='yahoo',
    start='2020-03-25',
    end='2020-03-25',
)
print(apple_quote2['Close'])