<a href="https://colab.research.google.com/github/brody-looney/Stock-Prediction-LSTM/blob/main/10_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Author: Brody Looney
#File: 10_LSTM.ipynb
#Purpose: Stock Predictor LSTM for COS470 Project. This model trains on AAPL data and predicts the stock price of 10 tickers.

In [None]:
#Upload the .csv files here (this cell relies on google.colab, so it only runs inside Colab)
#AAPL_train.csv is the training file
#Every <TICKER>_test.csv file read by the loading cell must be uploaded as well

from google.colab import files
#The upload result is kept in `uploaded`; the later cells shown here read the CSVs by filename instead
uploaded = files.upload()

In [5]:
#Importing the necessary attributes
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from keras import Sequential
from keras.layers import Dense, LSTM, Dropout

#Load the AAPL training frame first
train = pd.read_csv('AAPL_train.csv')

#Then load one test frame per ticker; the files are read in the order listed
(
    test_AAPL, test_AMZN, test_FB, test_GE, test_GOOGL,
    test_GS, test_IBM, test_JPM, test_MSFT, test_TSLA,
) = [
    pd.read_csv(csv_name)
    for csv_name in (
        'AAPL_test.csv',
        'AMZN_test.csv',
        'FB_test.csv',
        'GE_test.csv',
        'GOOGL_test.csv',
        'GS_test.csv',
        'IBM_test.csv',
        'JPM_test.csv',
        'MSFT_test.csv',
        'TSLA_test.csv',
    )
]


#Dates to be used in the graphical visualization

def _parse_dates(frame):
    """Return the frame's 'Date' column as a list of datetime.date objects.

    Every entry is parsed with the '%m/%d/%Y' format; strptime raises if an
    entry does not match that layout.
    """
    return [dt.datetime.strptime(date, '%m/%d/%Y').date() for date in frame['Date']]


#One date list per ticker, in the same row order as the corresponding test frame
dates_AAPL = _parse_dates(test_AAPL)
dates_AMZN = _parse_dates(test_AMZN)
dates_FB = _parse_dates(test_FB)
dates_GE = _parse_dates(test_GE)
dates_GOOGL = _parse_dates(test_GOOGL)
dates_GS = _parse_dates(test_GS)
dates_IBM = _parse_dates(test_IBM)
dates_JPM = _parse_dates(test_JPM)
dates_MSFT = _parse_dates(test_MSFT)
dates_TSLA = _parse_dates(test_TSLA)

#Keep only the columns this predictor works with (the EMAs and the close price)
train = train.drop(columns=['Date', 'Volume', 'Open', 'High', 'Low'])

#Fit a min-max scaler on the training columns and scale them in one step
train_scaler = MinMaxScaler()
scaled_train = train_scaler.fit_transform(train)

#Build the LSTM samples: sample k is the single scaled row k-1 (one timestep),
#and its target is column 0 of scaled row k
n_rows = train.shape[0]
X_train = np.array([scaled_train[row - 1:row] for row in range(1, n_rows)])
y_train = np.array([scaled_train[row, 0] for row in range(1, n_rows)])

In [None]:
#Creating the LSTM model with multiple layers
#Architecture: three stacked LSTM layers (100, 100, 150 units), each followed by
#25% dropout, then a single Dense unit that outputs the scaled target value.
model = Sequential()

#Both input dimensions (timesteps, features) are read from X_train so the model
#follows the prepared data instead of a hard-coded feature count.
model.add(LSTM(units = 100, return_sequences = True, input_shape = (X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.25))

#return_sequences stays True here because another LSTM layer consumes the sequence
model.add(LSTM(units = 100, return_sequences = True))
model.add(Dropout(0.25))

#Last recurrent layer returns only its final output, which feeds the Dense head
model.add(LSTM(units = 150))
model.add(Dropout(0.25))

model.add(Dense(units = 1))

#LSTM Compilation: Adam optimizer with mean-squared-error loss, then 10 epochs in batches of 10
model.compile(optimizer='adam', loss = 'mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=10)

In [8]:
#Splitting up the test sets for all 10 tickers
#fdate is used to represent final date

def _prepare_test_set(test_frame, dates):
    """Scale one ticker's test frame and cut it into one-step samples.

    Parameters:
        test_frame: the ticker's test DataFrame as read from its CSV.
        dates: list of dates aligned row-for-row with test_frame.

    Returns a tuple (scaler, scaled, X, y, fdate):
        scaler: MinMaxScaler fitted on this ticker's remaining columns only.
        scaled: the scaled array produced by that scaler.
        X: array of single-row windows; the sample for row i holds scaled row i-1.
        y: column 0 of scaled row i for every sample.
        fdate: the entry of `dates` at row i, i.e. the date each target belongs to.
    """
    scaler = MinMaxScaler()
    features = test_frame.drop(['Date', 'Volume', 'Open', 'High', 'Low'], axis = 1)
    scaled = scaler.fit_transform(features)

    #Row 0 has no predecessor, so samples start at row 1
    rows = range(1, scaled.shape[0])
    X = np.array([scaled[i-1:i] for i in rows])
    y = np.array([scaled[i, 0] for i in rows])
    fdate = [dates[i] for i in rows]
    return scaler, scaled, X, y, fdate


AAPL_scaler, scaled_AAPL, X_AAPL, y_AAPL, fdate_AAPL = _prepare_test_set(test_AAPL, dates_AAPL)
AMZN_scaler, scaled_AMZN, X_AMZN, y_AMZN, fdate_AMZN = _prepare_test_set(test_AMZN, dates_AMZN)
FB_scaler, scaled_FB, X_FB, y_FB, fdate_FB = _prepare_test_set(test_FB, dates_FB)
GE_scaler, scaled_GE, X_GE, y_GE, fdate_GE = _prepare_test_set(test_GE, dates_GE)
GOOGL_scaler, scaled_GOOGL, X_GOOGL, y_GOOGL, fdate_GOOGL = _prepare_test_set(test_GOOGL, dates_GOOGL)
GS_scaler, scaled_GS, X_GS, y_GS, fdate_GS = _prepare_test_set(test_GS, dates_GS)
IBM_scaler, scaled_IBM, X_IBM, y_IBM, fdate_IBM = _prepare_test_set(test_IBM, dates_IBM)
JPM_scaler, scaled_JPM, X_JPM, y_JPM, fdate_JPM = _prepare_test_set(test_JPM, dates_JPM)
MSFT_scaler, scaled_MSFT, X_MSFT, y_MSFT, fdate_MSFT = _prepare_test_set(test_MSFT, dates_MSFT)
TSLA_scaler, scaled_TSLA, X_TSLA, y_TSLA, fdate_TSLA = _prepare_test_set(test_TSLA, dates_TSLA)

In [28]:
#Predicting on every test set and converting the predictions back to price units.
#
#The training and test targets are column 0 of the scaled arrays (y = scaled[i, 0]),
#so the model's output lives in column 0's scaled space. It is therefore inverted
#with column 0's fitted parameters, taken directly from each ticker's scaler rather
#than from hand-copied constants and rounded offsets.
#NOTE(review): hand-copied constants matching scale_[3] were used here before;
#confirm that column 0 of the scaled frame really is the close price.
TARGET_COLUMN = 0

def _to_price_units(scaled_values, scaler, column = TARGET_COLUMN):
    """Undo MinMaxScaler's transform for a single column.

    MinMaxScaler computes X_scaled = X * scale_ + min_ per column, so the
    original value is (X_scaled - min_) / scale_.

    Parameters:
        scaled_values: array of values in the scaled space of `column`.
        scaler: the fitted MinMaxScaler that produced that space.
        column: index of the column whose parameters are used.

    Returns an array of the same shape as scaled_values in original units.
    """
    return (scaled_values - scaler.min_[column]) / scaler.scale_[column]


#Inverse of the fitted scale for the target column of each ticker
scale_AAPL = 1/AAPL_scaler.scale_[TARGET_COLUMN]
scale_AMZN = 1/AMZN_scaler.scale_[TARGET_COLUMN]
scale_FB = 1/FB_scaler.scale_[TARGET_COLUMN]
scale_GE = 1/GE_scaler.scale_[TARGET_COLUMN]
scale_GOOGL = 1/GOOGL_scaler.scale_[TARGET_COLUMN]
scale_GS = 1/GS_scaler.scale_[TARGET_COLUMN]
scale_IBM = 1/IBM_scaler.scale_[TARGET_COLUMN]
scale_JPM = 1/JPM_scaler.scale_[TARGET_COLUMN]
scale_MSFT = 1/MSFT_scaler.scale_[TARGET_COLUMN]
scale_TSLA = 1/TSLA_scaler.scale_[TARGET_COLUMN]

#For each ticker: predict on the scaled windows, rescale the prediction, and keep
#the single column left after the drop as the real series to plot against

#AAPL
prediction_AAPL = _to_price_units(model.predict(X_AAPL), AAPL_scaler)
real_AAPL = test_AAPL.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#AMZN
prediction_AMZN = _to_price_units(model.predict(X_AMZN), AMZN_scaler)
real_AMZN = test_AMZN.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#FB
prediction_FB = _to_price_units(model.predict(X_FB), FB_scaler)
real_FB = test_FB.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#GE
prediction_GE = _to_price_units(model.predict(X_GE), GE_scaler)
real_GE = test_GE.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#GOOGL
prediction_GOOGL = _to_price_units(model.predict(X_GOOGL), GOOGL_scaler)
real_GOOGL = test_GOOGL.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#GS
prediction_GS = _to_price_units(model.predict(X_GS), GS_scaler)
real_GS = test_GS.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#IBM
prediction_IBM = _to_price_units(model.predict(X_IBM), IBM_scaler)
real_IBM = test_IBM.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#JPM
prediction_JPM = _to_price_units(model.predict(X_JPM), JPM_scaler)
real_JPM = test_JPM.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#MSFT
prediction_MSFT = _to_price_units(model.predict(X_MSFT), MSFT_scaler)
real_MSFT = test_MSFT.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

#TSLA
prediction_TSLA = _to_price_units(model.predict(X_TSLA), TSLA_scaler)
real_TSLA = test_TSLA.drop(['Date', 'Volume', 'EMA10', 'EMA50', 'EMA200', 'Open', 'High', 'Low'], axis = 1)

In [None]:
#Plotting the real and the predicted price for every ticker, one figure each

def _plot_real_vs_predicted(ticker, real_dates, real_prices, predicted_dates, predicted_prices):
    """Draw one figure comparing a ticker's real and predicted price series.

    Parameters:
        ticker: ticker symbol used in the title, axis label and legend entries.
        real_dates: x values for the real price line.
        real_prices: y values for the real price line (drawn in red).
        predicted_dates: x values for the predicted line.
        predicted_prices: y values for the predicted line (drawn in blue).
    """
    fig, ax = plt.subplots(figsize=(14,5))
    ax.plot(real_dates, real_prices, color = 'red', label = f'Real {ticker} Stock Price')
    ax.plot(predicted_dates, predicted_prices, color = 'blue', label = f'Predicted {ticker} Stock Price')
    #Rotate and align the date tick labels so they stay readable
    fig.autofmt_xdate()
    ax.set_title(f'{ticker} Stock Price Prediction')
    ax.set_xlabel('Date')
    ax.set_ylabel(f'{ticker} Stock Price')
    ax.legend()
    plt.show()


#The real series uses every test date; the predicted series uses fdate_*, the dates
#collected alongside the targets when the test windows were built
for ticker, real_dates, real_prices, predicted_dates, predicted_prices in [
    ('AAPL', dates_AAPL, real_AAPL, fdate_AAPL, prediction_AAPL),
    ('AMZN', dates_AMZN, real_AMZN, fdate_AMZN, prediction_AMZN),
    ('FB', dates_FB, real_FB, fdate_FB, prediction_FB),
    ('GE', dates_GE, real_GE, fdate_GE, prediction_GE),
    ('GOOGL', dates_GOOGL, real_GOOGL, fdate_GOOGL, prediction_GOOGL),
    ('GS', dates_GS, real_GS, fdate_GS, prediction_GS),
    ('IBM', dates_IBM, real_IBM, fdate_IBM, prediction_IBM),
    ('JPM', dates_JPM, real_JPM, fdate_JPM, prediction_JPM),
    ('MSFT', dates_MSFT, real_MSFT, fdate_MSFT, prediction_MSFT),
    ('TSLA', dates_TSLA, real_TSLA, fdate_TSLA, prediction_TSLA),
]:
    _plot_real_vs_predicted(ticker, real_dates, real_prices, predicted_dates, predicted_prices)

In [11]:
#Display the per-column scale factors fitted by AAPL_scaler (one entry per scaled column)
AAPL_scaler.scale_

array([0.00329924, 0.00329924, 0.00329924, 0.00311614])

In [12]:
#Display the per-column scale factors fitted by AMZN_scaler (one entry per scaled column)
AMZN_scaler.scale_

array([0.00057555, 0.00057555, 0.00057555, 0.00053162])

In [14]:
#Display the per-column scale factors fitted by FB_scaler (one entry per scaled column)
FB_scaler.scale_

array([0.00918695, 0.00918695, 0.00918695, 0.00816793])

In [15]:
#Display the per-column scale factors fitted by GE_scaler (one entry per scaled column)
GE_scaler.scale_

array([0.04480287, 0.04480287, 0.04480287, 0.04393673])

In [16]:
#Display the per-column scale factors fitted by GOOGL_scaler (one entry per scaled column)
GOOGL_scaler.scale_

array([0.00185673, 0.00185673, 0.00185673, 0.0017236 ])

In [17]:
#Display the per-column scale factors fitted by GS_scaler (one entry per scaled column)
GS_scaler.scale_

array([0.0087581 , 0.0087581 , 0.0087581 , 0.00783822])

In [18]:
#Display the per-column scale factors fitted by IBM_scaler (one entry per scaled column)
IBM_scaler.scale_

array([0.0172503 , 0.0172503 , 0.0172503 , 0.01586043])

In [19]:
#Display the per-column scale factors fitted by JPM_scaler (one entry per scaled column)
JPM_scaler.scale_

array([0.01296008, 0.01296008, 0.01296008, 0.01267267])

In [20]:
#Display the per-column scale factors fitted by MSFT_scaler (one entry per scaled column)
MSFT_scaler.scale_

array([0.00698959, 0.00698959, 0.00698959, 0.00680921])

In [21]:
#Display the per-column scale factors fitted by TSLA_scaler (one entry per scaled column)
TSLA_scaler.scale_

array([0.00074095, 0.00074095, 0.00074095, 0.00069983])