In [56]:
# Plotting tools 
import matplotlib.pyplot as plt 
import matplotlib.pyplot as plt 

#Numerical Tools 
from pandas import * 
import pandas as pd
import numpy as np 
import seaborn as sns

#LSTM modules 
import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense 
from tensorflow.keras.layers import LSTM 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

import os
import glob 
import warnings
warnings.simplefilter('ignore', FutureWarning)
from datetime import datetime

In [57]:
%matplotlib inline

In [58]:
# Stock to be used 
# MSFT - Microsoft 
# AAPL - Apple
# AMZN - Amazon

# Get CSV file 
files = os.listdir('FinancialDatasets/stocks')
print("Files: ", files)

print("\n")
# Stock Data for Apple 
AAPL_File_Path = os.path.join('FinancialDatasets/stocks', 'AAPL.csv')
AAPLStock = pd.read_csv(AAPL_File_Path, header=0, index_col=0, parse_dates=True)

#Clean data and remove old data prior to 2022 
AAPLStock.index = pd.to_datetime(AAPLStock.index, utc=True)
AAPLStock = AAPLStock.drop(AAPLStock[AAPLStock.index < '2022'].index)

print("Apple Stock Data :\n", AAPLStock)
print("\n")

# Stock Data for Amazon 
AMZN_File_Path = os.path.join('FinancialDatasets/stocks', 'AMZN.csv')
AMZNStock = pd.read_csv(AMZN_File_Path, header=0, index_col=0, parse_dates=True)

AMZNStock.index = pd.to_datetime(AMZNStock.index, utc=True)
AMZNStock = AMZNStock.drop(AMZNStock[AMZNStock.index < '2022'].index)
print("Amazon Stock Data :\n", AMZNStock)


# Stock Data for Microsoft
MSFT_File_Path = os.path.join('FinancialDatasets/stocks', 'MSFT.csv')
MSFTStock = pd.read_csv(MSFT_File_Path, header = 0, index_col=0, parse_dates=True)
# Now we will convert the date column and drop all dates prior to 2022
MSFTStock.index = pd.to_datetime(MSFTStock.index, utc=True)
MSFTStock = MSFTStock.drop(MSFTStock[MSFTStock.index < '2022'].index)

print("Microsoft Data: ", MSFTStock)

# Using 3 csv files containing stock market data of Apple, Microsoft and Amazon
# Each dataset will be used to predict future prices using the AutoRegressive Integreated Moving Average Model.

Files:  ['AAPL.csv', 'AMZN.csv', 'archive', 'MSFT.csv']


Apple Stock Data :
                                  Open        High         Low       Close  \
Date                                                                        
2022-01-03 05:00:00+00:00  176.052761  181.052294  175.933965  180.190979   
2022-01-04 05:00:00+00:00  180.804778  181.111677  177.329848  177.904053   
2022-01-05 05:00:00+00:00  177.814944  178.369344  172.894614  173.171814   
2022-01-06 05:00:00+00:00  170.973992  173.548013  169.924588  170.280991   
2022-01-07 05:00:00+00:00  171.162116  172.399623  169.320704  170.449310   
...                               ...         ...         ...         ...   
2023-09-15 04:00:00+00:00  176.479996  176.500000  173.820007  175.009995   
2023-09-18 04:00:00+00:00  176.479996  179.380005  176.169998  177.970001   
2023-09-19 04:00:00+00:00  177.520004  179.630005  177.130005  179.070007   
2023-09-20 04:00:00+00:00  179.259995  179.699997  175.399994  175.490005  

In [59]:
# AMZNStock.info()
# print("\n")
AAPLStock.info()
print("\n")
MSFTStock.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 432 entries, 2022-01-03 05:00:00+00:00 to 2023-09-21 04:00:00+00:00
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Open          432 non-null    float64
 1   High          432 non-null    float64
 2   Low           432 non-null    float64
 3   Close         432 non-null    float64
 4   Volume        432 non-null    int64  
 5   Dividends     432 non-null    float64
 6   Stock Splits  432 non-null    float64
dtypes: float64(6), int64(1)
memory usage: 27.0 KB


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 432 entries, 2022-01-03 05:00:00+00:00 to 2023-09-21 04:00:00+00:00
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Open          432 non-null    float64
 1   High          432 non-null    float64
 2   Low           432 non-null    float64
 3   Close         432 non-null    floa

In [60]:
# When using an LSTM, we need to convert the array of values into 
# a dataset matrix 
def create_dataset(dataset, look_back=1): 
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1): 
        a = dataset[i:(i+look_back), 0] 
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)
# Fix random seed in order for others to reproduce the experiment 
tf.random.set_seed(7)

In [68]:
# Using Amazon stock as a test dummy 
# We will calculate the average price from the highest and lowest price per day 
high_price = MSFTStock.loc[:,'High'].to_numpy()
low_price = MSFTStock.loc[:,'Low'].to_numpy()
mid_price = (high_price + low_price) / 2.0

print('High Price Length: ', len(high_price))
print('Low price length: ', len(low_price))
print('Mid_price', len(mid_price))

High Price Length:  432
Low price length:  432
Mid_price 432


In [69]:
# Split into train and test sets 
train_data = mid_price[:216]
test_data = mid_price[216:]

In [71]:
scaler = MinMaxScaler()
train_data = train_data.reshape(-1, 1)
test_data = test_data.reshape(-1, 1)

In [72]:
#Create and fit the LSTM Network
model = Sequential()
model.add(LSTM(4, input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

Epoch 1/100


  super().__init__(**kwargs)


287/287 - 1s - 5ms/step - loss: 0.1281
Epoch 2/100
287/287 - 0s - 1ms/step - loss: 0.0381
Epoch 3/100
287/287 - 0s - 1ms/step - loss: 0.0243
Epoch 4/100
287/287 - 0s - 1ms/step - loss: 0.0140
Epoch 5/100
287/287 - 0s - 1ms/step - loss: 0.0069
Epoch 6/100
287/287 - 0s - 1ms/step - loss: 0.0034
Epoch 7/100
287/287 - 0s - 1ms/step - loss: 0.0023
Epoch 8/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 9/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 10/100
287/287 - 0s - 991us/step - loss: 0.0021
Epoch 11/100
287/287 - 0s - 981us/step - loss: 0.0021
Epoch 12/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 13/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 14/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 15/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 16/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 17/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 18/100
287/287 - 0s - 1ms/step - loss: 0.0021
Epoch 19/100
287/287 - 0s - 995us/step - loss: 0.0021
Epoch 20/100
287/287 - 0s -

<keras.src.callbacks.history.History at 0x19fa47647d0>