## [1] Importing Libraries

In [4]:
# General libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

# For uploading data
# from google.colab import files

# Preprocessing
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from sklearn.model_selection import TimeSeriesSplit


# Model
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential, load_model
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.utils import plot_model

from sklearn.linear_model import LinearRegression

# Testing
from sklearn.metrics import mean_squared_error, r2_score

# Finding function to relate epochs & loss
# from scipy.optimize import curve_fit

# Allows for panning or zooming in plots
# %matplotlib inline

# Yahoo finance for data
import yfinance as yf


## [2] Loading & Visualizing Dataset

In [23]:
# Load data with yahoo finance library
aapl = yf.Ticker("AAPL")
tsla = yf.Ticker("TSLA")
amzn = yf.Ticker("AMZN")
msft = yf.Ticker("MSFT")
googl = yf.Ticker("GOOGL")
tickers = [aapl, tsla, amzn, msft, googl] # Randomly chosen by chatGPT

# Historical data usage (for reference):
hist = msft.history(period="max")
# print("type(hist):", type(hist))  # It's a dataframe baby
print(hist.head())

                               Open      High       Low     Close      Volume  \
Date                                                                            
1986-03-13 00:00:00-05:00  0.054693  0.062736  0.054693  0.060055  1031788800   
1986-03-14 00:00:00-05:00  0.060055  0.063272  0.060055  0.062199   308160000   
1986-03-17 00:00:00-05:00  0.062199  0.063808  0.062199  0.063272   133171200   
1986-03-18 00:00:00-05:00  0.063272  0.063808  0.061127  0.061663    67766400   
1986-03-19 00:00:00-05:00  0.061663  0.062199  0.060055  0.060591    47894400   

                           Dividends  Stock Splits  
Date                                                
1986-03-13 00:00:00-05:00        0.0           0.0  
1986-03-14 00:00:00-05:00        0.0           0.0  
1986-03-17 00:00:00-05:00        0.0           0.0  
1986-03-18 00:00:00-05:00        0.0           0.0  
1986-03-19 00:00:00-05:00        0.0           0.0  


## [4] Train test split + Model Construction and Training all in one

In [26]:
print(tickers)
for ticker in tickers: print(ticker)

[yfinance.Ticker object <AAPL>, yfinance.Ticker object <TSLA>, yfinance.Ticker object <AMZN>, yfinance.Ticker object <MSFT>, yfinance.Ticker object <GOOGL>]
yfinance.Ticker object <AAPL>
yfinance.Ticker object <TSLA>
yfinance.Ticker object <AMZN>
yfinance.Ticker object <MSFT>
yfinance.Ticker object <GOOGL>


In [24]:
# This stuff is global & consistent across all tickers
# Pre-processing
Scaler = MinMaxScaler()
# Splitting Data
num_splits = 7 # Modifiable
num_epochs=25
tss = TimeSeriesSplit(n_splits=num_splits)
# Model
model = Sequential()
model.add(LSTM(32, activation='relu', return_sequences=True))
model.add(Dropout(0.1))
model.add(Dense(1))
model.add(Dropout(0.1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Loss Summary
lossDict = {} # Ticker: array of losses, will average out later
## This is the general structure I think:
for ticker in tickers:
    # Separate inputs & outputs
    hist = ticker.history(period="max")
    inputs = hist[['Open', 'High', 'Low', 'Volume']]
    outputs = hist['Close']

    # Preprocess w/MinMaxScaler to make operations less intensive
    df = Scaler.fit_transform(inputs)
    df = pd.DataFrame(columns=inputs.columns, data=df, index=hist.index)


    # Split data into training & testing

    X_train, X_test, y_train, y_test = None, None, None, None # Init now to avoid scope issues
    # i love github copilot sm
    for train_index, test_index in tss.split(df):
        # X_train = everything in df until train_index
        # X_test = everything in df after train_index
        X_train, X_test = df[:len(train_index)], df[len(train_index):]
        y_train, y_test = outputs[:len(train_index)], outputs[len(train_index):]

    # Reshape data for LSTM
    X_train = np.array(X_train).reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test = np.array(X_test).reshape(X_test.shape[0], 1, X_test.shape[1])

    # Train
    lossDict[ticker] = []
    print("Training model for", ticker)
    history = model.fit(X_train, y_train, epochs=num_epochs, batch_size=10, verbose=1, shuffle=False)
    lossDict[ticker].append(history.history['loss'])


Training model for yfinance.Ticker object <AAPL>
Epoch 1/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 463us/step - loss: 2.1893
Epoch 2/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 401us/step - loss: 116.6340
Epoch 3/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 404us/step - loss: 51.4044
Epoch 4/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 403us/step - loss: 32.3913
Epoch 5/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 432us/step - loss: 18.1979
Epoch 6/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 403us/step - loss: 8.4047
Epoch 7/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402us/step - loss: 4.2050
Epoch 8/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402us/step - loss: 3.2470
Epoch 9/25
[1m963/963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402us/step - loss: 2.5273
Epoch 10/25
[1m963

In [29]:
# print(lossDict)

for key in lossDict.keys():
    lossDict[key] = np.mean(lossDict[key])
    print(key, ":", lossDict[key])

yfinance.Ticker object <AAPL> : 22.929141731262206
yfinance.Ticker object <TSLA> : 1306.60736328125
yfinance.Ticker object <AMZN> : 225.48595703125
yfinance.Ticker object <MSFT> : 123.69180084228516
yfinance.Ticker object <GOOGL> : 236.04610717773437
