In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import yfinance as yf
import matplotlib.pyplot as plt

In [None]:
# Clear the Keras backend session before any model is built.
# NOTE(review): the original note says this is done to free GPU memory; whether
# GPU memory is actually released cannot be confirmed from this notebook.
tf.keras.backend.clear_session()

In [None]:
from pandas_datareader import DataReader
from pandas_datareader import data as pdr

In [None]:
from datetime import datetime

## Case 1: Large Dataset

### Importing data from yahoo finance

In [None]:
# Date window for the large (multi-year) dataset, written as "YYYY-MM-DD" strings
start = "2007-01-01"
end = "2023-07-31"

# Parse both bounds into datetime objects in a single pass
start_date, end_date = (
  datetime.strptime(bound, "%Y-%m-%d") for bound in (start, end)
)

# Set up the data reader to use Yahoo Finance (yfinance override)
yf.pdr_override()

In [None]:
# Tickers of the technology-sector stocks used throughout this notebook
stock_types = [
  "AAPL",   # Apple
  "MSFT",   # Microsoft
  "AMZN",   # Amazon
  "NFLX",   # Netflix
  "INFY",   # Infosys
  "ADBE",   # Adobe
  "GOOGL",  # Google (Class A - gives voting rights)
  "NVDA",   # NVIDIA Corporation
]

In [None]:
# One placeholder entry (None) per ticker, keyed in the same order as stock_types
stock_dict = dict.fromkeys(stock_types)

In [None]:
# List the tickers registered in stock_dict, one per line
for stock in stock_dict.keys():
  print(stock)

AAPL
MSFT
AMZN
NFLX
INFY
ADBE
GOOGL
NVDA


In [None]:
for stock in stock_dict:
  # Fetch the price history for this ticker between start_date and end_date
  df = pdr.get_data_yahoo(stock, start=start_date, end=end_date)

  # Validate BEFORE touching the frame. The original called reset_index() first,
  # so its `is not None` guard could never trigger, and a download that returned
  # no rows was stored silently. Fail here with a message that names the ticker.
  if df is None or df.empty:
    raise ValueError(f"No price data returned for {stock}")

  # Turn the Date index into a regular column
  df = df.reset_index()

  stock_dict[stock] = df
  print(stock + "-" )
  print(df.head())  # Display the first few rows of the loaded data

[*********************100%***********************]  1 of 1 completed
AAPL-
        Date      Open      High       Low     Close  Adj Close      Volume
0 2007-01-03  3.081786  3.092143  2.925000  2.992857   2.540327  1238319600
1 2007-01-04  3.001786  3.069643  2.993571  3.059286   2.596711   847260400
2 2007-01-05  3.063214  3.078571  3.014286  3.037500   2.578220   834741600
3 2007-01-08  3.070000  3.090357  3.045714  3.052500   2.590951   797106800
4 2007-01-09  3.087500  3.320714  3.041071  3.306071   2.806182  3349298400
[*********************100%***********************]  1 of 1 completed
MSFT-
        Date       Open       High        Low      Close  Adj Close    Volume
0 2007-01-03  29.910000  30.250000  29.400000  29.860001  21.525984  76935100
1 2007-01-04  29.700001  29.969999  29.440001  29.809999  21.489935  45774500
2 2007-01-05  29.629999  29.750000  29.450001  29.639999  21.367382  44607200
3 2007-01-08  29.650000  30.100000  29.530001  29.930000  21.576443  50220200
4 2

In [None]:
# Number of rows downloaded for AAPL
len(stock_dict["AAPL"])

4171

In [None]:
# Print column dtypes / null counts and summary statistics for every ticker
for stock in stock_dict:
  print(stock + "-")
  # DataFrame.info() writes its report to stdout itself and returns None, so
  # wrapping it in print() only appended a stray "None" line to the output.
  stock_dict[stock].info()
  print(stock_dict[stock].describe())

AAPL-
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4171 entries, 0 to 4170
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       4171 non-null   datetime64[ns]
 1   Open       4171 non-null   float64       
 2   High       4171 non-null   float64       
 3   Low        4171 non-null   float64       
 4   Close      4171 non-null   float64       
 5   Adj Close  4171 non-null   float64       
 6   Volume     4171 non-null   int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 228.2 KB
None
              Open         High          Low        Close    Adj Close  \
count  4171.000000  4171.000000  4171.000000  4171.000000  4171.000000   
mean     46.785610    47.307183    46.286132    46.818624    45.051682   
std      50.777082    51.386761    50.223518    50.833700    50.967575   
min       2.835357     2.928571     2.792857     2.792857     2.370567   
25%      11.973572 

In [None]:
import math

In [None]:
dataset = {}

# NOTE: keep the loop variable named `stock` - it stays bound after the loop
# finishes and code outside this cell may read it.
for stock in stock_dict:

  # Keep only the closing prices (still a one-column DataFrame)
  data = stock_dict[stock].filter(items=['Close'])

  # Store them as a NumPy column vector of shape (n_rows, 1)
  dataset[stock] = data.to_numpy().reshape(-1, 1)

In [None]:
# Number of leading rows used for training (80% of the series, rounded up).
# The original read the loop variable `stock` left over from a previous cell,
# which only works if that cell happened to run last. Derive the length from
# `dataset` directly instead; its last entry is the one the leaked variable
# pointed at, so the resulting value is unchanged.
# NOTE(review): this single split index is applied to every ticker, which
# presumes all series have the same number of rows - confirm.
reference_series = list(dataset.values())[-1]
training_data_len = math.ceil(len(reference_series) * .8)
training_data_len

3337

### Scaling the data for better results and making it suitable for the LSTM model

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-max scaler configured to map values into the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
scaled_data_dict = {}
# One fitted scaler per ticker. The original reused a single shared scaler and
# called fit_transform() on it for every ticker, so after the loop only the
# last ticker's min/max survived and no other ticker's values could be mapped
# back to prices with inverse_transform().
scaler_dict = {}

for stock in dataset:
  # Scale this ticker's closing prices to values between 0 and 1.
  # NOTE(review): the scaler is fitted on the full series, including the rows
  # that fall after the training split - consider fitting on the training
  # portion only to avoid look-ahead leakage.
  stock_scaler = MinMaxScaler(feature_range=(0, 1))
  scaled_data_dict[stock] = stock_scaler.fit_transform(dataset[stock])
  scaler_dict[stock] = stock_scaler

  # Backward compatibility: `scaler` keeps referring to the most recently
  # fitted scaler, exactly as it did when one instance was re-fitted in place.
  scaler = stock_scaler

In [None]:
# Inspect the scaled AAPL closing prices (long arrays are abbreviated when printed)
print(scaled_data_dict["AAPL"])

[[0.00103607]
 [0.0013802 ]
 [0.00126734]
 ...
 [0.99311012]
 [0.98647928]
 [1.        ]]


In [None]:
train_data_dict = {}
x_train_dict = {}
y_train_dict = {}

for stock in scaled_data_dict:
  # Leading slice of the scaled series reserved for training
  train_data_dict[stock] = scaled_data_dict[stock][:training_data_len, :]
  series = train_data_dict[stock][:, 0]

  # Each sample pairs the 60 previous scaled closes with the close that follows
  window_ends = range(60, len(series))
  x_train = [series[stop - 60:stop] for stop in window_ends]
  y_train = [series[stop] for stop in window_ends]

  # Convert the collected samples and targets to NumPy arrays
  features = np.array(x_train)
  y_train_dict[stock] = np.array(y_train)

  # Add a trailing axis of size 1: (samples, timesteps) -> (samples, timesteps, 1)
  x_train_dict[stock] = features.reshape(features.shape[0], features.shape[1], 1)

In [None]:
# Shape of the AAPL training tensor: (samples, window length, 1)
x_train_dict["AAPL"].shape

(3277, 60, 1)

In [None]:
from keras.models import Sequential
from keras.layers import Dense,LSTM, Dropout

In [None]:
model_dict = {}

for stock in x_train_dict:

  print(stock, "-")

  # Build a fresh two-layer LSTM network for this ticker.
  # The window length comes from this ticker's own training tensor instead of a
  # hard-coded "AAPL" entry, so the cell no longer depends on that key existing.
  timesteps = x_train_dict[stock].shape[1]
  model = Sequential()
  model.add(LSTM(units=50, return_sequences=True, input_shape=(timesteps, 1)))
  model.add(LSTM(units=50, return_sequences=False))
  model.add(Dense(units=25))
  model.add(Dense(units=1))

  # Show the architecture once. summary() prints the table itself and returns
  # None, so wrapping it in print() only added a stray "None" line.
  if stock == "AAPL":
    model.summary()

  # Compile with the Adam optimizer and mean-squared-error loss
  model.compile(optimizer='adam', loss='mean_squared_error')

  # Train on this ticker's windows.
  # NOTE(review): batch_size=1 performs one update per sample; kept as in the
  # original so results are unchanged, but worth revisiting for training time.
  model.fit(x_train_dict[stock], y_train_dict[stock], batch_size=1, epochs=20)

  # Store the trained model in the model dictionary
  model_dict[stock] = model

  print()

AAPL -
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 60, 50)            10400     
                                                                 
 lstm_1 (LSTM)               (None, 50)                20200     
                                                                 
 dense (Dense)               (None, 25)                1275      
                                                                 
 dense_1 (Dense)             (None, 1)                 26        
                                                                 
Total params: 31,901
Trainable params: 31,901
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
E

In [None]:
# Write every trained model to its own file named keras_model_<TICKER>.h5
for stock, trained_model in model_dict.items():
  filename = "keras_model_" + stock + ".h5"
  trained_model.save(filename)

In [None]:
import datetime

## Case 2: Small Dataset

### Importing dataset from yahoo finance

In [None]:
from datetime import datetime

In [None]:
# Date window for the small dataset, written as "YYYY-MM-DD" strings
start = "2023-01-01"
end = "2023-07-31"

# Parse both bounds into datetime objects in a single pass
start_date, end_date = (
  datetime.strptime(bound, "%Y-%m-%d") for bound in (start, end)
)

# Set up the data reader to use Yahoo Finance (yfinance override)
yf.pdr_override()

In [None]:
# One placeholder entry (None) per ticker for the small dataset
stock_dict_small = dict.fromkeys(stock_types)

In [None]:
for stock in stock_dict_small:
  # Fetch the price history for this ticker between start_date and end_date
  df = pdr.get_data_yahoo(stock, start=start_date, end=end_date)

  # Validate BEFORE touching the frame. The original called reset_index() first,
  # so its `is not None` guard could never trigger, and a download that returned
  # no rows was stored silently. Fail here with a message that names the ticker.
  if df is None or df.empty:
    raise ValueError(f"No price data returned for {stock}")

  # Turn the Date index into a regular column
  df = df.reset_index()

  stock_dict_small[stock] = df
  print(stock + "-" )
  print(df.head())  # Display the first few rows of the loaded data

[*********************100%***********************]  1 of 1 completed
AAPL-
        Date        Open        High         Low       Close   Adj Close  \
0 2023-01-03  130.279999  130.899994  124.169998  125.070000  124.538658   
1 2023-01-04  126.889999  128.660004  125.080002  126.360001  125.823189   
2 2023-01-05  127.129997  127.769997  124.760002  125.019997  124.488869   
3 2023-01-06  126.010002  130.289993  124.889999  129.619995  129.069336   
4 2023-01-09  130.470001  133.410004  129.889999  130.149994  129.597076   

      Volume  
0  112117500  
1   89113600  
2   80962700  
3   87754700  
4   70790800  
[*********************100%***********************]  1 of 1 completed
MSFT-
        Date        Open        High         Low       Close   Adj Close  \
0 2023-01-03  243.080002  245.750000  237.399994  239.580002  238.460129   
1 2023-01-04  232.279999  232.869995  225.960007  229.100006  228.029114   
2 2023-01-05  227.199997  227.550003  221.759995  222.309998  221.270859   

In [None]:
# Print column dtypes / null counts and summary statistics for every ticker of
# the small dataset. Iterate the small dictionary itself: the original looped
# over the large-dataset dictionary while indexing the small one, which only
# worked because both happen to share the same keys.
for stock in stock_dict_small:
  print(stock + "-")
  # DataFrame.info() writes its report to stdout itself and returns None, so
  # wrapping it in print() only appended a stray "None" line to the output.
  stock_dict_small[stock].info()
  print(stock_dict_small[stock].describe())

AAPL-
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143 entries, 0 to 142
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       143 non-null    datetime64[ns]
 1   Open       143 non-null    float64       
 2   High       143 non-null    float64       
 3   Low        143 non-null    float64       
 4   Close      143 non-null    float64       
 5   Adj Close  143 non-null    float64       
 6   Volume     143 non-null    int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 7.9 KB
None
             Open        High         Low       Close   Adj Close  \
count  143.000000  143.000000  143.000000  143.000000  143.000000   
mean   164.616504  166.341399  163.453497  165.059231  164.663173   
std     18.615219   18.412319   18.713290   18.486263   18.591757   
min    126.010002  127.769997  124.169998  125.019997  124.488869   
25%    150.794998  153.195000  150.010002  151.3

In [None]:
dataset_small = {}

# NOTE: keep the loop variable named `stock` - it stays bound after the loop
# finishes and code outside this cell may read it.
for stock in stock_dict_small:

  # Keep only the closing prices (still a one-column DataFrame)
  data = stock_dict_small[stock].filter(items=['Close'])

  # Store them as a NumPy column vector of shape (n_rows, 1)
  dataset_small[stock] = data.to_numpy().reshape(-1, 1)

In [None]:
# Number of leading rows used for training (80% of the series, rounded up).
# The original read the loop variable `stock` left over from a previous cell,
# which only works if that cell happened to run last. Derive the length from
# `dataset_small` directly instead; its last entry is the one the leaked
# variable pointed at, so the resulting value is unchanged.
# NOTE(review): this single split index is applied to every ticker, which
# presumes all series have the same number of rows - confirm.
reference_series_small = list(dataset_small.values())[-1]
training_data_len_small = math.ceil(len(reference_series_small) * .8)
training_data_len_small

115

In [None]:
scaled_data_dict_small = {}
# One fitted scaler per ticker. The original reused a single shared scaler and
# called fit_transform() on it for every ticker, so after the loop only the
# last ticker's min/max survived and no other ticker's values could be mapped
# back to prices with inverse_transform().
scaler_dict_small = {}

for stock in dataset_small:
  # Scale this ticker's closing prices to values between 0 and 1.
  # NOTE(review): the scaler is fitted on the full series, including the rows
  # that fall after the training split - consider fitting on the training
  # portion only to avoid look-ahead leakage.
  stock_scaler = MinMaxScaler(feature_range=(0, 1))
  scaled_data_dict_small[stock] = stock_scaler.fit_transform(dataset_small[stock])
  scaler_dict_small[stock] = stock_scaler

  # Backward compatibility: `scaler` keeps referring to the most recently
  # fitted scaler, exactly as it did when one instance was re-fitted in place.
  scaler = stock_scaler

In [None]:
train_data_dict_small = {}
x_train_dict_small = {}
y_train_dict_small = {}

for stock in scaled_data_dict_small:
  # Leading slice of the scaled series reserved for training
  train_data_dict_small[stock] = scaled_data_dict_small[stock][:training_data_len_small, :]
  series = train_data_dict_small[stock][:, 0]

  # Each sample pairs the 20 previous scaled closes with the close that follows
  window_ends = range(20, len(series))
  x_train = [series[stop - 20:stop] for stop in window_ends]
  y_train = [series[stop] for stop in window_ends]

  # Convert the collected samples and targets to NumPy arrays
  features = np.array(x_train)
  y_train_dict_small[stock] = np.array(y_train)

  # Add a trailing axis of size 1: (samples, timesteps) -> (samples, timesteps, 1)
  x_train_dict_small[stock] = features.reshape(features.shape[0], features.shape[1], 1)

In [None]:
model_dict_small = {}

for stock in x_train_dict_small:

  print(stock, "-")

  # Build a fresh two-layer LSTM network for this ticker.
  # The window length comes from this ticker's own training tensor instead of a
  # hard-coded "AAPL" entry, so the cell no longer depends on that key existing.
  timesteps = x_train_dict_small[stock].shape[1]
  model_small = Sequential()
  model_small.add(LSTM(units=50, return_sequences=True, input_shape=(timesteps, 1)))
  model_small.add(LSTM(units=50, return_sequences=False))
  model_small.add(Dense(units=25))
  model_small.add(Dense(units=1))

  # Show the architecture once. The original summarised `model`, the variable
  # left over from the large-dataset training loop, so it reported a 60-step
  # input instead of this network's. summary() also prints the table itself and
  # returns None, so it is called directly rather than wrapped in print().
  if stock == "AAPL":
    model_small.summary()

  # Compile with the Adam optimizer and mean-squared-error loss
  model_small.compile(optimizer='adam', loss='mean_squared_error')

  # Train on this ticker's windows.
  # NOTE(review): batch_size=1 performs one update per sample; kept as in the
  # original so results are unchanged, but worth revisiting for training time.
  model_small.fit(x_train_dict_small[stock], y_train_dict_small[stock], batch_size=1, epochs=50)

  # Store the trained model in the model dictionary
  model_dict_small[stock] = model_small

  print()

AAPL -
Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_14 (LSTM)              (None, 60, 50)            10400     
                                                                 
 lstm_15 (LSTM)              (None, 50)                20200     
                                                                 
 dense_14 (Dense)            (None, 25)                1275      
                                                                 
 dense_15 (Dense)            (None, 1)                 26        
                                                                 
Total params: 31,901
Trainable params: 31,901
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50

In [None]:
# Write every small-dataset model to its own file named keras_model_small_<TICKER>.h5
for stock, trained_model in model_dict_small.items():
  filename = "keras_model_small_" + stock + ".h5"
  trained_model.save(filename)