In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
import os

In [29]:
#Create a function to create models for different stocks and save them to .keras files

# Per-stock registries: create_models() writes an entry for every stock it
# trains, and the evaluation code reads them back by stock name.
scalers = dict()              # stock name -> StandardScaler used for that stock
training_data_lens = dict()   # stock name -> number of rows in the training split

def create_models(stock_name):
    """Train a two-layer LSTM on one stock's closing prices and save it to disk.

    Reads ``stock_data/{stock_name}_last_5_years.csv``, takes the first 95% of
    the ``Close`` column as the training split, turns it into 60-step sliding
    windows, fits the network and writes it to
    ``models/{stock_name}_model.keras``.

    Side effects:
        * ``scalers[stock_name]`` is set to the fitted ``StandardScaler``.
        * ``training_data_lens[stock_name]`` is set to the training row count.
        * The ``models`` directory is created if it does not exist.

    Args:
        stock_name: Name used to build the input CSV path and output model path.

    Returns:
        The fitted ``keras.Sequential`` model.

    Raises:
        ValueError: If the training split does not contain more rows than the
            window length, so no training sample could be built.
    """
    window_size = 60        # look-back length; the evaluation code also uses 60
    train_fraction = 0.95   # share of rows (from the start) used for training

    df = pd.read_csv(f"stock_data/{stock_name}_last_5_years.csv")

    target_col = 'Close'
    dataset = df[target_col].values.reshape(-1, 1)

    training_data_len = int(np.ceil(len(dataset) * train_fraction))
    if training_data_len <= window_size:
        raise ValueError(
            f"Not enough data for {stock_name}: {training_data_len} training rows, "
            f"but more than {window_size} are required to build a window."
        )
    training_data_lens[stock_name] = training_data_len

    # Fit the scaler on the training rows only. Fitting on the full series would
    # leak the mean/variance of the held-out rows into training.
    scaler = StandardScaler()
    train_data = scaler.fit_transform(dataset[:training_data_len])
    scalers[stock_name] = scaler

    # Sliding windows: each sample is `window_size` consecutive values and the
    # target is the value that immediately follows them.
    X_train = np.array([
        train_data[end - window_size:end, 0]
        for end in range(window_size, len(train_data))
    ])
    y_train = train_data[window_size:, 0]

    # LSTM input layout is (samples, time_steps, features).
    X_train = X_train.reshape(X_train.shape[0], window_size, 1)

    model = keras.Sequential()
    # Explicit Input layer instead of the deprecated `input_shape=` layer argument.
    model.add(keras.Input(shape=(window_size, 1)))
    model.add(keras.layers.LSTM(units=64, return_sequences=True))
    model.add(keras.layers.LSTM(units=64, return_sequences=False))
    model.add(keras.layers.Dense(units=25))
    model.add(keras.layers.Dropout(0.5))
    model.add(keras.layers.Dense(units=1))

    model.compile(optimizer='adam', loss='mae', metrics=[keras.metrics.RootMeanSquaredError()])

    model.fit(X_train, y_train, epochs=20, batch_size=32)

    # Make sure the target directory exists before writing the model file.
    os.makedirs("models", exist_ok=True)
    model.save(f"models/{stock_name}_model.keras")

    return model



In [31]:
# Build the model for a single stock, predict on its held-out test rows, and save a plot of the results

def train_models_for_stocks(stock_name):
    """Train the model for one stock, then evaluate and plot it on the held-out rows.

    Calls ``create_models(stock_name)``, re-reads
    ``stock_data/{stock_name}_last_5_years.csv``, predicts every row after the
    training split from the 60 values preceding it, saves a comparison plot to
    ``outputs/{stock_name}_stock_price_prediction.png`` and prints the RMSE
    (in the original price units).

    Args:
        stock_name: Name used to build the CSV, model and plot file paths.

    Returns:
        None. Results are reported through printed messages and the saved plot.

    Raises:
        ValueError: If there are no rows after the training split to evaluate.
    """
    window_size = 60  # look-back length; create_models() builds its windows with 60 too

    print(f"Creating model for {stock_name}...")
    model = create_models(stock_name)
    print(f"Model for {stock_name} created and saved.\n")

    # Reload the data for evaluation.
    df = pd.read_csv(f"stock_data/{stock_name}_last_5_years.csv")
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

    target_col = 'Close'
    dataset = df[target_col].values.reshape(-1, 1)

    # Values recorded by create_models() for this stock.
    training_data_len = training_data_lens[stock_name]
    scaler = scalers[stock_name]

    y_test = dataset[training_data_len:]
    if len(y_test) == 0:
        raise ValueError(f"No test rows available for {stock_name} after the training split.")

    # Include the last `window_size` training rows so the first test row has a full window.
    scaled_test_data = scaler.transform(dataset[training_data_len - window_size:])

    X_test = np.array([
        scaled_test_data[end - window_size:end, 0]
        for end in range(window_size, len(scaled_test_data))
    ])
    X_test = X_test.reshape(X_test.shape[0], window_size, 1)

    # Predict in scaled space, then map back to price units.
    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions)

    train = df[:training_data_len]
    # Copy the slice so adding a column does not write into a view of `df`
    # (avoids pandas' SettingWithCopyWarning).
    valid = df[training_data_len:].copy()
    valid['Predictions'] = predictions.ravel()

    # Explicit figure/axes so the figure can be saved and closed unambiguously.
    os.makedirs("outputs", exist_ok=True)
    fig, ax = plt.subplots(figsize=(16, 8))
    ax.set_title(f"{stock_name} Stock Price Prediction")
    ax.set_xlabel('Date', fontsize=18)
    ax.set_ylabel('Close Price USD ($)', fontsize=18)
    ax.plot(train['Date'], train['Close'], label='Train Close Price')
    ax.plot(valid['Date'], valid['Close'], label='Actual Close Price')
    ax.plot(valid['Date'], valid['Predictions'], label='Predicted Close Price')
    ax.legend()
    fig.savefig(f"outputs/{stock_name}_stock_price_prediction.png")
    plt.close(fig)

    print(f"Prediction plot for {stock_name} saved in outputs folder.\n")

    # Both arrays have shape (n, 1), so the subtraction is element-wise.
    rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
    print(f"Root Mean Squared Error for {stock_name}: {rmse}")

    print("\n---------------------------------\n")
    