In [None]:
# Import the modules
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the fundamentals CSV into a DataFrame, using the "Date" column as a parsed
# datetime index.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (a strict version of that behaviour is now the default), so passing it
# has no effect other than emitting a deprecation warning.
df_fundamentals_csv = pd.read_csv(
    "Resources/fundamentals.csv",
    index_col="Date",
    parse_dates=True,
)
df_fundamentals_csv

Unnamed: 0_level_0,GOOGL P/S (LTM),MMM P/S (LTM),NVDA P/S (LTM),PG P/S (LTM),GOOGL Open,GOOGL Low,GOOGL High,GOOGL Close,GOOGL Adj. Close,MMM Open,...,NVDA P/FCF (LTM).1,PG P/FCF (LTM),GOOGL P/E (LTM),MMM P/E (LTM),NVDA P/E (LTM),PG P/E (LTM),GOOGL Debt/Equity (LTM),MMM Debt/Equity (LTM),NVDA Debt/Equity (LTM),PG Debt/Equity (LTM)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-11-13,Data Restricted,Data Restricted,Data Restricted,Data Restricted,11.87250,11.85350,12.02925,12.02575,12.02575,38.776266,...,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,,,,
2006-11-14,Data Restricted,Data Restricted,Data Restricted,Data Restricted,12.01750,12.01250,12.24875,12.23250,12.23250,38.997003,...,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,,,,
2006-11-15,Data Restricted,Data Restricted,Data Restricted,Data Restricted,12.33575,12.29825,12.49625,12.29825,12.29825,39.163783,...,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,,,,
2006-11-16,Data Restricted,Data Restricted,Data Restricted,Data Restricted,12.37500,12.31400,12.44200,12.39750,12.39750,39.364900,...,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,,,,
2006-11-17,Data Restricted,Data Restricted,Data Restricted,Data Restricted,12.33125,12.32500,12.49150,12.46975,12.46975,39.482626,...,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,Data Restricted,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-25,6.0897252060873015,1.7730506410452556,38.38004701093201,4.490849161116605,149.94000,147.82000,150.37500,150.07000,150.07000,88.540367,...,86.53229799045187,23.555208263967007,25.956471397504938,,79.63285834031853,26.8295746694061,,,,
2024-03-26,6.113070651997111,1.7356751935375294,37.39390256393421,4.500941586968177,150.22000,149.98000,152.25600,150.67000,150.67000,87.001989,...,84.30892017319862,23.608144620609924,26.055977594995465,,77.58675607711652,26.889869612136454,,,,
2024-03-27,6.121897890004359,1.7709388901502403,36.46027707560487,4.558692690479311,151.18000,148.90000,151.63500,150.87000,150.87000,86.115749,...,82.20395248140335,23.91105821709786,26.09360227313777,,75.64962279966473,27.23489067347785,,,,
2024-03-28,6.129121323122768,1.7959985474434688,36.50310022651915,4.548600264627739,150.85000,150.17000,151.43000,150.93000,150.93000,87.461830,...,82.30050227600755,23.858121860454943,26.124390991640595,,75.7384744341995,27.174595730747495,,,,


In [3]:
# Build one frame per ticker from df_fundamentals_csv, each holding the adjusted
# close followed by that ticker's P/S, P/FCF, P/E and Debt/Equity (LTM) columns.
googl = df_fundamentals_csv.loc[:, [
    "GOOGL Adj. Close",
    "GOOGL P/S (LTM)",
    "GOOGL P/FCF (LTM)",
    "GOOGL P/E (LTM)",
    "GOOGL Debt/Equity (LTM)",
]]
nvda = df_fundamentals_csv.loc[:, [
    "NVDA Adj. Close",
    "NVDA P/S (LTM)",
    "NVDA P/FCF (LTM)",
    "NVDA P/E (LTM)",
    "NVDA Debt/Equity (LTM)",
]]
mmm = df_fundamentals_csv.loc[:, [
    "MMM Adj. Close",
    "MMM P/S (LTM)",
    "MMM P/FCF (LTM)",
    "MMM P/E (LTM)",
    "MMM Debt/Equity (LTM)",
]]
pg = df_fundamentals_csv.loc[:, [
    "PG Adj. Close",
    "PG P/S (LTM)",
    "PG P/FCF (LTM)",
    "PG P/E (LTM)",
    "PG Debt/Equity (LTM)",
]]

In [4]:
# Place the four per-ticker frames side by side (column-wise) in one DataFrame
per_ticker_frames = [googl, nvda, mmm, pg]
df_final = pd.concat(per_ticker_frames, axis="columns")

In [5]:
# Remove every row whose "GOOGL P/S (LTM)" value is the placeholder "Data Restricted"
restricted_rows = df_final.index[df_final["GOOGL P/S (LTM)"].eq("Data Restricted").to_numpy()]
df_final = df_final.drop(index=restricted_rows)

In [9]:
# Keep only the rows that have a non-missing "GOOGL P/S (LTM)" value
has_googl_ps = df_final["GOOGL P/S (LTM)"].notna()
df_final = df_final.loc[has_googl_ps]

In [10]:
# Remove every row whose "GOOGL P/S (LTM)" value is the placeholder "LOCKED",
# then display the cleaned frame
locked_rows = df_final.index[df_final["GOOGL P/S (LTM)"].eq("LOCKED").to_numpy()]
df_final = df_final.drop(index=locked_rows)
df_final

Unnamed: 0_level_0,GOOGL Adj. Close,GOOGL P/S (LTM),GOOGL P/FCF (LTM),GOOGL P/E (LTM),GOOGL Debt/Equity (LTM),NVDA Adj. Close,NVDA P/S (LTM),NVDA P/FCF (LTM),NVDA P/E (LTM),NVDA Debt/Equity (LTM),MMM Adj. Close,MMM P/S (LTM),MMM P/FCF (LTM),MMM P/E (LTM),MMM Debt/Equity (LTM),PG Adj. Close,PG P/S (LTM),PG P/FCF (LTM),PG P/E (LTM),PG Debt/Equity (LTM)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2019-01-02,52.7340,5.624312583147244,31.913618120461397,39.41372981295773,,33.788895,6.689277089035582,24.511563421828907,18.203627255141033,,128.423190,3.388836587259982,24.512285808641977,25.638502088416196,,79.924749,3.400914508531607,20.418006129556474,23.842335504066842,
2019-01-03,51.2735,5.466324052446367,31.017155564075672,38.306586998518696,,31.747472,6.285131218805345,23.03064896755162,17.103819207058443,,123.587565,3.261233889881134,23.589304216710758,24.673114133289,,79.364366,3.377069358596659,20.274847453761897,23.675167507497804,
2019-01-04,53.9035,5.753529497574423,32.64682403687683,40.3192485723093,,33.781454,6.687803896312992,24.50616519174041,18.19961823430963,,128.672033,3.3954030682109115,24.559782775132277,25.688181301691593,,80.984225,3.4459967451135802,20.688665500897827,24.158387497483528,
2019-01-07,53.7960,5.741558031193692,32.5788952365535,40.23535562816376,,35.569869,7.0418612139752055,25.80353982300885,19.16311957412439,,128.376112,3.3875942800365744,24.503299895943563,25.629103318212426,,80.660253,3.432211267807205,20.605901891452685,24.061743499465415,
2019-01-08,54.2685,5.787965514376357,32.84222176300957,40.56056728317413,,34.684343,6.86655127998712,25.161150442477876,18.686046095186985,,128.914151,3.401792076683938,24.605996039682537,25.73651783337754,,80.957957,3.444879003708744,20.681954937960136,24.150551497632886,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-25,150.0700,6.0897252060873015,26.93639815814087,25.956471397504938,,950.020000,38.38004701093201,86.53229799045187,79.63285834031853,,87.654127,1.7730506410452556,11.440289832181639,,,160.190000,4.490849161116605,23.555208263967007,26.8295746694061,
2024-03-26,150.6700,6.113070651997111,27.039660982804516,26.055977594995465,,925.610000,37.39390256393421,84.30892017319862,77.58675607711652,,85.806401,1.7356751935375294,11.199131490621916,,,160.550000,4.500941586968177,23.608144620609924,26.889869612136454,
2024-03-27,150.8700,6.121897890004359,27.078706093963593,26.09360227313777,,902.500000,36.46027707560487,82.20395248140335,75.64962279966473,,87.445109,1.7709388901502403,11.426664139980257,,,162.610000,4.558692690479311,23.91105821709786,27.23489067347785,
2024-03-28,150.9300,6.129121323122768,27.11065716958055,26.124390991640595,,903.560000,36.50310022651915,82.30050227600755,75.7384744341995,,88.682500,1.7959985474434688,11.588357063968411,,,162.250000,4.548600264627739,23.858121860454943,27.174595730747495,


In [11]:
# Print the index range, per-column non-null counts and dtypes of the cleaned frame
df_final.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1320 entries, 2019-01-02 to 2024-04-01
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   GOOGL Adj. Close         1320 non-null   float64
 1   GOOGL P/S (LTM)          1320 non-null   object 
 2   GOOGL P/FCF (LTM)        1320 non-null   object 
 3   GOOGL P/E (LTM)          1320 non-null   object 
 4   GOOGL Debt/Equity (LTM)  15 non-null     float64
 5   NVDA Adj. Close          1320 non-null   float64
 6   NVDA P/S (LTM)           1320 non-null   object 
 7   NVDA P/FCF (LTM)         1320 non-null   object 
 8   NVDA P/E (LTM)           1320 non-null   object 
 9   NVDA Debt/Equity (LTM)   0 non-null      float64
 10  MMM Adj. Close           1320 non-null   float64
 11  MMM P/S (LTM)            1320 non-null   object 
 12  MMM P/FCF (LTM)          1320 non-null   object 
 13  MMM P/E (LTM)            1147 non-null   object 
 14  MMM De

In [None]:
# Create separate DataFrames for each stock.
# Each selection is explicitly copied so that adding columns to these frames in
# later cells (e.g. googl_data["actual_returns"]) writes to an independent object
# rather than to a selection of df_final, which would trigger pandas'
# SettingWithCopyWarning.
googl_data = df_final[["GOOGL Adj. Close", "GOOGL P/S (LTM)", "GOOGL P/FCF (LTM)", "GOOGL P/E (LTM)", "GOOGL Debt/Equity (LTM)"]].copy()
nvda_data = df_final[["NVDA Adj. Close", "NVDA P/S (LTM)", "NVDA P/FCF (LTM)", "NVDA P/E (LTM)", "NVDA Debt/Equity (LTM)"]].copy()
mmm_data = df_final[["MMM Adj. Close", "MMM P/S (LTM)", "MMM P/FCF (LTM)", "MMM P/E (LTM)", "MMM Debt/Equity (LTM)"]].copy()
pg_data = df_final[["PG Adj. Close", "PG P/S (LTM)", "PG P/FCF (LTM)", "PG P/E (LTM)", "PG Debt/Equity (LTM)"]].copy()

In [None]:
 # Calculate the daily returns using the closing prices and the pct_change function
googl_data["actual_returns"] = googl_data["GOOGL Adj. Close"].pct_change()

# Drop only the rows whose return is undefined (the first row has no previous close).
# A blanket dropna() is not usable here: "GOOGL Debt/Equity (LTM)" is almost entirely
# NaN, so it would discard nearly every row. Restricting the check to "actual_returns"
# removes just the NaN-return row, which would otherwise match neither the buy (>= 0)
# nor the sell (< 0) comparison and keep the default signal of 0.0.
googl_data = googl_data.dropna(subset=["actual_returns"])

# Review the DataFrame
googl_data

In [None]:
# Window length (in rows) for the fast moving average
short_window = 4

# Rolling mean of the adjusted close over short_window rows, stored in a new
# column called sma_fast
googl_close_fast = googl_data["GOOGL Adj. Close"].rolling(window=short_window)
googl_data["sma_fast"] = googl_close_fast.mean()


In [None]:
# Define a window size of 100 for the slow moving average
long_window = 100

# Create a simple moving average (SMA) using the long_window and assign this to a
# new column called sma_slow. The window must be long_window: rolling over
# short_window here would make sma_slow an exact duplicate of sma_fast.
googl_data["sma_slow"] = googl_data["GOOGL Adj. Close"].rolling(window=long_window).mean()

In [None]:
# List the columns currently present in googl_data
print(googl_data.columns)

In [None]:
# Build the feature matrix X from the three GOOGL ratio columns (P/S, P/FCF, P/E).
# These columns are held as strings (object dtype) because the CSV mixes numbers
# with text placeholders such as "Data Restricted", so cast them to float64
# explicitly. astype returns a new object and raises if any non-numeric text is
# still present, rather than handing strings to the scaler and model.
X = googl_data[["GOOGL P/S (LTM)", "GOOGL P/FCF (LTM)", "GOOGL P/E (LTM)"]].astype("float64")

# Display sample data
display(X.head())
display(X.tail())

In [None]:
# Give every row of googl_data a default value of 0.0 in a column called "signal"
googl_data.loc[:, "signal"] = 0.0

In [None]:
# Mark rows with a non-negative actual return as a buy (signal = 1)
is_up_day = googl_data["actual_returns"].ge(0)
googl_data.loc[is_up_day, "signal"] = 1

In [None]:
# Mark rows with a negative actual return as a sell (signal = -1)
is_down_day = googl_data["actual_returns"].lt(0)
googl_data.loc[is_down_day, "signal"] = -1

In [None]:
# Take an independent copy of the "signal" column as the target Series y
y = googl_data.loc[:, "signal"].copy()

In [None]:
# Import required libraries
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Split features and target into training and testing partitions.
# random_state=42 fixes the shuffle so the split is reproducible; test_size is
# left at the scikit-learn default.
# NOTE(review): the rows are date-indexed, so a shuffled split mixes earlier and
# later observations across train and test — confirm this is intended rather than
# a chronological split.
split_parts = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fit a StandardScaler on the training features only, then apply that same fitted
# scaler to both the training and the testing features.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# Import the BayesianRidge model
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error

In [None]:
# Create and fit a BayesianRidge model on the scaled training features.
# BayesianRidge is a regression model, so its predictions are continuous values
# rather than discrete signal labels.
br_model_googl = BayesianRidge().fit(X_train_scaled, y_train)

# Predict on the same scaled training features the model was fitted on
y_train_pred = br_model_googl.predict(X_train_scaled)

In [None]:
# Show the first ten training-set predictions
y_train_pred[:10]


In [None]:
# Score the training-set predictions with mean squared error
mse_train = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
print("Mean Squared Error on Training Set:", mse_train)

In [None]:
# Predict on the scaled testing features with the fitted model
y_pred_test = br_model_googl.predict(X_test_scaled)

# Score the testing-set predictions with mean squared error
mse_test = mean_squared_error(y_true=y_test, y_pred=y_pred_test)
print("Mean Squared Error on Testing Set:", mse_test)