In [1155]:
import os
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score,classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Time series decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Chart drawing
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Yahoo Finance
import yfinance as yf

# Mute sklearn warnings
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DeprecationWarning)

# Show charts when running kernel
init_notebook_mode(connected=True)

# Change default background color for all visualizations
layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(250,250,250,0.8)')
fig = go.Figure(layout=layout)
templated_fig = pio.to_templated(fig)
pio.templates['my_template'] = templated_fig.layout.template
pio.templates.default = 'my_template'

In [1156]:
# Split data into training/validation and test sets based on dates
start_date = '1950-01-31'
end_date = '2030-09-30'

In [1157]:

file_path_1 = '/Users/edocampione/Desktop/Meng Engineering Science/4YP/data/ExxonMobil_CRSP_daily_data_alltime.csv' # Daily data (CRSP)
file_path_2 = '/Users/edocampione/Desktop/Meng Engineering Science/4YP/data/ExxonMobil_Compustat_q_data_alltime.csv' # Fundamentals (Compustat)
file_path_3 = '/Users/edocampione/Desktop/Meng Engineering Science/4YP/data/ExxonMobil_Compustat_quarterly_ratiodata_alltime.csv' # Ratios (Compustat)

df1 = pd.read_csv(file_path_1)
df2 = pd.read_csv(file_path_2)
df3 = pd.read_csv(file_path_3)

df1['date'] = pd.to_datetime(df1['date']) # daily technical data
df2['datadate'] = pd.to_datetime(df2['datadate'])  # Fundamental data
df3['qdate'] = pd.to_datetime(df3['qdate'])  # Financial ratios data


# Sort by qdate and public_date to ensure the most recent public_date comes last
df3 = df3.sort_values(by=['qdate', 'public_date'])
# Drop duplicates to keep only the latest public_date for each qdate
df3 = df3.drop_duplicates(subset='qdate', keep='last')

df = pd.merge(
    df2,  
    df3,  
    left_on='datadate', right_on='qdate',  # Merge on dates
    how='left'
)

df1_quarterly = df1.set_index('date').resample('Q').last().reset_index()
df1_quarterly = df1_quarterly[['date', 'PRC']]

df = pd.merge_asof(
    df1_quarterly,  # Daily data sorted by date
    df,  # Fundamental data sorted by quarterly date
    left_on='date', right_on='datadate',  # Merge on dates
    direction='backward'  # Merge to the closest past quarter
)

features = ['PRC', 'epsfxq', 'atq', 'actq', 'ltq', 'lctq', 'chq', 'revtq', 'niq', 'capxy', 'npm', 'de_ratio', 'curr_ratio', 'ptb']

feature_renames = {
    'epsfxq': 'earnings_per_share', 
    'atq': 'total_assets',
    'actq': 'current_assets',
    'ltq': 'total_liabilities', 
    'lctq': 'current_liabilities',
    'niq': 'net_income', 
    'revtq': 'revenue', 
    'chq': 'cash',
    'capxy': 'capital_expenditures', 
    'npm': 'net_profit_margin', 
    'de_ratio': 'debt_equity_ratio', 
    'curr_ratio': 'current_ratio', 
    'ptb': 'price_to_book_ratio'
}

#df_fundamental[features] = df_fundamental[features].pct_change()
df = df[(df['date'] >= start_date) & (df['date'] <= end_date)].copy()

df = df[['date'] + features].rename(columns=feature_renames)
df['book_value'] = df['total_assets'] - df['total_liabilities']
df['price_to_earnings_ratio'] = df['PRC'] / df['earnings_per_share']
df = df.drop(columns=['cash'])
df = df.drop(columns=['capital_expenditures'])

df = df.dropna()

df.index = range(len(df))

df.head()

#


Unnamed: 0,date,PRC,earnings_per_share,total_assets,current_assets,total_liabilities,current_liabilities,revenue,net_income,net_profit_margin,debt_equity_ratio,current_ratio,price_to_book_ratio,book_value,price_to_earnings_ratio
0,1971-12-31,73.75,1.94,20315.199,6790.504,8228.082,4329.738,4971.629,378.562,0.081,0.71,1.568,1.282,12087.117,38.015464
1,1972-12-31,87.5,2.19,21558.297,7098.578,8752.382,4707.105,5471.75,492.77,0.075,0.713,1.508,1.658,12805.915,39.954338
2,1973-12-31,94.125,3.51,25079.5,9793.512,10770.659,6307.887,7582.316,787.286,0.095,0.785,1.553,1.169,14308.841,26.816239
3,1974-12-31,64.625,3.85,31332.398,14852.797,14923.736,9531.973,11552.797,862.192,0.075,0.949,1.558,1.05,16408.662,16.785714
4,1975-03-31,74.0,2.67,30880.008,14153.004,14932.004,8659.0,10921.0,596.0,0.07,0.968,1.595,1.22,15948.004,27.715356


In [1158]:
# Convert Features into Percentage changes

df.loc[:, df.columns != 'date'] = df.loc[:, df.columns != 'date'].pct_change()

df.head()

Unnamed: 0,date,PRC,earnings_per_share,total_assets,current_assets,total_liabilities,current_liabilities,revenue,net_income,net_profit_margin,debt_equity_ratio,current_ratio,price_to_book_ratio,book_value,price_to_earnings_ratio
0,1971-12-31,,,,,,,,,,,,,,
1,1972-12-31,0.186441,0.128866,0.061191,0.045368,0.063721,0.087157,0.100595,0.301689,-0.074074,0.004225,-0.038265,0.293292,0.059468,0.051002
2,1973-12-31,0.075714,0.60274,0.163334,0.379644,0.230597,0.340078,0.38572,0.597674,0.266667,0.100982,0.029841,-0.294934,0.117362,-0.328828
3,1974-12-31,-0.313413,0.096866,0.249323,0.516596,0.385592,0.51112,0.52365,0.095145,-0.210526,0.208917,0.00322,-0.101796,0.14675,-0.374047
4,1975-03-31,0.145068,-0.306494,-0.014438,-0.047115,0.000554,-0.091584,-0.054688,-0.308739,-0.066667,0.020021,0.023748,0.161905,-0.028074,0.651128


In [1159]:
file_path_4 = '/Users/edocampione/Desktop/Meng Engineering Science/4YP/data/s&p500index.csv' # Ratios (Compustat)
df4 = pd.read_csv(file_path_4)
df4['caldt'] = pd.to_datetime(df4['caldt'])  # Adjust column name if needed

#merge (fill empty cells with nearest s&p500 value)
df = pd.merge_asof(df, df4, left_on='date', right_on='caldt', direction='nearest')
df = df.drop(columns=['caldt'])

# convert to pct
df['spindx'] = df['spindx'].pct_change()

# calculate relative returns
df['relative_quarterly_return'] = (df['PRC'] - df['spindx'])*100
df = df.drop(columns=['PRC', 'spindx'])

# prediction target is return of next quarter
df['target'] = df['relative_quarterly_return'].shift(-1)

df = df.dropna()

df.head()

Unnamed: 0,date,earnings_per_share,total_assets,current_assets,total_liabilities,current_liabilities,revenue,net_income,net_profit_margin,debt_equity_ratio,current_ratio,price_to_book_ratio,book_value,price_to_earnings_ratio,relative_quarterly_return,target
1,1972-12-31,0.128866,0.061191,0.045368,0.063721,0.087157,0.100595,0.301689,-0.074074,0.004225,-0.038265,0.293292,0.059468,0.051002,3.010803,24.936952
2,1973-12-31,0.60274,0.163334,0.379644,0.230597,0.340078,0.38572,0.597674,0.266667,0.100982,0.029841,-0.294934,0.117362,-0.328828,24.936952,-1.623208
3,1974-12-31,0.096866,0.249323,0.516596,0.385592,0.51112,0.52365,0.095145,-0.210526,0.208917,0.00322,-0.101796,0.14675,-0.374047,-1.623208,-7.080161
4,1975-03-31,-0.306494,-0.014438,-0.047115,0.000554,-0.091584,-0.054688,-0.308739,-0.066667,0.020021,0.023748,0.161905,-0.028074,0.651128,-7.080161,10.808541
5,1975-06-30,0.022472,0.014832,-0.013566,0.008304,0.012473,-0.017581,0.025168,-0.071429,-0.020661,-0.000627,0.003279,0.020943,0.222527,10.808541,6.4866


In [1160]:
test_size  = 0.15
valid_size = 0.0

test_split_idx  = int(df.shape[0] * (1-test_size))
valid_split_idx = int(df.shape[0] * (1-(valid_size+test_size)))

train_df  = df.loc[:valid_split_idx].copy()
valid_df  = df.loc[valid_split_idx+1:test_split_idx].copy()
test_df   = df.loc[test_split_idx+1:].copy()

# save test dates
test_dates = test_df['date']

train_df.head()

Unnamed: 0,date,earnings_per_share,total_assets,current_assets,total_liabilities,current_liabilities,revenue,net_income,net_profit_margin,debt_equity_ratio,current_ratio,price_to_book_ratio,book_value,price_to_earnings_ratio,relative_quarterly_return,target
1,1972-12-31,0.128866,0.061191,0.045368,0.063721,0.087157,0.100595,0.301689,-0.074074,0.004225,-0.038265,0.293292,0.059468,0.051002,3.010803,24.936952
2,1973-12-31,0.60274,0.163334,0.379644,0.230597,0.340078,0.38572,0.597674,0.266667,0.100982,0.029841,-0.294934,0.117362,-0.328828,24.936952,-1.623208
3,1974-12-31,0.096866,0.249323,0.516596,0.385592,0.51112,0.52365,0.095145,-0.210526,0.208917,0.00322,-0.101796,0.14675,-0.374047,-1.623208,-7.080161
4,1975-03-31,-0.306494,-0.014438,-0.047115,0.000554,-0.091584,-0.054688,-0.308739,-0.066667,0.020021,0.023748,0.161905,-0.028074,0.651128,-7.080161,10.808541
5,1975-06-30,0.022472,0.014832,-0.013566,0.008304,0.012473,-0.017581,0.025168,-0.071429,-0.020661,-0.000627,0.003279,0.020943,0.222527,10.808541,6.4866


In [1161]:
y_train = train_df['target'].copy()
X_train = train_df.drop(['target', 'date'], 1)

y_valid = valid_df['target'].copy()
X_valid = valid_df.drop(['target', 'date'], 1)

y_test  = test_df['target'].copy()
X_test  = test_df.drop(['target', 'date'], 1)

X_train.head()

Unnamed: 0,earnings_per_share,total_assets,current_assets,total_liabilities,current_liabilities,revenue,net_income,net_profit_margin,debt_equity_ratio,current_ratio,price_to_book_ratio,book_value,price_to_earnings_ratio,relative_quarterly_return
1,0.128866,0.061191,0.045368,0.063721,0.087157,0.100595,0.301689,-0.074074,0.004225,-0.038265,0.293292,0.059468,0.051002,3.010803
2,0.60274,0.163334,0.379644,0.230597,0.340078,0.38572,0.597674,0.266667,0.100982,0.029841,-0.294934,0.117362,-0.328828,24.936952
3,0.096866,0.249323,0.516596,0.385592,0.51112,0.52365,0.095145,-0.210526,0.208917,0.00322,-0.101796,0.14675,-0.374047,-1.623208
4,-0.306494,-0.014438,-0.047115,0.000554,-0.091584,-0.054688,-0.308739,-0.066667,0.020021,0.023748,0.161905,-0.028074,0.651128,-7.080161
5,0.022472,0.014832,-0.013566,0.008304,0.012473,-0.017581,0.025168,-0.071429,-0.020661,-0.000627,0.003279,0.020943,0.222527,10.808541


In [1162]:
# Scaling
scaler = MinMaxScaler()

# Fit the scaler on the training/validation data
X_train_scaled = scaler.fit_transform(X_train)

# Now apply the same scaling to the val/test data using the already fitted scaler
#X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

# Convert the scaled data back to DataFrame for consistency
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
#X_valid_scaled = pd.DataFrame(X_valid_scaled, columns=X_valid.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

X_train_scaled.tail()

Unnamed: 0,earnings_per_share,total_assets,current_assets,total_liabilities,current_liabilities,revenue,net_income,net_profit_margin,debt_equity_ratio,current_ratio,price_to_book_ratio,book_value,price_to_earnings_ratio,relative_quarterly_return
162,0.093786,0.157039,0.154569,0.255017,0.237176,0.128785,0.096929,0.462617,0.42991,0.26108,0.485,0.098747,0.119104,0.631863
163,0.083886,0.142347,0.210548,0.201927,0.130405,0.103001,0.088743,0.40284,0.29792,0.721928,0.384751,0.121533,0.123034,0.603737
164,0.097633,0.195611,0.274366,0.260145,0.297123,0.30925,0.101409,0.338894,0.38752,0.440297,0.49127,0.167253,0.113965,0.681105
165,0.118024,0.137468,0.166468,0.180306,0.199532,0.194201,0.122952,0.373949,0.396884,0.450706,0.416293,0.134138,0.084682,0.659852
166,0.072511,0.153626,0.153954,0.192667,0.241793,0.168904,0.076081,0.275947,0.256797,0.199697,0.592637,0.153072,0.1629,0.684324


In [1163]:
'''
parameters = {
    'n_estimators': [100, 200, 300, 400],
    'max_depth': [None],
    'min_samples_split': [2],
    'random_state': [42]
    'n_jobs': [1]
}

'''

# Initialize the model
model = RandomForestRegressor(
    n_estimators=100,  # Number of trees in the forest
    max_depth=5,    # Allow trees to grow fully
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,   # For reproducibility
    n_jobs=-1          # Use all available CPU cores
)

# Train the model
model.fit(X_train_scaled, y_train)

# Predictions
y_train_pred = model.predict(X_train_scaled)
#y_valid_pred = model.predict(X_valid_scaled)
y_test_pred = model.predict(X_test_scaled)

# Evaluate the model
train_rmse = mean_squared_error(y_train, y_train_pred, squared=False)
#valid_rmse = mean_squared_error(y_valid, y_valid_pred, squared=False)
test_rmse = mean_squared_error(y_test, y_test_pred, squared=False)

train_r2 = r2_score(y_train, y_train_pred)
#valid_r2 = r2_score(y_valid, y_valid_pred)
test_r2 = r2_score(y_test, y_test_pred)

print(f"Train RMSE: {train_rmse:.4f}, R²: {train_r2:.4f}")
print(f"Validation RMSE: {valid_rmse:.4f}, R²: {valid_r2:.4f}")
print(f"Test RMSE: {test_rmse:.4f}, R²: {test_r2:.4f}")


Train RMSE: 6.5057, R²: 0.6646
Validation RMSE: 8.2602, R²: -0.1037
Test RMSE: 15.0411, R²: -0.0571


In [1164]:

%%time
'''
parameters = {
    'n_estimators': [100, 200, 300, 400],
    'learning_rate': [0.001, 0.005, 0.01, 0.05],
    'max_depth': [8, 10, 12, 15],
    'gamma': [0.00001, 0.0001, 0.001],
    'random_state': [42]
}


parameters = {
    'n_estimators': [100,200,300],
    'learning_rate': [0.05],
    'max_depth': [1,2,3,4],
    'gamma': [1,0.1,0.01],
    'random_state': [42]
}

eval_set = [(X_train_scaled, y_train), (X_valid_scaled, y_valid)]
model = xgb.XGBRegressor(eval_set=eval_set, objective='reg:squarederror', verbose=False)
clf = GridSearchCV(model, parameters)

clf.fit(X_train_scaled, y_train)

print(f'Best params: {clf.best_params_}')
print(f'Best validation score = {clf.best_score_}')

'''



CPU times: user 8 µs, sys: 3 µs, total: 11 µs
Wall time: 21 µs


"\nparameters = {\n    'n_estimators': [100, 200, 300, 400],\n    'learning_rate': [0.001, 0.005, 0.01, 0.05],\n    'max_depth': [8, 10, 12, 15],\n    'gamma': [0.00001, 0.0001, 0.001],\n    'random_state': [42]\n}\n\n\nparameters = {\n    'n_estimators': [100,200,300],\n    'learning_rate': [0.05],\n    'max_depth': [1,2,3,4],\n    'gamma': [1,0.1,0.01],\n    'random_state': [42]\n}\n\neval_set = [(X_train_scaled, y_train), (X_valid_scaled, y_valid)]\nmodel = xgb.XGBRegressor(eval_set=eval_set, objective='reg:squarederror', verbose=False)\nclf = GridSearchCV(model, parameters)\n\nclf.fit(X_train_scaled, y_train)\n\nprint(f'Best params: {clf.best_params_}')\nprint(f'Best validation score = {clf.best_score_}')\n\n"

In [1165]:
'''
%%time

model = xgb.XGBRegressor(**clf.best_params_, objective='reg:squarederror')
model.fit(X_train_scaled, y_train, eval_set=eval_set, verbose=False)

'''

"\n%%time\n\nmodel = xgb.XGBRegressor(**clf.best_params_, objective='reg:squarederror')\nmodel.fit(X_train_scaled, y_train, eval_set=eval_set, verbose=False)\n\n"

In [1166]:
#plot_importance(model);

In [1167]:
y_pred = model.predict(X_train_scaled)
train_df['y_pred'] = y_pred

y_train_binary = (y_train >= 0).astype(int)
y_pred_binary = (y_pred >= 0).astype(int)

train_df['y_train_binary'] = y_train_binary
train_df['y_pred_binary'] = y_pred_binary

print(classification_report(y_train_binary, y_pred_binary))

train_df.head()

              precision    recall  f1-score   support

           0       0.73      0.83      0.78        83
           1       0.81      0.70      0.75        84

    accuracy                           0.77       167
   macro avg       0.77      0.77      0.77       167
weighted avg       0.77      0.77      0.77       167



Unnamed: 0,date,earnings_per_share,total_assets,current_assets,total_liabilities,current_liabilities,revenue,net_income,net_profit_margin,debt_equity_ratio,current_ratio,price_to_book_ratio,book_value,price_to_earnings_ratio,relative_quarterly_return,target,y_pred,y_train_binary,y_pred_binary
1,1972-12-31,0.128866,0.061191,0.045368,0.063721,0.087157,0.100595,0.301689,-0.074074,0.004225,-0.038265,0.293292,0.059468,0.051002,3.010803,24.936952,11.126553,1,1
2,1973-12-31,0.60274,0.163334,0.379644,0.230597,0.340078,0.38572,0.597674,0.266667,0.100982,0.029841,-0.294934,0.117362,-0.328828,24.936952,-1.623208,-1.060063,0,0
3,1974-12-31,0.096866,0.249323,0.516596,0.385592,0.51112,0.52365,0.095145,-0.210526,0.208917,0.00322,-0.101796,0.14675,-0.374047,-1.623208,-7.080161,-2.686263,0,0
4,1975-03-31,-0.306494,-0.014438,-0.047115,0.000554,-0.091584,-0.054688,-0.308739,-0.066667,0.020021,0.023748,0.161905,-0.028074,0.651128,-7.080161,10.808541,4.038118,1,1
5,1975-06-30,0.022472,0.014832,-0.013566,0.008304,0.012473,-0.017581,0.025168,-0.071429,-0.020661,-0.000627,0.003279,0.020943,0.222527,10.808541,6.4866,-1.710186,1,0


In [1168]:
'''
y_pred = model.predict(X_valid_scaled)
valid_df['y_pred'] = y_pred

y_valid_binary = (y_valid >= 0).astype(int)
y_pred_binary = (y_pred >= 0).astype(int)

valid_df['y_valid_binary'] = y_valid_binary
valid_df['y_pred_binary'] = y_pred_binary

print(classification_report(y_valid_binary, y_pred_binary))

valid_df.head()

# Create a figure
fig = go.Figure()

# Add Target values as bars
fig.add_trace(go.Bar(
    x=valid_df['date'],
    y=valid_df['target'],
    name='Target',
    marker_color='blue'
))

# Add Predicted values as bars
fig.add_trace(go.Bar(
    x=valid_df['date'],
    y=valid_df['y_pred'],
    name='Predicted',
    marker_color='orange'
))

# Update layout
fig.update_layout(
    title='Comparison of Target vs Predicted Values',
    xaxis_title='Date',
    yaxis_title='Percentage Change',
    barmode='group',  # Group bars side by side
    xaxis=dict(tickformat='%Y-%m-%d', tickangle=-45),
    template='plotly_white',
)

# Show the plot
fig.show()
'''

"\ny_pred = model.predict(X_valid_scaled)\nvalid_df['y_pred'] = y_pred\n\ny_valid_binary = (y_valid >= 0).astype(int)\ny_pred_binary = (y_pred >= 0).astype(int)\n\nvalid_df['y_valid_binary'] = y_valid_binary\nvalid_df['y_pred_binary'] = y_pred_binary\n\nprint(classification_report(y_valid_binary, y_pred_binary))\n\nvalid_df.head()\n\n# Create a figure\nfig = go.Figure()\n\n# Add Target values as bars\nfig.add_trace(go.Bar(\n    x=valid_df['date'],\n    y=valid_df['target'],\n    name='Target',\n    marker_color='blue'\n))\n\n# Add Predicted values as bars\nfig.add_trace(go.Bar(\n    x=valid_df['date'],\n    y=valid_df['y_pred'],\n    name='Predicted',\n    marker_color='orange'\n))\n\n# Update layout\nfig.update_layout(\n    title='Comparison of Target vs Predicted Values',\n    xaxis_title='Date',\n    yaxis_title='Percentage Change',\n    barmode='group',  # Group bars side by side\n    xaxis=dict(tickformat='%Y-%m-%d', tickangle=-45),\n    template='plotly_white',\n)\n\n# Show the p

In [1169]:
y_pred = model.predict(X_test_scaled)
test_df['y_pred'] = y_pred

y_test_binary = (y_test >= 0).astype(int)
y_pred_binary = (y_pred >= 0).astype(int)

test_df['y_test_binary'] = y_test_binary
test_df['y_pred_binary'] = y_pred_binary

print(classification_report(y_test_binary, y_pred_binary))

test_df['date'] = test_dates

test_df.head()

              precision    recall  f1-score   support

           0       0.65      0.83      0.73        18
           1       0.57      0.33      0.42        12

    accuracy                           0.63        30
   macro avg       0.61      0.58      0.58        30
weighted avg       0.62      0.63      0.61        30



Unnamed: 0,date,earnings_per_share,total_assets,current_assets,total_liabilities,current_liabilities,revenue,net_income,net_profit_margin,debt_equity_ratio,current_ratio,price_to_book_ratio,book_value,price_to_earnings_ratio,relative_quarterly_return,target,y_pred,y_test_binary,y_pred_binary
168,2016-03-31,-0.358209,0.017909,0.038195,0.027153,-0.097265,-0.19172,-0.348921,-0.132353,0.023504,0.088608,0.001132,0.009547,0.670877,6.46239,10.243332,-5.036544,1,0
169,2016-06-30,-0.046512,-0.000922,0.035638,0.007955,0.057013,0.204185,-0.060773,-0.135593,-0.003132,0.001163,-0.054833,-0.009093,0.17613,10.243332,-10.198435,-6.918354,0,0
170,2016-09-30,0.536585,-0.009014,-0.062909,-0.019457,-0.028794,0.007953,0.558824,-0.117647,-0.002094,-0.001161,0.01256,0.000763,-0.394055,-10.198435,0.160092,-1.208652,1,0
171,2016-12-31,-0.349206,-0.026731,-0.035604,-0.036286,-0.04764,0.032009,-0.366038,-0.111111,-0.018888,0.010465,0.015357,-0.017965,0.589049,0.160092,-14.673955,-1.676216,0,0
172,2017-03-31,1.317073,0.042066,0.041409,0.025268,0.120408,0.06609,1.386905,0.175,0.006417,-0.01496,-0.069226,0.057188,-0.607868,-14.673955,-4.129435,-9.924975,0,0


In [1170]:
# Create a figure
fig = go.Figure()

# Add Target values as bars
fig.add_trace(go.Bar(
    x=test_df['date'],
    y=test_df['target'],
    name='Target',
    marker_color='blue'
))

# Add Predicted values as bars
fig.add_trace(go.Bar(
    x=test_df['date'],
    y=test_df['y_pred'],
    name='Predicted',
    marker_color='orange'
))

# Update layout
fig.update_layout(
    title='Comparison of Target vs Predicted Values',
    xaxis_title='Date',
    yaxis_title='Percentage Change',
    barmode='group',  # Group bars side by side
    xaxis=dict(tickformat='%Y-%m-%d', tickangle=-45),
    template='plotly_white',
)

# Show the plot
fig.show()
