In [25]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np

In [26]:
df_cleaned = pd.read_csv("../Data/CleanedData.csv")

In [27]:
df_cleaned = df_cleaned.drop(columns=["Unnamed: 0"])
df_cleaned

Unnamed: 0,CPI,Unemployment Rate,Fed Funds Rate,M2 Money Supply,day_diff
0,169.300,4.0,5.45,4667.6,-1.792515
1,169.300,4.0,5.45,4667.6,-2.409937
2,169.300,4.0,5.45,4667.6,0.039834
3,169.300,4.0,5.45,4667.6,-1.195010
4,169.300,4.0,5.45,4667.6,3.465529
...,...,...,...,...,...
6304,317.685,4.1,4.48,21533.8,4.559998
6305,317.685,4.1,4.48,21533.8,3.900024
6306,317.685,4.1,4.48,21533.8,-1.909973
6307,317.685,4.1,4.48,21533.8,1.079956


In [28]:
X = df_cleaned[['CPI', 'Unemployment Rate',
                'Fed Funds Rate', 'M2 Money Supply']]
y = df_cleaned['day_diff']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(y.values.reshape(-1, 1)).flatten()

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42)

In [29]:
model = RandomForestRegressor(random_state=42)

In [30]:
param_grid = {
    'n_estimators': [100, 200, 500, 1000],
    'max_depth': [None, 10, 20, 50, 100],
    'min_samples_split': [2, 5, 10, 20],
    'min_samples_leaf': [1, 2, 4, 10],
    'max_features': ['auto', 'sqrt', 'log2'],
    'bootstrap': [True, False]
}

grid_search = GridSearchCV(
    estimator=model, param_grid=param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)
print(f'Best Parameters for Random Forest: {grid_search.best_params_}')
best_model = grid_search.best_estimator_

1920 fits failed out of a total of 5760.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1323 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/andrew/Desktop/Projects/MarketPredictor/env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/andrew/Desktop/Projects/MarketPredictor/env/lib/python3.10/site-packages/sklearn/base.py", line 1382, in wrapper
    estimator._validate_params()
  File "/Users/andrew/Desktop/Projects/MarketPredictor/env/lib/python3.10/site-packages/sklearn/base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "/Users/andrew/Desktop/Projects/M

Best Parameters for Random Forest: {'bootstrap': True, 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 10, 'min_samples_split': 2, 'n_estimators': 200}


In [31]:
y_pred = best_model.predict(X_test)
print(y_pred[:5])
print(y_test[:5])

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'R-squared (R²): {r2}')

[-0.05122506 -0.09666397 -0.00313108 -0.01027348 -1.16347826]
2264    0.163220
4520   -1.169741
432     0.650042
1612   -0.175300
5642   -2.870392
Name: day_diff, dtype: float64
Mean Absolute Error (MAE): 1.0901067037650194
Mean Squared Error (MSE): 3.327064028562732
Root Mean Squared Error (RMSE): 1.8240241304770977
R-squared (R²): -0.10436035944484878
