In [4]:
import importlib
import src.evaluation

# Re-execute src.evaluation so that edits made to the module are picked up
# without restarting the kernel.
# NOTE(review): `src` must already be importable when this cell runs; the cell
# that appends the project root to sys.path appears further down, so on a
# fresh kernel that cell presumably has to run first -- confirm.
importlib.reload(src.evaluation)

<module 'src.evaluation' from 'C:\\Users\\lovas\\Desktop\\Market_risk_prediction\\src\\evaluation.py'>

In [5]:
import pandas as pd
from joblib import dump, load
import optuna
import numpy as np
import sys
from pathlib import Path

# Make the project root importable so that the `src` package resolves.
# Assumes the notebook's working directory is a direct subdirectory of the project root.
project_root = Path().resolve().parent
# Guard the append so that re-running this cell does not pile up duplicate
# sys.path entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from src.utils import best_threshold
from src.evaluation import give_metrics, log_experiment, log_trials_to_csv, eval_top_N

In [3]:
# Load the processed data set; the first CSV column becomes the index and is
# parsed as dates.
sp500 = pd.read_csv(
    project_root / "data/processed/sp500_processed.csv",
    index_col=0,
    parse_dates=True,
)

In [4]:
# Chronological hold-out: rows up to the end of 2018 are used for training,
# rows from 2019 onwards for testing. 'Stress' is the target column.
X_train = sp500.loc[:'2018'].drop(columns='Stress')
X_test = sp500.loc['2019':].drop(columns='Stress')
y_train = sp500.loc[:'2018', 'Stress']
y_test = sp500.loc['2019':, 'Stress']

# Only normalized features are used so that the model does not learn the
# specifics of one timeframe and stays more 'structural'.
normalized_features = [
    'Vol20/VIX', 'Vol5/VIX', 'Scaled_Lag_1d', 'Scaled_Lag_3d',
    'Sharpe-like_10', 'Sharpe-like_5', 'log_returns_zscore', 'VIX_zscore',
    # Momentum family (added later); Momentum5_20 is the original one.
    'Momentum5_20', 'Momentum10_20', 'Momentum5_10',
    'Momentum3_20', 'Momentum3_10', 'Momentum3_5',
    # Added together with the momentum family.
    'Scaled_weighted_avg',
]

# Experiment notes:
# - Skew and kurtosis seem to make things worse.
# - Scaled_Lag_5d also seemed bad.
# - Vol_momentum tends towards not being useful.
# - More generally, longer-timeframe statistics seem to work better
#   (except for lagged returns).

# Restrict both design matrices to the selected feature columns.
X_train, X_test = X_train[normalized_features], X_test[normalized_features]

In [None]:
from sklearn.metrics import average_precision_score

def cross_val(model, tscv, X_train, y_train, alpha=0.8):
    """Cross-validate ``model`` fold by fold and score it with average precision.

    For each ``(train_idx, val_idx)`` pair produced by ``tscv.split(X_train)``
    the model is re-fitted on the training rows and scored on the validation
    rows with ``average_precision_score`` applied to the second column of
    ``predict_proba``.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict_proba(X)``
        The same instance is re-fitted on every fold.
    tscv : object exposing ``split(X)``
        Yields positional ``(train_idx, val_idx)`` indexers.
    X_train, y_train : objects supporting positional ``.iloc[...]`` selection
    alpha : float or None, default 0.8
        Fold-weighting factor. Fold ``i`` (0-based) out of ``n`` receives the
        weight ``alpha ** (n - i)``, so with ``alpha < 1`` later folds count
        more. ``None`` selects the plain, unweighted mean.

    Returns
    -------
    tuple
        ``(aggregate_score, fold_scores)``: the (weighted) mean average
        precision and the list of per-fold scores, for every value of
        ``alpha``.
    """
    scores = []

    for train_idx, val_idx in tscv.split(X_train):
        X_fit, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_fit, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model.fit(X_fit, y_fit)
        positive_proba = model.predict_proba(X_val)[:, 1]
        scores.append(average_precision_score(y_val, positive_proba))

    if alpha is None:
        # Return the same (aggregate, per-fold) pair as the weighted branch so
        # that callers can always unpack two values; this branch previously
        # returned a bare float.
        return np.mean(scores), scores

    n_folds = len(scores)
    weights = np.array([alpha ** (n_folds - i) for i in range(n_folds)])
    weighted_scores = np.array(scores) * weights

    return np.sum(weighted_scores) / np.sum(weights), scores

In [125]:
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.model_selection import TimeSeriesSplit

def objective(trial):
    """Optuna objective: fold-weighted average precision of a random forest.

    Samples a RandomForestClassifier configuration from ``trial``, evaluates
    it with ``cross_val`` on the module-level ``X_train`` / ``y_train`` using
    a 5-split ``TimeSeriesSplit``, stores the per-fold scores on the trial
    under the user attribute ``'folds'`` and returns the aggregate score.
    """
    # Dict literals are evaluated in order, so the parameters are suggested
    # in the same sequence as they are listed here.
    rf_params = {
        'n_estimators': trial.suggest_int('n_estimators', 200, 1500),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 32),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 16),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
        # important for imbalanced data
        'class_weight': trial.suggest_categorical('class_weight', ['balanced']),
    }
    forest = RandomForestClassifier(random_state=42, **rf_params)

    splitter = TimeSeriesSplit(n_splits=5)
    weighted_ap, fold_scores = cross_val(forest, splitter, X_train, y_train)

    # Keep the per-fold scores on the trial for later analysis.
    trial.set_user_attr('folds', fold_scores)
    return weighted_ap

In [135]:
# Create a study that maximizes the objective, sampling hyper-parameters with
# a random sampler seeded with 42.
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.RandomSampler(seed=42))

[I 2025-10-14 14:31:28,657] A new study created in memory with name: no-name-c8aba445-2c6a-4da6-90ef-8281784381c6


In [136]:
# Run 50 trials of `objective`.
# NOTE(review): with n_jobs=-1 the trials run in parallel, so the seeded
# sampler presumably does not reproduce the same trial sequence from run to
# run -- confirm against the Optuna FAQ on reproducibility.
study.optimize(objective, n_trials=50, n_jobs=-1)

[I 2025-10-14 14:31:52,062] Trial 1 finished with value: 0.10287233377276381 and parameters: {'n_estimators': 315, 'max_depth': 8, 'min_samples_split': 23, 'min_samples_leaf': 7, 'max_features': 'log2', 'class_weight': 'balanced'}. Best is trial 1 with value: 0.10287233377276381.
[I 2025-10-14 14:31:53,944] Trial 0 finished with value: 0.09676895840847881 and parameters: {'n_estimators': 346, 'max_depth': 16, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'class_weight': 'balanced'}. Best is trial 1 with value: 0.10287233377276381.
[I 2025-10-14 14:32:08,219] Trial 4 finished with value: 0.10827991028425966 and parameters: {'n_estimators': 285, 'max_depth': 14, 'min_samples_split': 3, 'min_samples_leaf': 14, 'max_features': 'sqrt', 'class_weight': 'balanced'}. Best is trial 4 with value: 0.10827991028425966.
[I 2025-10-14 14:32:32,786] Trial 2 finished with value: 0.09348988971991709 and parameters: {'n_estimators': 1181, 'max_depth': 4, 'min_samples_split': 4,

In [137]:
# Collect all trials into a DataFrame, best objective value first.
study_df = study.trials_dataframe().sort_values('value', ascending=False)

# Record the trials in results/rf_trials.csv via the project helper.
# NOTE(review): append=True presumably adds to an existing file, so re-running
# this cell would log the same trials again -- confirm in src.evaluation.
log_trials_to_csv(study_df, project_root / 'results/rf_trials.csv', append=True)

In [142]:
# Show the five trials with the highest objective value.
study_df.sort_values(by='value', ascending=False).head(5)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_class_weight,params_max_depth,params_max_features,params_min_samples_leaf,params_min_samples_split,params_n_estimators,user_attrs_folds,state
49,49,0.113769,2025-10-14 14:40:07.986801,2025-10-14 14:40:27.986088,0 days 00:00:19.999287,balanced,17,log2,15,8,486,"[0.08390102181772537, 0.019531967649075197, 0....",COMPLETE
40,40,0.112007,2025-10-14 14:38:31.336594,2025-10-14 14:39:00.497213,0 days 00:00:29.160619,balanced,9,log2,12,18,577,"[0.0776711804764583, 0.018117965917003334, 0.0...",COMPLETE
3,3,0.111416,2025-10-14 14:31:30.785334,2025-10-14 14:32:49.036367,0 days 00:01:18.251033,balanced,12,sqrt,15,23,1405,"[0.0823846989589842, 0.020311666986543832, 0.0...",COMPLETE
31,31,0.111227,2025-10-14 14:37:13.256990,2025-10-14 14:37:33.730385,0 days 00:00:20.473395,balanced,18,log2,12,12,397,"[0.08054277741794477, 0.018384075613374976, 0....",COMPLETE
8,8,0.109323,2025-10-14 14:32:46.660133,2025-10-14 14:33:51.453266,0 days 00:01:04.793133,balanced,11,log2,16,24,1243,"[0.08404125687006875, 0.019052967855331768, 0....",COMPLETE


In [184]:
# Name passed to eval_top_N below; the keys shown in metrics_df are RF_1 ... RF_5.
name = 'RF'

In [185]:
# Evaluate the top trials with the project helper.
# NOTE(review): presumably refits the best N configurations on the training
# set and scores them on the 2019+ test set; the exact behavior (including the
# values printed below) lives in src.evaluation.eval_top_N -- confirm there.
results, models, metrics_df = eval_top_N(study_df, X_train, y_train, X_test, y_test, name=name)

0.38999999999999985
0.3549999999999998
0.33999999999999986
0.34499999999999986
0.36999999999999983


In [176]:
# Display the per-model metrics table returned by eval_top_N.
metrics_df

Unnamed: 0,Accuracy,Precision,Recall,F1 Score,ROC AUC,Average Precision
RF_1,0.857143,0.179104,0.413793,0.25,0.746897,0.173391
RF_2,0.84127,0.168831,0.448276,0.245283,0.744229,0.172848
RF_3,0.814484,0.15873,0.517241,0.242915,0.745808,0.16561
RF_4,0.844246,0.167785,0.431034,0.241546,0.736661,0.172496
RF_5,0.826389,0.16185,0.482759,0.242424,0.745644,0.162943


In [183]:
# Log one experiment record per evaluated model.
# The loop uses its own variable so that the global `name` passed to
# eval_top_N is not overwritten; otherwise re-running the evaluation cell
# after this one would pick up the last model key instead of 'RF'.
for model_name in models:
    log_experiment(
        model_name=model_name,
        params=results[model_name]['params'],
        cv_scheme="expanding_window_5fold_alpha0.8",
        metrics={
            'folds': results[model_name]['folds'],
            'mean_weighted_ap': results[model_name]['value'],
            "model_scores": metrics_df.loc[model_name].to_dict(),
        },
        features=normalized_features,
        artifacts={
            'model_file': f'results/models/{model_name}_01.joblib',
        },
    )