
# Model Metrics Comparison


In [21]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    r2_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)

# Directory searched for the per-model prediction / metrics / feature-importance CSVs.
OUT_DIR = Path('../data/predictions')
# Database file path; not referenced by any cell visible in this section.
DB_PATH = Path('../data/data.db')
# Candidate model names per task; each name is substituted into the CSV file names
# that `available_files` looks for.
MODELS_REG = ['linreg','ridge','rf','xgb']
MODELS_CLS = ['logit','rf','xgb']

def available_files(task: str, models: list[str], out_dir: Path) -> dict[str, dict[str, Path]]:
    """Locate the saved artefacts of every model that has a predictions file.

    For each model name the function looks in ``out_dir`` for
    ``predictions_{task}_{model}.csv``, ``metrics_{task}_{model}.csv`` and
    ``feature_importances_{task}_{model}.csv``.

    Args:
        task: Task label embedded in the file names (e.g. ``'regression'``).
        models: Model names to look for.
        out_dir: Directory that is searched for the CSV files.

    Returns:
        Mapping ``model -> {'pred': Path, 'met': Path or None, 'fi': Path or None}``
        in the order of ``models``. Models without a predictions file are
        omitted entirely; ``'met'`` / ``'fi'`` are ``None`` when that optional
        file does not exist.
    """
    def _if_exists(path: Path):
        # Optional artefacts are reported as None rather than as a dangling path.
        return path if path.exists() else None

    found = {}
    for m in models:
        pred = out_dir / f'predictions_{task}_{m}.csv'
        if not pred.exists():
            # The predictions file is mandatory: without it the model is skipped.
            continue
        found[m] = {
            'pred': pred,
            'met': _if_exists(out_dir / f'metrics_{task}_{m}.csv'),
            'fi': _if_exists(out_dir / f'feature_importances_{task}_{m}.csv'),
        }
    return found

def rmse(y_true, y_pred):
    """Return the root-mean-squared error of ``y_pred`` against ``y_true`` as a plain float."""
    mse = mean_squared_error(y_true, y_pred)
    return float(np.sqrt(mse))


In [22]:

# Discover, per task, which of the candidate models have a predictions CSV on disk.
reg_files = available_files(task='regression', models=MODELS_REG, out_dir=OUT_DIR)
cls_files = available_files(task='classification', models=MODELS_CLS, out_dir=OUT_DIR)

# A candidate model missing from these lists has no predictions file in OUT_DIR.
print('Found regression models:', list(reg_files))
print('Found classification models:', list(cls_files))


Found regression models: ['ridge', 'rf', 'xgb']
Found classification models: ['logit', 'rf', 'xgb']


In [23]:

def _regression_metrics(y_true: pd.Series, y_pred: pd.Series) -> dict:
    """Compute n_samples, MAE, RMSE, R2, Hit and IC for one pair of aligned series."""
    n = len(y_true)
    if n == 0:
        # Nothing to score: report every metric as undefined instead of calling
        # the metric functions on empty input.
        return {'n_samples': 0, 'MAE': np.nan, 'RMSE': np.nan,
                'R2': np.nan, 'Hit': np.nan, 'IC': np.nan}

    # R2 and the correlation are undefined for fewer than two points or constant data.
    true_varies = n >= 2 and y_true.var() > 0
    pred_varies = n >= 2 and y_pred.var() > 0
    return {
        'n_samples': n,
        'MAE': mean_absolute_error(y_true, y_pred),
        'RMSE': rmse(y_true, y_pred),
        'R2': r2_score(y_true, y_pred) if true_varies else np.nan,
        # Share of rows where prediction and target have the same sign.
        'Hit': (np.sign(y_true) == np.sign(y_pred)).mean(),
        # Pearson correlation between target and prediction; NaN when either is constant.
        'IC': np.corrcoef(y_true, y_pred)[0, 1] if true_varies and pred_varies else np.nan,
    }


def summarize_regression_predictions(pred_df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Return overall and per-ticker summary (MAE, RMSE, R2, Hit, IC).

    Args:
        pred_df: Frame with at least the columns ``ticker``, ``y_true`` and
            ``y_pred``. Rows where ``y_true`` or ``y_pred`` is missing are
            ignored.

    Returns:
        ``(overall_df, per_df)``: a one-row frame with the metrics over all
        usable rows, and a frame with one row per ticker sorted by ascending
        RMSE. Metrics that cannot be computed (no rows, a single row, constant
        series) are NaN. With no usable rows ``per_df`` is empty but keeps
        its columns.
    """
    # Only rows with both a target and a prediction can be scored.
    df = pred_df.dropna(subset=['y_true', 'y_pred'])

    overall_df = pd.DataFrame([_regression_metrics(df['y_true'], df['y_pred'])])

    per = [
        {'ticker': t, **_regression_metrics(g['y_true'], g['y_pred'])}
        for t, g in df.groupby('ticker')
    ]
    # Explicit columns keep the frame well-formed (and sortable) when `per` is empty.
    per_df = pd.DataFrame(
        per, columns=['ticker', 'n_samples', 'MAE', 'RMSE', 'R2', 'Hit', 'IC']
    ).sort_values('RMSE')
    return overall_df, per_df


def _classification_metrics(y_true: pd.Series, y_pred: pd.Series, y_prob=None) -> dict:
    """Compute n_samples, Accuracy, Precision, Recall, F1 and AUC for one group.

    ``y_prob`` may be ``None`` when no probabilities are available; AUC is then NaN.
    """
    n = len(y_true)
    if n == 0:
        # Nothing to score: report every metric as undefined instead of calling
        # the metric functions on empty input.
        return {'n_samples': 0, 'Accuracy': np.nan, 'Precision': np.nan,
                'Recall': np.nan, 'F1': np.nan, 'AUC': np.nan}

    # AUC needs probabilities and both classes present in the true labels.
    auc_defined = y_prob is not None and y_true.nunique() == 2
    return {
        'n_samples': n,
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, zero_division=0),
        'Recall': recall_score(y_true, y_pred, zero_division=0),
        'F1': f1_score(y_true, y_pred, zero_division=0),
        'AUC': roc_auc_score(y_true, y_prob) if auc_defined else np.nan,
    }


def summarize_classification_predictions(pred_df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Return overall and per-ticker summary (Accuracy, F1, Precision, Recall, AUC if probs).

    Args:
        pred_df: Frame with at least the columns ``ticker``, ``y_true_cls`` and
            ``y_pred_cls``; an optional ``y_prob`` column enables AUC. Rows
            with a missing value in any of the used columns are ignored.

    Returns:
        ``(overall_df, per_df)``: a one-row frame with the metrics over all
        usable rows, and a frame with one row per ticker sorted by descending
        accuracy. AUC is NaN when ``y_prob`` is absent or only one class occurs
        in the true labels. With no usable rows all metrics are NaN and
        ``per_df`` is empty but keeps its columns.
    """
    has_prob = 'y_prob' in pred_df.columns
    required = ['y_true_cls', 'y_pred_cls'] + (['y_prob'] if has_prob else [])
    df = pred_df.dropna(subset=required)

    def _metrics_for(frame: pd.DataFrame) -> dict:
        return _classification_metrics(
            frame['y_true_cls'],
            frame['y_pred_cls'],
            frame['y_prob'] if has_prob else None,
        )

    overall_df = pd.DataFrame([_metrics_for(df)])

    per = [{'ticker': t, **_metrics_for(g)} for t, g in df.groupby('ticker')]
    # Explicit columns keep the frame well-formed (and sortable) when `per` is empty.
    per_df = pd.DataFrame(
        per, columns=['ticker', 'n_samples', 'Accuracy', 'Precision', 'Recall', 'F1', 'AUC']
    ).sort_values('Accuracy', ascending=False)
    return overall_df, per_df


## Regression

In [24]:

# Report every regression model that has a predictions CSV.
for model, paths in reg_files.items():
    preds = pd.read_csv(paths['pred'], parse_dates=['date'])
    overall, per = summarize_regression_predictions(preds)

    print(f"\n=== Regression: {model} ===")
    # Overall row first, then the 15 tickers with the lowest RMSE.
    display(overall, per.head(15))



=== Regression: ridge ===


Unnamed: 0,n_samples,MAE,RMSE,R2,Hit,IC
0,2088,0.028875,0.039983,-0.349794,0.489943,-0.020239


Unnamed: 0,ticker,n_samples,MAE,RMSE,R2,Hit,IC
3,XLI,232,0.0208,0.027004,-0.504343,0.431034,-0.156224
8,XLY,232,0.021769,0.029748,-0.393844,0.487069,-0.050855
4,XLK,232,0.023303,0.029778,-0.688686,0.482759,0.074501
7,XLV,232,0.024201,0.032126,-0.259401,0.551724,0.114902
0,XLB,232,0.027362,0.034744,-0.448569,0.418103,-0.12643
5,XLP,232,0.026195,0.034816,-0.38551,0.538793,0.016366
2,XLF,232,0.030752,0.042693,-0.469093,0.478448,-0.081374
6,XLU,232,0.037991,0.048482,-0.288699,0.50431,-0.023745
1,XLE,232,0.047503,0.065288,-0.256693,0.517241,-0.026108



=== Regression: rf ===


Unnamed: 0,n_samples,MAE,RMSE,R2,Hit,IC
0,2088,0.026472,0.03738,-0.179761,0.48659,-0.081807


Unnamed: 0,ticker,n_samples,MAE,RMSE,R2,Hit,IC
3,XLI,232,0.018363,0.024251,-0.213213,0.491379,-0.171727
4,XLK,232,0.019638,0.024798,-0.171081,0.469828,0.009401
8,XLY,232,0.019482,0.026934,-0.142629,0.517241,-0.090002
7,XLV,232,0.02325,0.02997,-0.096058,0.49569,0.009079
0,XLB,232,0.024024,0.031111,-0.161443,0.461207,-0.109605
5,XLP,232,0.024458,0.032387,-0.19894,0.508621,-0.1663
2,XLF,232,0.028007,0.040168,-0.300484,0.452586,-0.104732
6,XLU,232,0.035644,0.04528,-0.124081,0.49569,-0.095138
1,XLE,232,0.045378,0.063783,-0.199427,0.487069,-0.089734



=== Regression: xgb ===


Unnamed: 0,n_samples,MAE,RMSE,R2,Hit,IC
0,2088,0.029348,0.041287,-0.439267,0.493295,-0.088517


Unnamed: 0,ticker,n_samples,MAE,RMSE,R2,Hit,IC
3,XLI,232,0.020075,0.026591,-0.458599,0.508621,-0.129333
4,XLK,232,0.022945,0.029227,-0.626832,0.491379,-0.052321
8,XLY,232,0.021316,0.029765,-0.395507,0.474138,-0.188659
7,XLV,232,0.025134,0.032539,-0.292011,0.512931,-0.006806
0,XLB,232,0.025501,0.033498,-0.346536,0.5,-0.101471
5,XLP,232,0.026794,0.035428,-0.434641,0.49569,-0.147512
2,XLF,232,0.031671,0.044625,-0.605059,0.482759,-0.05668
6,XLU,232,0.039979,0.051709,-0.465945,0.508621,-0.185287
1,XLE,232,0.050714,0.069233,-0.413145,0.465517,-0.059179


## Classification 

In [25]:

# Report every classification model that has a predictions CSV.
for model, paths in cls_files.items():
    preds = pd.read_csv(paths['pred'], parse_dates=['date'])
    overall, per = summarize_classification_predictions(preds)

    print(f"\n=== Classification: {model} ===")
    # Overall row first, then the 15 tickers with the highest accuracy.
    display(overall, per.head(15))



=== Classification: logit ===


Unnamed: 0,n_samples,Accuracy,Precision,Recall,F1,AUC
0,2088,0.512452,0.504864,0.504864,0.504864,0.509856


Unnamed: 0,ticker,n_samples,Accuracy,Precision,Recall,F1,AUC
7,XLV,232,0.577586,0.561905,0.531532,0.546296,0.615293
4,XLK,232,0.530172,0.56,0.564516,0.562249,0.512395
5,XLP,232,0.521552,0.508621,0.522124,0.515284,0.504871
6,XLU,232,0.517241,0.515873,0.560345,0.53719,0.49948
2,XLF,232,0.512931,0.474747,0.435185,0.454106,0.494624
8,XLY,232,0.512931,0.546218,0.524194,0.534979,0.514785
0,XLB,232,0.491379,0.442308,0.433962,0.438095,0.446616
1,XLE,232,0.478448,0.429825,0.466667,0.447489,0.495013
3,XLI,232,0.469828,0.491667,0.487603,0.489627,0.47435



=== Classification: rf ===


Unnamed: 0,n_samples,Accuracy,Precision,Recall,F1,AUC
0,2088,0.501916,0.494071,0.486381,0.490196,0.504146


Unnamed: 0,ticker,n_samples,Accuracy,Precision,Recall,F1,AUC
8,XLY,232,0.551724,0.584746,0.556452,0.570248,0.561231
0,XLB,232,0.521552,0.475728,0.462264,0.4689,0.516846
7,XLV,232,0.521552,0.5,0.486486,0.493151,0.511876
5,XLP,232,0.508621,0.495495,0.486726,0.491071,0.511601
2,XLF,232,0.50431,0.463158,0.407407,0.433498,0.497872
4,XLK,232,0.487069,0.51938,0.540323,0.529644,0.45109
1,XLE,232,0.482759,0.428571,0.428571,0.428571,0.491339
3,XLI,232,0.469828,0.491379,0.471074,0.481013,0.473234
6,XLU,232,0.469828,0.472441,0.517241,0.493827,0.473283



=== Classification: xgb ===


Unnamed: 0,n_samples,Accuracy,Precision,Recall,F1,AUC
0,2088,0.488985,0.48134,0.4893,0.485287,0.490043


Unnamed: 0,ticker,n_samples,Accuracy,Precision,Recall,F1,AUC
7,XLV,232,0.543103,0.52381,0.495495,0.509259,0.535775
0,XLB,232,0.517241,0.471698,0.471698,0.471698,0.524783
8,XLY,232,0.512931,0.547009,0.516129,0.53112,0.545102
5,XLP,232,0.5,0.486726,0.486726,0.486726,0.480776
2,XLF,232,0.487069,0.443299,0.398148,0.419512,0.471774
1,XLE,232,0.478448,0.433333,0.495238,0.462222,0.474691
3,XLI,232,0.474138,0.496183,0.53719,0.515873,0.473904
4,XLK,232,0.456897,0.492647,0.540323,0.515385,0.455048
6,XLU,232,0.431034,0.433333,0.448276,0.440678,0.423008
