In [103]:
import min_features, daily_return
import importlib
import pandas as pd
import numpy as np
from sklearn.base import clone
from sklearn.metrics import (
    balanced_accuracy_score,
    accuracy_score,
    f1_score,
    matthews_corrcoef,
    precision_score,
    recall_score,
)
from sklearn.inspection import permutation_importance
import warnings
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
warnings.filterwarnings("ignore", message="y_pred contains classes not in y_true")
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)
import time

importlib.reload(min_features)
importlib.reload(daily_return)

# ---- Configuration -------------------------------------------------------
min_feats = "N"                      # any value other than "N" merges in the min_features frame
returns = [1, 2, 3, 5, 10, 20, 30]   # passed straight to daily_return.pull_daily
TICKER = 'QQQ'
DAILY_CUTOFF_DATE = '2026-01-21'     # daily-only path keeps rows with Date <= this value

use_min_features = min_feats != 'N'

# Load order matches the original cell: min_features first (only when
# requested), then the daily pull.
if use_min_features:
    df_min = min_features.min_features()

# The daily pull and daily_cols are identical on both paths, so they are
# computed once instead of being repeated in each branch.
df_daily, feature_sets = daily_return.pull_daily(TICKER, returns)
daily_cols = [
    c for c in df_daily.iloc[:, 1:].columns   # skip the first column
    if "return" not in c.lower()
]

if use_min_features:
    # Inner-join on Date, newest rows first.
    df_main = (
        pd.merge(df_min, df_daily, how='inner', on='Date')
        .sort_values(by='Date', ascending=False)
    )
    return_cols = df_main.columns[df_main.columns.str.contains("Return_")].to_list()

    # Columns whose name contains close_, post_ or overnight_ are listed
    # separately from the remaining min_features columns.
    close_cols = df_min.columns[
        df_min.columns.str.contains("close_|post_|overnight_")
    ].to_list()
    min_cols = (
        df_min
        .loc[:, ~df_min.columns.isin(close_cols)]  # drop the close_cols group
        .iloc[:, 1:]                               # drop first column
        .columns
        .to_list()
    )
else:
    return_cols = df_daily.columns[df_daily.columns.str.contains("Return_")].to_list()
    df_main = df_daily[df_daily['Date'] <= DAILY_CUTOFF_DATE].copy()

print(f'Available Feature Sets: {feature_sets.keys()}')

Available Feature Sets: dict_keys(['ma', 'rsi', 'macd', 'volume', 'atr_adx', 'volatility', 'vix_skew', 'experimental_slope'])


In [104]:
# Signed log1p transform, sign(x) * log1p(|x|), for every column in ma_num.
#
# ma_num is derived here from feature_sets['ma'] with the same rule the
# "Feature Sets" section uses, so this cell no longer depends on a later cell
# having been executed first.
ma_num = [
    c for c in feature_sets['ma']
    if ("num" in c.lower()) or ("since" in c.lower())
]
cols = ma_num

# The transform overwrites the columns in place, so running the cell twice
# would transform already-transformed values. A flag stored on the frame makes
# a re-run a no-op; a freshly loaded df_main starts without the flag.
if not df_main.attrs.get("ma_num_signed_log1p", False):
    df_main[cols] = np.sign(df_main[cols]) * np.log1p(np.abs(df_main[cols]))
    df_main.attrs["ma_num_signed_log1p"] = True

df_main[ma_num].head(25)

Unnamed: 0,num_days_50,num_days_100,num_days_200,Max_10_Rows_Since,Min_10_Rows_Since,Max_30_Rows_Since,Min_30_Rows_Since,Max_60_Rows_Since,Min_60_Rows_Since,Max_120_Rows_Since,Min_120_Rows_Since,Max_240_Rows_Since,Min_240_Rows_Since
6758,0.693147,3.713572,5.164786,1.386294,0.693147,1.386294,3.135494,4.043051,3.688879,4.043051,4.779123,4.043051,5.293305
6757,0.0,3.688879,5.159055,1.098612,0.0,1.098612,3.091042,4.025352,3.663562,4.025352,4.770685,4.025352,5.288267
6756,2.302585,3.663562,5.153292,0.693147,1.098612,0.693147,3.044522,4.007333,3.637586,4.007333,4.762174,4.007333,5.283204
6755,2.197225,3.637586,5.147494,0.0,2.302585,0.0,2.995732,3.988984,3.610918,3.988984,4.75359,3.988984,5.278115
6754,2.079442,3.610918,5.141664,0.693147,2.197225,0.693147,2.944439,3.970292,3.583519,3.970292,4.744932,3.970292,5.273
6753,1.94591,3.583519,5.135798,0.0,2.079442,0.0,2.890372,3.951244,3.555348,3.951244,4.736198,3.951244,5.267858
6752,1.791759,3.555348,5.129899,0.0,1.94591,0.0,2.833213,3.931826,3.526361,3.931826,4.727388,3.931826,5.26269
6751,1.609438,3.526361,5.123964,1.098612,1.791759,3.044522,2.772589,3.912023,3.496508,3.912023,4.718499,3.912023,5.257495
6750,1.386294,3.496508,5.117994,0.693147,1.609438,2.995732,3.401197,3.89182,3.465736,3.89182,4.70953,3.89182,5.252273
6749,1.098612,3.465736,5.111988,0.0,1.386294,2.944439,3.401197,3.871201,3.433987,3.871201,4.70048,3.871201,5.247024


In [None]:
# -----------------------------
# Feature Sets
# -----------------------------
ma_all_cols = feature_sets['ma']

# Split the 'ma' columns into groups by case-insensitive substring match.
ma_lag = [name for name in ma_all_cols if "lag" in name.lower()]
ma_rel = [name for name in ma_all_cols if "rel_" in name.lower()]
ma_sma = [
    name for name in ma_all_cols
    if "sma_" in name.lower() and "lag" not in name.lower()
]
ma_num = [
    name for name in ma_all_cols
    if any(tag in name.lower() for tag in ("num", "since"))
]

# Remaining groups are taken from feature_sets unchanged.
(
    rsi_cols,
    macd_cols,
    volu_cols,
    atr_adx_cols,
    vola_cols,
    vix_skew_cols,
    experimental_slope_cols,
) = (
    feature_sets[key]
    for key in ('rsi', 'macd', 'volume', 'atr_adx', 'volatility', 'vix_skew', 'experimental_slope')
)

# Parallel lists: set_names[i] labels sets[i].
# NOTE(review): "atr_adx" + "vola" evaluates to "atr_adxvola" (no separator,
# unlike "rsi_macd") -- confirm whether that label is intended.
set_names, sets = map(list, zip(*[
    ("ma_lag", ma_lag),
    ("ma_rel", ma_rel),
    ("ma_sma", ma_sma),
    ("ma_num", ma_num),
    ("rsi_macd", rsi_cols + macd_cols),
    ("volu", volu_cols),
    ("atr_adx" + "vola", atr_adx_cols + vola_cols),
    ("vix_skew", vix_skew_cols),
    ("experimental_slope", experimental_slope_cols),
]))

# -----------------------------
# Models
# -----------------------------
# Every configuration that has been tried lives in one registry. Previously
# `models` was assigned twice in a row, so the first dict was built and then
# silently replaced by the second.
model_candidates = {
    #"xgboost-4": XGBClassifier(n_estimators=400, random_state=42, n_jobs=-1),
    "xgboost-6": XGBClassifier(n_estimators=600, random_state=42, n_jobs=-1),
    "xgb_first_pass": XGBClassifier(
        n_estimators=800,
        learning_rate=0.02,
        max_depth=5,
        min_child_weight=10,
        subsample=0.7,
        colsample_bytree=0.7,
        gamma=0.3,
        reg_alpha=0.5,
        reg_lambda=15,
        tree_method="hist",
        random_state=42,
        n_jobs=-1,
    ),
}

# Names (keys of model_candidates) that are actually evaluated. This yields the
# same `models` dict the second assignment used to produce.
active_model_names = ["xgboost-6"]

models = {name: model_candidates[name] for name in active_model_names}

# -----------------------------
# Helpers
# -----------------------------
def _compute_dist(y):
    """Summarise how many entries of y equal 1 and 0.

    Returns a dict with the total length (test_n), the counts of entries equal
    to 1 (test_pos_n) and to 0 (test_neg_n), and both counts as fractions of
    the total (NaN when y is empty).
    """
    total = int(len(y))
    positives = int((y == 1).sum())
    negatives = int((y == 0).sum())

    if total:
        pos_frac = positives / total
        neg_frac = negatives / total
    else:
        # Nothing to divide by: report the fractions as missing.
        pos_frac = neg_frac = np.nan

    return dict(
        test_n=total,
        test_pos_n=positives,
        test_neg_n=negatives,
        test_pos_frac=pos_frac,
        test_neg_frac=neg_frac,
    )

def _first_row_score(fitted, X_test):
    """Class-1 score of the FIRST row of X_test, snapped to a 0.05 grid.

    Uses predict_proba when the model has it, otherwise a logistic squash of
    decision_function. Returns NaN when the model exposes neither or the score
    itself is NaN.
    """
    if hasattr(fitted, "predict_proba"):
        score = float(fitted.predict_proba(X_test)[0, 1])    # prob(class=1)
    elif hasattr(fitted, "decision_function"):
        raw = float(fitted.decision_function(X_test)[0])
        score = float(1.0 / (1.0 + np.exp(-raw)))            # squash to (0,1)
    else:
        return np.nan
    if np.isnan(score):
        return np.nan
    return round(round(score / 0.05) * 0.05, 2)


def walkback_runs(
    df,
    feature_cols,
    target_col,
    *,
    date_col="Date",
    train_years=6,
    test_days=5,
    step_days=5,
    runs=20,
    horizon_days=1,        # recorded in the output rows only
    purge_days=None,       # None -> no purge (0 rows)
    fill_inf=0.0,
    estimators=None,       # {name: estimator}; None -> module-level `models`
    verbose=True,          # print one progress line per run
):
    """
    Walk-back out-of-time evaluation.

    For run k (0-based) the test window is the `test_days` rows ending
    `k * step_days` rows before the end of the sorted frame. The training
    window is the `245 * train_years` rows that end `purge_days` rows before
    the test window starts. Every estimator is cloned, fitted on the training
    window and scored on the test window only. The loop stops early as soon as
    a window would start before the first row.

    Parameters
    ----------
    df : DataFrame holding the feature columns, `target_col` and the date column.
    feature_cols : candidate feature names; any name containing "Return" is dropped.
    target_col : label column, used as-is.
    date_col : column used for sorting and for the date labels. If it is not
        in `df`, rows are sorted by "Date" and positional indices are reported
        instead of dates.
    train_years, test_days, step_days, runs : window geometry (see above).
    horizon_days : copied into the output; it does NOT influence the purge.
    purge_days : rows removed between the end of train and the start of test.
        None means 0. Pass the label horizon explicitly when labels are
        forward-looking, otherwise the last training labels overlap the test
        window.
    fill_inf : replacement for +/-inf in the feature columns.
    estimators : optional mapping of name -> unfitted estimator.
    verbose : when False the per-run progress line is suppressed.

    Returns
    -------
    DataFrame with one row per (run, model). "pred" is the class-1 score of the
    first test row only (rounded to the nearest 0.05); "acc" is accuracy over
    the whole test window.
    """
    if estimators is None:
        estimators = models

    # Honour date_col when it exists; fall back to the historical "Date" sort.
    sort_col = date_col if date_col in df.columns else "Date"
    dfw = df.sort_values(sort_col).reset_index(drop=True).copy()

    # Drop any accidental return cols from features (belt+suspenders)
    safe_feature_cols = [c for c in feature_cols if "Return" not in c]

    # Basic numeric cleaning
    dfw[safe_feature_cols] = dfw[safe_feature_cols].replace([np.inf, -np.inf], fill_inf)

    n = len(dfw)
    train_size = 245 * int(train_years)   # 245 rows are treated as one year
    test_size = int(test_days)
    step = int(step_days)
    purge = int(purge_days) if purge_days is not None else 0

    X_all = dfw[safe_feature_cols].to_numpy()
    y_all = dfw[target_col].to_numpy()
    dates = dfw[date_col].to_numpy() if date_col in dfw.columns else None

    def _label(i):
        # Date at row i when dates are available, otherwise the row position.
        return dates[i] if dates is not None else i

    rows = []

    for k in range(runs):
        test_end = n - k * step
        test_start = test_end - test_size
        if test_start < 0:
            break

        train_end = test_start - purge
        train_start = train_end - train_size
        if train_start < 0 or train_end <= train_start:
            break

        if verbose:
            print(
                f"Run {k+1}/{runs} | "
                f"Train: {_label(train_start)} → {_label(train_end-1)} | "
                f"Test: {_label(test_start)} → {_label(test_end-1)} | "
                f"Train_n={train_end-train_start} | Test_n={test_end-test_start}"
            )

        X_train = X_all[train_start:train_end]
        y_train = y_all[train_start:train_end]
        X_test  = X_all[test_start:test_end]
        y_test  = y_all[test_start:test_end]

        dist = _compute_dist(y_test)

        for model_name, model in estimators.items():
            m = clone(model)          # never fit the shared estimator instance
            m.fit(X_train, y_train)

            preds = m.predict(X_test)

            rows.append({
                "run": k + 1,
                "model": model_name,
                "test_days": test_days,
                "pred": _first_row_score(m, X_test),

                # core metric
                "acc": float(accuracy_score(y_test, preds)),

                **dist,

                "train_n": int(len(y_train)),
                "train_start": _label(train_start),
                "train_end": _label(train_end - 1),
                "test_start": _label(test_start),
                "test_end": _label(test_end - 1),
                "train_years": train_years,
                "horizon_days": horizon_days,
                "n_features": len(safe_feature_cols),
            })

    return pd.DataFrame(rows)

def perm_list(
    df,
    feature_cols,
    target_col,
    *,
    date_col="Date",
    train_years=6,
    test_days=5,
    step_days=5,
    purge_days=None,
    fill_inf=0.0,
    k=1,
    model=None,
    min_importance=0.003,
):
    """
    Select features by permutation importance on one training window.

    The window is built like a single walk-back run: the test window is the
    `test_days` rows ending `k * step_days` rows before the end of the sorted
    frame, and the training window is the `245 * train_years` rows ending
    `purge_days` rows before it. The estimator is fitted on the whole training
    window, and permutation importance (scoring="neg_log_loss", 50 repeats) is
    measured on the most recent 75% of that same window.

    Parameters
    ----------
    df, feature_cols, target_col : data, candidate feature names and label
        column. Names containing "Return" are dropped from the candidates.
    date_col : column used for sorting; falls back to "Date" if absent.
    train_years, test_days, step_days, purge_days, k : window geometry.
        purge_days=None means 0.
    fill_inf : replacement for +/-inf in the feature columns.
    model : unfitted estimator to clone. None uses the first entry of the
        module-level `models` dict (the function previously relied on a global
        named `model` that is not defined anywhere in this notebook).
    min_importance : a feature is kept when its mean importance is strictly
        greater than this value.

    Returns
    -------
    list of kept feature names, ordered by decreasing mean importance.

    Raises
    ------
    ValueError
        If the frame is too short for the requested training window.

    NOTE(review): with the default k=1 the window sits at the most recent end
    of the frame. If the returned columns are later scored on earlier test
    windows, the selection step has already seen those rows.
    """
    if model is None:
        model = next(iter(models.values()))

    sort_col = date_col if date_col in df.columns else "Date"
    dfw = df.sort_values(sort_col).reset_index(drop=True).copy()

    # Drop any accidental return cols from features (belt+suspenders)
    safe_feature_cols = [c for c in feature_cols if "Return" not in c]

    # Basic numeric cleaning
    dfw[safe_feature_cols] = dfw[safe_feature_cols].replace([np.inf, -np.inf], fill_inf)

    n = len(dfw)
    train_size = 245 * int(train_years)   # 245 rows are treated as one year
    test_size = int(test_days)
    step = int(step_days)
    purge = int(purge_days) if purge_days is not None else 0

    X_all = dfw[safe_feature_cols].to_numpy()
    y_all = dfw[target_col].to_numpy()

    test_end = n - k * step
    test_start = test_end - test_size
    train_end = test_start - purge
    train_start = train_end - train_size

    # A negative start would make the slices below wrap around and silently
    # pick the wrong rows, so fail loudly instead.
    if train_start < 0 or train_end <= train_start:
        raise ValueError(
            f"Not enough rows ({n}) for a training window of {train_size} rows "
            f"(test={test_size}, purge={purge}, k={k}, step={step})."
        )

    X_train = X_all[train_start:train_end]
    y_train = y_all[train_start:train_end]

    # Importance is measured on the most recent 75% of the training window.
    N_PI = int(len(X_train) * 0.75)
    X_pi = X_train[-N_PI:]
    y_pi = y_train[-N_PI:]

    # fit model
    m = clone(model).fit(X_train, y_train)

    # permutation importance on training-only slice
    pi = permutation_importance(
        m,
        X_pi,
        y_pi,
        scoring="neg_log_loss",
        n_repeats=50,
        random_state=42,
        n_jobs=-1,
    )

    # Importances follow the column order of X_train, i.e. safe_feature_cols
    # (not feature_cols, which may still contain dropped "Return" names).
    pi_df = pd.DataFrame({
        "feature": safe_feature_cols,
        "pi_mean": pi.importances_mean,
        "pi_std":  pi.importances_std,
    }).sort_values("pi_mean", ascending=False)

    # keep only features whose mean importance exceeds the threshold
    pi_cols = pi_df['feature'][pi_df['pi_mean'] > min_importance].to_list()
    print(f"Ran permutation importance | Len: {N_PI} | Old: {len(feature_cols)} | New: {len(pi_cols)}")

    return pi_cols

# -----------------------------
# Run grid (feature sets x horizon x train_years, etc.)
# -----------------------------
returns = [2, 5, 10, 20, 30]
days_assessed = 230
test_days = [1]

# Per-horizon setup: r -> (base feature columns, train_years, list_name).
# The trailing notes record the labels tried earlier and how they compared.
horizon_setups = {
    # tried: "initial+rsi+macd" (worse), "initial+sma" (worse)
    2: (
        experimental_slope_cols + ma_lag + ma_sma,
        4,
        "initial+sma",
    ),
    # tried: "initial+atradx" (worse), "initial+sma" (worse), "initial+vixskew" (much worse)
    5: (
        experimental_slope_cols + ma_lag + ma_num + rsi_cols + macd_cols + volu_cols + vix_skew_cols,
        5,
        "initial+vixskew",
    ),
    # tried: "initial+sma" (better), "initial+sma+lag" (worse)
    10: (
        atr_adx_cols + vola_cols + ma_num + volu_cols + ma_sma + ma_lag,
        5,
        "initial+sma+lag",
    ),
    # tried: "initial+volu" (worse), "initial+lag" (much worse)
    20: (
        atr_adx_cols + vola_cols + ma_num + ma_sma + ma_lag,
        6,
        "initial+lag",
    ),
}
# Used for every horizon without its own entry above.
# tried: "initial+volu" (worse), "initial-rsimacd+volu" (better)
default_setup = (
    atr_adx_cols + vola_cols + ma_num + ma_sma + volu_cols,
    5,
    "initial-rsimacd+volu",
)

# Start from an empty list on every execution. Before, `results` had to exist
# already (NameError on a fresh kernel) and re-running the cell appended the
# same scores a second time.
results = []

for test_day in test_days:

    runs = days_assessed // test_day

    for r in returns:

        base_cols, train_years, list_name = horizon_setups.get(r, default_setup)

        target_col = f"Return_{r}"

        # Trim unknown (recent) outcomes: drop the r newest rows. The explicit
        # newest-first sort makes this independent of df_main's current order.
        df_final = df_main.sort_values("Date", ascending=False).iloc[r:].copy()

        # NOTE(review): features are selected once per horizon on the most
        # recent training window (perm_list default k=1) and then reused for
        # every walk-back run, so the selection step has seen rows that later
        # serve as test rows. Confirm this look-ahead is acceptable.
        perm_cols = perm_list(
            df=df_final,
            feature_cols=base_cols,
            target_col=target_col,
            date_col="Date",
            train_years=train_years,
            test_days=test_day,
            step_days=test_day,
            purge_days=r,
            fill_inf=0.0,
        )

        df_scores = walkback_runs(
            df=df_final,
            feature_cols=perm_cols,
            target_col=target_col,
            date_col="Date",
            train_years=train_years,
            test_days=test_day,
            step_days=test_day,
            runs=runs,
            horizon_days=r,
            purge_days=r,
            fill_inf=0.0,
        )

        df_scores["feature_set"] = "pi_top_model" #list_name
        df_scores["horizon"] = r

        results.append(df_scores)

results_df = pd.concat(results, ignore_index=True)

Ran permutation importance | Len: 735 | Old: 86 | New: 20
Run 1/230 | Train: 2022-02-16 → 2026-01-13 | Test: 2026-01-16 → 2026-01-16 | Train_n=980 | Test_n=1
Run 2/230 | Train: 2022-02-15 → 2026-01-12 | Test: 2026-01-15 → 2026-01-15 | Train_n=980 | Test_n=1
Run 3/230 | Train: 2022-02-14 → 2026-01-09 | Test: 2026-01-14 → 2026-01-14 | Train_n=980 | Test_n=1
Run 4/230 | Train: 2022-02-11 → 2026-01-08 | Test: 2026-01-13 → 2026-01-13 | Train_n=980 | Test_n=1
Run 5/230 | Train: 2022-02-10 → 2026-01-07 | Test: 2026-01-12 → 2026-01-12 | Train_n=980 | Test_n=1
Run 6/230 | Train: 2022-02-09 → 2026-01-06 | Test: 2026-01-09 → 2026-01-09 | Train_n=980 | Test_n=1
Run 7/230 | Train: 2022-02-08 → 2026-01-05 | Test: 2026-01-08 → 2026-01-08 | Train_n=980 | Test_n=1
Run 8/230 | Train: 2022-02-07 → 2026-01-02 | Test: 2026-01-07 → 2026-01-07 | Train_n=980 | Test_n=1
Run 9/230 | Train: 2022-02-04 → 2025-12-31 | Test: 2026-01-06 → 2026-01-06 | Train_n=980 | Test_n=1
Run 10/230 | Train: 2022-02-03 → 2025-12-3

In [None]:
# Write a fresh baseline file from the current run (overwrites any existing file).
results_df = pd.concat(results, ignore_index=True)

# Columns kept in the baseline file. Defined here so the cell does not depend
# on whatever `cols` happens to hold from another cell.
cols = ['model', 'test_days', 'pred', 'acc', 'test_n', 'test_pos_n', 'train_n', 'test_start', 'test_end', 'train_years', 'feature_set', 'horizon']
df_new = results_df[cols].copy()
df_new.to_csv('baseline_performance.csv', index=False)

In [None]:
# Append the current run to the rows already stored in the baseline file.
df = pd.read_csv("baseline_performance.csv")
cols = ['model', 'test_days', 'pred', 'acc', 'test_n', 'test_pos_n', 'train_n', 'test_start', 'test_end', 'train_years', 'feature_set', 'horizon']
df_new = results_df[cols].copy()

# Concatenate with the frame that was just read (the cell used to reference an
# undefined `df2`).
df_concat = pd.concat([df, df_new], ignore_index=True)
df_concat.to_csv('baseline_performance.csv', index=False)