In [228]:
import min_features, daily_return
import importlib
import pandas as pd
import numpy as np
from sklearn.base import clone
from sklearn.metrics import (
    balanced_accuracy_score,
    accuracy_score,
    f1_score,
    matthews_corrcoef,
    precision_score,
    recall_score,
)
from sklearn.inspection import permutation_importance
import warnings
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
warnings.filterwarnings("ignore", message="y_pred contains classes not in y_true")
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)
import time

# Re-import the local helper modules so edits made on disk are picked up
# without restarting the kernel.
importlib.reload(min_features)
importlib.reload(daily_return)

# "N" -> work from the daily frame only; any other value additionally merges
# the frame returned by min_features.min_features().
min_feats = "N"
returns = [1, 2, 3, 5, 10, 20, 30]

# Both paths need the daily frame and its feature-set mapping, so fetch once.
df_daily, feature_sets = daily_return.pull_daily('QQQ', returns)

# Every daily column after the first whose name does not mention "return".
daily_cols = [
    c for c in df_daily.iloc[:, 1:].columns
    if "return" not in c.lower()
]

if min_feats != 'N':
    df_min = min_features.min_features()

    df_main = pd.merge(df_min, df_daily, how='inner', on='Date')
    df_main = df_main.sort_values(by='Date', ascending=False)

    close_cols = df_min.columns[(df_min.columns.str.contains("close_")) | (df_min.columns.str.contains("post_")) | (df_min.columns.str.contains("overnight_"))].to_list()
    min_cols = (
        df_min
        .loc[:, ~df_min.columns.isin(close_cols)]  # drop close_ columns
        .iloc[:, 1:]                               # drop first column
        .columns
        .to_list()
    )
else:
    # Hard-coded cutoff date for the rows kept in the working frame.
    df_main = df_daily[df_daily['Date'] <= '2026-01-21'].copy()

# Derived from df_main on BOTH paths: past_return_cols used to exist only on
# the daily-only path, although later cells use it unconditionally.
# NOTE(review): "Return_" is a substring of "Past_Return_<k>", so return_cols
# also picks up those columns.
return_cols = df_main.columns[df_main.columns.str.contains("Return_")].to_list()
past_return_cols = df_main.columns[df_main.columns.str.contains("Past_Ret")].to_list()

#top_models = pd.read_csv("top_performers2.csv")
print(f'Available Feature Sets: {feature_sets.keys()}')

Available Feature Sets: dict_keys(['ma', 'rsi', 'macd', 'volume', 'atr_adx', 'volatility', 'vix_skew', 'experimental_slope'])


In [None]:
def new_features(df): 
    """Add moving-average and relative-position features to a price frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Date', 'Close', 'High' and 'Low'.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` sorted by 'Date' ascending (original index labels are
        kept) with these columns added:

        * ``Close_Rel_Max{w}`` / ``Close_Rel_Min{w}`` - Close over the rolling
          High max / Low min, for w in 10, 25, 50, 100, 200.
        * ``SMA_{w}_Lag{l}_min`` / ``_max`` - the SMA over its own rolling
          min / max.
        * ``num_days_{w}`` - signed run length of Close above (+) or below (-)
          the SMA, for w in 50, 100, 200.
        * ``Rel_Max_{w}``, ``Rel_Min_{w}``, ``Max_{w}_Rows_Since``,
          ``Min_{w}_Rows_Since`` for w in 10, 30, 60, 120, 240.
        * ``{a}_SMA_{b}`` - ratio of two SMAs.
        * ``SMA_{w}`` - NOTE: on return this holds Close / SMA, not the raw
          average; the raw value is only used internally.

        Ratio features are rounded to 2 decimals.
    """
    # Ideas not implemented yet:
    #   - percent return over 1, 3, 5, 10 days
    #   - win rate over 10, 20, 30, 50 days
    #   - number of days positive / negative over "th" (presumably a threshold)

    df = df.sort_values(by='Date', ascending=True)

    # =======================
    # Basic SMAs and Ratios
    # =======================
    sma_windows = [10, 25, 50, 100, 200]
    for sma_window in sma_windows:

        df[f'SMA_{sma_window}'] = df['Close'].rolling(window=sma_window).mean()

        # Current close relative to the n-day high | max 1
        df[f'Close_Rel_Max{sma_window}'] = (df['Close'] / df['High'].rolling(window=sma_window).max()).round(2)
        # Current close relative to the n-day low | min 1
        df[f'Close_Rel_Min{sma_window}'] = (df['Close'] / df['Low'].rolling(window=sma_window).min()).round(2)

    lag_periods = [10, 25, 50, 100, 150, 200]
    for sma_window in sma_windows:
        col = f'SMA_{sma_window}'
        sma_series = df[col]
        new_cols = {}
        for lag in lag_periods:
            new_cols[f'{col}_Lag{lag}_min'] = (sma_series / sma_series.rolling(window=lag).min()).round(2)
            new_cols[f'{col}_Lag{lag}_max'] = (sma_series / sma_series.rolling(window=lag).max()).round(2)

        # One concat per window instead of one column insert per feature.
        df = pd.concat([df, pd.DataFrame(new_cols, index=df.index)], axis=1)

    def signed_streak(prices, averages):
        """Signed count of consecutive rows with price above/below its average.

        Row 0 is always 0. The first row after a side change (or a row where
        the comparison is undefined, e.g. NaN average) resets the count to 0.
        """
        streak = [0] * len(prices)
        for pos in range(1, len(prices)):
            prev = streak[pos - 1]
            if prices[pos] > averages[pos]:
                streak[pos] = prev + 1 if prev >= 0 else 0
            elif prices[pos] < averages[pos]:
                streak[pos] = prev - 1 if prev <= 0 else 0
            # else: stays 0
        return streak

    # Walk the rows by POSITION in date order. Label-based df.loc[i] only
    # worked when the index happened to be 0..n-1 in date order.
    close_values = df['Close'].to_numpy()
    for window in [50, 100, 200]:
        df[f'num_days_{window}'] = signed_streak(close_values, df[f'SMA_{window}'].to_numpy())

    # ============================
    # Relative Position Features
    # ============================
    def rows_since_max(x): return len(x) - x.argmax() - 1
    def rows_since_min(x): return len(x) - x.argmin() - 1

    for window in [10, 30, 60, 120, 240]:

        df[f'Rel_Max_{window}'] = (df['High'] / df['High'].rolling(window=window).max()).round(2)
        df[f'Rel_Min_{window}'] = (df['Low'] / df['Low'].rolling(window=window).min()).round(2)
        df[f'Max_{window}_Rows_Since'] = df['High'].rolling(window=window).apply(rows_since_max, raw=True)
        df[f'Min_{window}_Rows_Since'] = df['Low'].rolling(window=window).apply(rows_since_min, raw=True)

    for a, b in [(50, 100), (50, 200), (100, 200), (10, 25), (10, 50), (10, 100), (10, 200), (25, 50), (25, 100), (25, 200)]:    
        df[f'{a}_SMA_{b}'] = (df[f'SMA_{a}'] / df[f'SMA_{b}']).round(2)

    # Must stay last: replaces each raw SMA with Close / SMA, and everything
    # above needs the raw values.
    for window in sma_windows:

        df[f'SMA_{window}'] = (df['Close'] / df[f'SMA_{window}']).round(2)
        #df[f'EMA_{window}'] = (df['Close'] / df[f'EMA_{window}']).round(2)

    return df

In [243]:
# Display the df_main columns collected in past_return_cols
# (the columns whose names contain "Past_Ret").
df_main[past_return_cols]

Unnamed: 0,Past_Return_1,Past_Return%_1,Past_Return_2,Past_Return%_2,Past_Return_3,Past_Return%_3,Past_Return_5,Past_Return%_5,Past_Return_10,Past_Return%_10,Past_Return_20,Past_Return%_20,Past_Return_30,Past_Return%_30
6758,1,0.013338,0,-0.008081,0,-0.008925,0,-0.016161,0,-0.011586,1,0.000039,0,-0.013622
6757,0,-0.021708,0,-0.022564,0,-0.018896,0,-0.031428,0,-0.016331,0,-0.000438,0,-0.023153
6756,0,-0.000837,1,0.002753,0,-0.008016,0,-0.008676,1,0.013102,1,0.034804,0,-0.002346
6755,1,0.003587,0,-0.007173,0,-0.008669,1,0.002107,1,0.012014,1,0.017397,1,0.000933
6754,0,-0.010798,0,-0.012299,0,-0.011460,0,-0.007215,1,0.000194,1,0.015811,1,0.005123
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,1,0.008423,1,0.036101,1,0.012034,0,,0,,0,,0,
3,1,0.027913,1,0.003641,1,0.008496,0,,0,,0,,0,
2,0,-0.024969,0,-0.019975,0,,0,,0,,0,,0,
1,1,0.004872,0,,0,,0,,0,,0,,0,


In [239]:
# Display the moving-average lag features.
# NOTE(review): ma_lag is only assigned in a later cell (the "Feature Sets"
# section), so this cell fails on a fresh top-to-bottom run.
df_main[ma_lag]

Unnamed: 0,SMA_10_Lag10_min,SMA_10_Lag10_max,SMA_10_Lag25_min,SMA_10_Lag25_max,SMA_10_Lag50_min,SMA_10_Lag50_max,SMA_10_Lag100_min,SMA_10_Lag100_max,SMA_10_Lag150_min,SMA_10_Lag150_max,...,SMA_200_Lag25_min,SMA_200_Lag25_max,SMA_200_Lag50_min,SMA_200_Lag50_max,SMA_200_Lag100_min,SMA_200_Lag100_max,SMA_200_Lag150_min,SMA_200_Lag150_max,SMA_200_Lag200_min,SMA_200_Lag200_max
6758,1.00,1.0,1.01,1.0,1.03,1.00,1.09,0.99,1.18,0.99,...,1.03,1.0,1.05,1.0,1.10,1.0,1.14,1.0,1.16,1.0
6757,1.00,1.0,1.01,1.0,1.03,1.00,1.09,1.00,1.18,1.00,...,1.03,1.0,1.05,1.0,1.10,1.0,1.14,1.0,1.16,1.0
6756,1.01,1.0,1.01,1.0,1.04,1.00,1.10,1.00,1.18,1.00,...,1.03,1.0,1.05,1.0,1.10,1.0,1.14,1.0,1.16,1.0
6755,1.01,1.0,1.01,1.0,1.03,1.00,1.09,1.00,1.18,1.00,...,1.03,1.0,1.05,1.0,1.09,1.0,1.14,1.0,1.15,1.0
6754,1.01,1.0,1.01,1.0,1.03,0.99,1.09,0.99,1.18,0.99,...,1.03,1.0,1.05,1.0,1.09,1.0,1.14,1.0,1.15,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,


In [247]:
# -----------------------------
# Feature Sets
# -----------------------------
# Split the 'ma' family into four (possibly overlapping) groups by
# case-insensitive substring match on the column name.
ma_all_cols = feature_sets['ma']

ma_lag, ma_rel, ma_sma, ma_num = [], [], [], []
for _name in ma_all_cols:
    _lowered = _name.lower()
    _is_lag = "lag" in _lowered
    if _is_lag:
        ma_lag.append(_name)
    if "rel_" in _lowered:
        ma_rel.append(_name)
    if "sma_" in _lowered and not _is_lag:
        ma_sma.append(_name)
    if "num" in _lowered or "since" in _lowered:
        ma_num.append(_name)

# The remaining families are used exactly as delivered.
rsi_cols, macd_cols, volu_cols = (feature_sets[key] for key in ('rsi', 'macd', 'volume'))
atr_adx_cols, vola_cols = (feature_sets[key] for key in ('atr_adx', 'volatility'))
vix_skew_cols, experimental_slope_cols = (feature_sets[key] for key in ('vix_skew', 'experimental_slope'))

# Parallel lists: sets[i] holds the columns labelled set_names[i].
_labelled_sets = [
    ("ma_lag", ma_lag),
    ("ma_rel", ma_rel),
    ("ma_sma", ma_sma),
    ("ma_num", ma_num),
    ("rsi_macd", rsi_cols + macd_cols),
    ("volu", volu_cols),
    ("atr_adx" + "vola", atr_adx_cols + vola_cols),
    ("vix_skew", vix_skew_cols),
    ("experimental_slope", experimental_slope_cols),
]
sets = [members for _, members in _labelled_sets]
set_names = [label for label, _ in _labelled_sets]

# -----------------------------
# Models
# -----------------------------
# Regularised first-pass configuration.
# NOTE(review): this binding is replaced by the reassignment of `models`
# immediately below, so it is not the dict the evaluation iterates over.
_first_pass_params = dict(
    n_estimators=800,
    learning_rate=0.02,
    max_depth=5,
    min_child_weight=10,
    subsample=0.7,
    colsample_bytree=0.7,
    gamma=0.3,
    reg_alpha=0.5,
    reg_lambda=15,
    tree_method="hist",
    random_state=42,
    n_jobs=-1,
)
models = {"xgb_first_pass": XGBClassifier(**_first_pass_params)}

# Default-parameter boosters keyed "xgboost-<hundreds of trees>".
# (400 and 600 trees, i.e. xgboost-4 / xgboost-6, are currently switched off.)
models = {
    f"xgboost-{n_trees // 100}": XGBClassifier(n_estimators=n_trees, random_state=42, n_jobs=-1)
    for n_trees in (300, 500)
}

# -----------------------------
# Helpers
# -----------------------------
def _compute_dist(y):
    """Distribution stats for y in {0,1}."""
    n = int(len(y))
    n_pos = int((y == 1).sum())
    n_neg = int((y == 0).sum())
    return {
        "test_n": n,
        "test_pos_n": n_pos,
        "test_neg_n": n_neg,
        "test_pos_frac": (n_pos / n) if n else np.nan,
        "test_neg_frac": (n_neg / n) if n else np.nan,
    }

def walkback_runs(
    df,
    feature_cols,
    target_col,
    *,
    date_col="Date",
    train_years=6,
    test_days=5,
    step_days=5,
    runs=20,
    horizon_days=1,        # recorded in the output only; not used for the purge
    purge_days=None,       # None -> no purge (0 rows)
    fill_inf=0.0,
):
    """
    Walk-back out-of-time evaluation of every estimator in the module-level
    ``models`` dict.

    For run k = 0..runs-1 (newest window first):
      - the test window is the ``test_days`` rows ending ``k * step_days`` rows
        before the end of the date-sorted frame;
      - ``purge_days`` rows directly before the test window are skipped, so
        forward-looking labels at the end of train cannot overlap the test
        window. When ``purge_days`` is None NOTHING is purged - pass the label
        horizon explicitly;
      - the train window is the ``245 * train_years`` rows before the purge gap;
      - a fresh clone of each model is fitted on train and scored on test.
    The loop stops early as soon as a window would start before row 0.

    Parameters
    ----------
    df : DataFrame holding ``feature_cols``, ``target_col`` and the date column.
    feature_cols : candidate feature names. Names starting with "Return"
        (the forward-return targets) are dropped; "Past_Return*" features are
        kept, matching the filter used by ``perm_list``.
    target_col : name of the 0/1 label column.
    date_col : column used for ordering and for the reported window bounds.
        If it is missing, the frame is ordered by "Date" and row positions are
        reported instead.
    fill_inf : replacement for +/-inf in the feature columns.

    Returns
    -------
    DataFrame with one row per (run, model): accuracy on the test window, the
    test class balance from ``_compute_dist``, window bounds/sizes, and
    ``pred`` - the class-1 probability of the FIRST test row only, rounded to
    the nearest 0.05. Empty if no run fits in the data.
    """
    sort_key = date_col if date_col in df.columns else "Date"
    dfw = df.sort_values(sort_key).reset_index(drop=True).copy()

    # Only the target-style columns are excluded; a substring test on "Return"
    # would also discard the Past_Return* features chosen upstream.
    safe_feature_cols = [c for c in feature_cols if not c.startswith("Return")]
    n_features = len(safe_feature_cols)

    # Basic numeric cleaning
    dfw[safe_feature_cols] = dfw[safe_feature_cols].replace([np.inf, -np.inf], fill_inf)

    n = len(dfw)
    train_size = 245 * int(train_years)
    test_size = int(test_days)
    step = int(step_days)
    purge = int(purge_days) if purge_days is not None else 0

    X_all = dfw[safe_feature_cols].to_numpy()
    y_all = dfw[target_col].to_numpy()
    dates = dfw[date_col].to_numpy() if date_col in dfw.columns else None

    def _label(pos):
        """Date at row ``pos``, or the position itself when no dates exist."""
        return dates[pos] if dates is not None else pos

    rows = []

    for k in range(runs):
        test_end = n - k * step
        test_start = test_end - test_size
        if test_start < 0:
            break

        train_end = test_start - purge
        train_start = train_end - train_size
        if train_start < 0 or train_end <= train_start:
            break

        print(
            f"Run {k+1}/{runs} | "
            f"Train: {_label(train_start)} → {_label(train_end-1)} | "
            f"Test: {_label(test_start)} → {_label(test_end-1)} | "
            f"Train_n={train_end-train_start} | Test_n={test_end-test_start}"
        )

        X_train = X_all[train_start:train_end]
        y_train = y_all[train_start:train_end]
        X_test  = X_all[test_start:test_end]
        y_test  = y_all[test_start:test_end]

        dist = _compute_dist(y_test)

        for model_name, model in models.items():
            m = clone(model)
            m.fit(X_train, y_train)

            preds = m.predict(X_test)

            # Score for the first test row only (the whole window when
            # test_days == 1).
            proba = np.nan
            if hasattr(m, "predict_proba"):
                proba = float(m.predict_proba(X_test)[0, 1])   # prob(class=1)
            elif hasattr(m, "decision_function"):
                s = float(m.decision_function(X_test)[0])
                proba = float(1.0 / (1.0 + np.exp(-s)))        # squash to (0,1)
            # Snap to a 0.05 grid.
            proba = np.nan if np.isnan(proba) else round(round(proba / 0.05) * 0.05, 2)

            rows.append({
                "run": k + 1,
                "model": model_name,
                "test_days": test_days,
                "pred": round(proba,2),

                # Core metric. Balanced accuracy, MCC, F1, precision and
                # recall were tried earlier and are currently disabled.
                "acc": float(accuracy_score(y_test, preds)),

                **dist,

                "train_n": int(len(y_train)),
                "train_start": _label(train_start),
                "train_end": _label(train_end - 1),
                "test_start": _label(test_start),
                "test_end": _label(test_end - 1),
                "train_years": train_years,
                "horizon_days": horizon_days,
                "n_features": n_features,
            })

    return pd.DataFrame(rows)

def perm_list(
    df,
    feature_cols,
    target_col,
    *,
    date_col="Date",
    train_years=6,
    test_days=5,
    step_days=5,
    purge_days=None, 
    fill_inf=0.0,
    k=1,
    model=None,
    pi_frac=0.55,
    pi_threshold=0.002,
    min_keep=6,
    n_repeats=50,
):
    """
    Select features by permutation importance, measured on training data only.

    The train window is located exactly as in ``walkback_runs`` for run index
    ``k``: it ends ``purge_days`` rows before the test window that itself ends
    ``k * step_days`` rows before the end of the date-sorted frame, and spans
    ``245 * train_years`` rows. A clone of ``model`` is fitted on that window
    and permutation importance (scoring="neg_log_loss") is computed on its
    most recent ``pi_frac`` share.

    Parameters
    ----------
    df, feature_cols, target_col : data, candidate features, 0/1 label column.
        Candidate names starting with "Return" are dropped.
    date_col : ordering column; "Date" is used if it is missing from ``df``.
    purge_days : rows skipped between train and test; None means 0.
    fill_inf : replacement for +/-inf in the feature columns.
    model : estimator to clone. When None, the first estimator in the
        module-level ``models`` dict is used.
    pi_frac : share of the train window (its tail) used for the importances.
    pi_threshold : a feature is kept when its mean importance exceeds this.
    min_keep : if fewer features pass the threshold, the ``min_keep`` with the
        highest mean importance are returned instead.
    n_repeats : permutation repeats per feature.

    Returns
    -------
    list[str] of selected feature names, most important first.

    Raises
    ------
    ValueError
        If the requested window does not fit in ``df``, or if no model is
        given and ``models`` is empty.
    """
    sort_key = date_col if date_col in df.columns else "Date"
    dfw = df.sort_values(sort_key).reset_index(drop=True).copy()

    # Drop any accidental forward-return cols from features (belt+suspenders)
    safe_feature_cols = [c for c in feature_cols if not (c.startswith("Return"))]

    # Basic numeric cleaning
    dfw[safe_feature_cols] = dfw[safe_feature_cols].replace([np.inf, -np.inf], fill_inf)

    n = len(dfw)
    train_size = 245 * int(train_years)
    test_size = int(test_days)
    step = int(step_days)
    purge = int(purge_days) if purge_days is not None else 0

    test_end = n - k * step
    test_start = test_end - test_size
    train_end = test_start - purge
    train_start = train_end - train_size

    # Negative bounds would silently wrap around in the slices below.
    if test_start < 0 or train_start < 0 or train_end <= train_start:
        raise ValueError(
            f"Not enough rows ({n}) for a train window of {train_size} rows "
            f"ending at row {train_end}"
        )

    if model is None:
        if not models:
            raise ValueError("No model given and the module-level `models` dict is empty")
        model = next(iter(models.values()))

    X_all = dfw[safe_feature_cols].to_numpy()
    y_all = dfw[target_col].to_numpy()

    X_train = X_all[train_start:train_end]
    y_train = y_all[train_start:train_end]

    # Importance is scored on the most recent part of the train window.
    N_PI = max(1, int(len(X_train) * pi_frac))
    X_pi = X_train[-N_PI:]
    y_pi = y_train[-N_PI:]

    # fit model
    m = clone(model).fit(X_train, y_train)

    # permutation importance on training-only slice
    pi = permutation_importance(
        m,
        X_pi,
        y_pi,
        scoring="neg_log_loss",
        n_repeats=n_repeats,
        random_state=42,
        n_jobs=-1,
    )

    # The importances follow the column order of X, i.e. safe_feature_cols
    # (not feature_cols, which may still contain dropped names).
    pi_df = pd.DataFrame({
        "feature": safe_feature_cols,
        "pi_mean": pi.importances_mean,
        "pi_std":  pi.importances_std,
    }).sort_values("pi_mean", ascending=False)

    # keep only features whose mean importance clears the threshold
    pi_cols = pi_df['feature'][pi_df['pi_mean'] > pi_threshold].to_list()

    if len(pi_cols) < min_keep:
        pi_cols = pi_df.head(min_keep)["feature"].tolist()
    print(f"Ran permutation importance for horizon {purge_days} | Len: {N_PI} | Old: {len(feature_cols)} | New: {len(pi_cols)}")
    
    return pi_cols

# -----------------------------
# Run grid (horizon -> candidate feature pool / training window)
# -----------------------------
returns = [2, 5, 10, 20, 30]
#train_years_grid = [4, 5, 6]#[3, 5, 7]
days_assessed = 230
test_days = [1]
results = []

# Every feature family; most horizons let permutation importance prune this.
all_feature_cols = (
    ma_lag + ma_rel + ma_sma + ma_num + rsi_cols + macd_cols + volu_cols
    + atr_adx_cols + vola_cols + vix_skew_cols + experimental_slope_cols
)

for test_day in test_days:

    runs = int(days_assessed / test_day)

    for r in returns:

        # Per-horizon candidate pool and training window. The "initial" pools
        # and the notes record earlier hand-picked experiments.
        if r == 2:
            # initial: experimental_slope_cols + ma_lag + rsi_cols + macd_cols + volu_cols
            # notes: "initial+rsi+macd" worse | "initial+sma" worse
            base_cols = list(all_feature_cols)
            list_name = "initial+sma"
            train_years = 4
        elif r == 5:
            # initial: experimental_slope_cols + ma_lag + ma_num + rsi_cols + macd_cols + volu_cols
            # notes: "initial+atradx" worse | "initial+sma" worse | "initial+vixskew" much worse
            base_cols = list(all_feature_cols)
            list_name = "initial+vixskew"
            train_years = 5
        elif r == 10:
            # initial: atr_adx_cols + vola_cols + ma_num + volu_cols + ma_sma
            # notes: "initial+sma" better | "initial+sma+lag" worse
            base_cols = list(all_feature_cols)
            list_name = "initial+sma+lag"
            train_years = 5
        elif r == 20:
            # The only horizon still on a hand-picked pool.
            # notes: "initial+volu" worse | "initial+lag" much worse
            base_cols = atr_adx_cols + vola_cols + ma_num + ma_sma
            list_name = "initial+lag"
            train_years = 6
        else:
            # initial: atr_adx_cols + vola_cols + ma_num + ma_sma + volu_cols + rsi_cols + macd_cols
            # notes: "initial+volu" worse | "initial-rsimacd+volu" better
            base_cols = list(all_feature_cols)
            list_name = "initial-rsimacd+volu"
            train_years = 5

        target_col = f"Return_{r}"
        # Trim the r most recent rows, whose forward outcome is not known yet.
        # Sort newest-first explicitly so the trim does not depend on the row
        # order df_main happens to arrive in.
        df_final = df_main.sort_values("Date", ascending=False).iloc[r:].copy()

        base_cols = base_cols + past_return_cols

        perm_cols = perm_list(
            df=df_final,
            feature_cols=base_cols,
            target_col=target_col,
            date_col="Date",
            train_years=train_years,
            test_days=test_day,
            step_days=test_day,
            purge_days=r, 
            fill_inf=0.0,
        )

        print(perm_cols)
        #for train_years in train_years_grid:
        df_scores = walkback_runs(
            df=df_final,
            feature_cols=perm_cols,
            target_col=target_col,
            date_col="Date",
            train_years=train_years,
            test_days=test_day,
            step_days=test_day,
            runs=runs,
            horizon_days=r,
            purge_days=r, 
            fill_inf=0.0,
        )

        # NOTE(review): the label reads ".02" - confirm it still matches the
        # importance threshold perm_list actually applies.
        df_scores["feature_set"] = "pi>.02" #list_name
        df_scores["horizon"] = r

        results.append(df_scores)

# pd.concat raises on an empty list (e.g. when `returns` is empty).
results_df = pd.concat(results, ignore_index=True) if results else pd.DataFrame()

Ran permutation importance for horizon 2 | Len: 539 | Old: 195 | New: 14
['Past_Return%_5', 'VROC_10', 'Vol_Ratio_50_zscore', 'ADX', 'VROC_5', 'VIX_1_change', 'skew_10_change', 'Zscore_50', 'vol_5', 'CCI_14', 'Past_Return%_2', 'CMF_10', 'skew_rolling_std', 'VIX']
Run 1/230 | Train: 2022-02-16 → 2026-01-13 | Test: 2026-01-16 → 2026-01-16 | Train_n=980 | Test_n=1
Run 2/230 | Train: 2022-02-15 → 2026-01-12 | Test: 2026-01-15 → 2026-01-15 | Train_n=980 | Test_n=1
Run 3/230 | Train: 2022-02-14 → 2026-01-09 | Test: 2026-01-14 → 2026-01-14 | Train_n=980 | Test_n=1
Run 4/230 | Train: 2022-02-11 → 2026-01-08 | Test: 2026-01-13 → 2026-01-13 | Train_n=980 | Test_n=1
Run 5/230 | Train: 2022-02-10 → 2026-01-07 | Test: 2026-01-12 → 2026-01-12 | Train_n=980 | Test_n=1
Run 6/230 | Train: 2022-02-09 → 2026-01-06 | Test: 2026-01-09 → 2026-01-09 | Train_n=980 | Test_n=1
Run 7/230 | Train: 2022-02-08 → 2026-01-05 | Test: 2026-01-08 → 2026-01-08 | Train_n=980 | Test_n=1
Run 8/230 | Train: 2022-02-07 → 2026

In [None]:
# -----------------------------
# Run grid (feature sets x horizon x train_years, etc.)
# -----------------------------
# Configuration for the second grid cell.
returns = [2, 5, 10, 20, 30]   # horizons r; each selects the target column Return_{r}
#train_years_grid = [4, 5, 6]#[3, 5, 7] 
days_assessed = 0
test_days = [1]                # test-window lengths iterated by the loop below
results= []                    # collects one score frame per (horizon, test_day)
results_df = pd.DataFrame()    # placeholder; replaced by the concat at the end of this cell
# Rebinds the module-level `models` dict that walkback_runs iterates over.
models = {"xgboost-6": XGBClassifier(n_estimators=600, random_state=42, n_jobs=-1)}

def perm_list(
    df,
    feature_cols,
    target_col,
    *,
    date_col="Date",
    train_years=6,
    test_days=5,
    step_days=5,
    purge_days=None, 
    fill_inf=0.0,
    k=1,
    model=None,
    pi_frac=0.75,
    pi_threshold=0.005,
    min_keep=10,
    n_repeats=50,
):
    """
    Select features by permutation importance, measured on training data only.

    The train window is located exactly as in ``walkback_runs`` for run index
    ``k``: it ends ``purge_days`` rows before the test window that itself ends
    ``k * step_days`` rows before the end of the date-sorted frame, and spans
    ``245 * train_years`` rows. A clone of ``model`` is fitted on that window
    and permutation importance (scoring="neg_log_loss") is computed on its
    most recent ``pi_frac`` share.

    Parameters
    ----------
    df, feature_cols, target_col : data, candidate features, 0/1 label column.
        Candidate names starting with "Return" are dropped.
    date_col : ordering column; "Date" is used if it is missing from ``df``.
    purge_days : rows skipped between train and test; None means 0.
    fill_inf : replacement for +/-inf in the feature columns.
    model : estimator to clone. When None, the first estimator in the
        module-level ``models`` dict is used.
    pi_frac : share of the train window (its tail) used for the importances.
    pi_threshold : a feature is kept when its mean importance exceeds this.
    min_keep : if fewer features pass the threshold, the ``min_keep`` with the
        highest mean importance are returned instead.
    n_repeats : permutation repeats per feature.

    Returns
    -------
    list[str] of selected feature names, most important first.

    Raises
    ------
    ValueError
        If the requested window does not fit in ``df``, or if no model is
        given and ``models`` is empty.
    """
    sort_key = date_col if date_col in df.columns else "Date"
    dfw = df.sort_values(sort_key).reset_index(drop=True).copy()

    # Drop any accidental forward-return cols from features (belt+suspenders)
    safe_feature_cols = [c for c in feature_cols if not (c.startswith("Return"))]

    # Basic numeric cleaning
    dfw[safe_feature_cols] = dfw[safe_feature_cols].replace([np.inf, -np.inf], fill_inf)

    n = len(dfw)
    train_size = 245 * int(train_years)
    test_size = int(test_days)
    step = int(step_days)
    purge = int(purge_days) if purge_days is not None else 0

    test_end = n - k * step
    test_start = test_end - test_size
    train_end = test_start - purge
    train_start = train_end - train_size

    # Negative bounds would silently wrap around in the slices below.
    if test_start < 0 or train_start < 0 or train_end <= train_start:
        raise ValueError(
            f"Not enough rows ({n}) for a train window of {train_size} rows "
            f"ending at row {train_end}"
        )

    if model is None:
        if not models:
            raise ValueError("No model given and the module-level `models` dict is empty")
        model = next(iter(models.values()))

    X_all = dfw[safe_feature_cols].to_numpy()
    y_all = dfw[target_col].to_numpy()

    X_train = X_all[train_start:train_end]
    y_train = y_all[train_start:train_end]

    # Importance is scored on the most recent part of the train window.
    N_PI = max(1, int(len(X_train) * pi_frac))
    X_pi = X_train[-N_PI:]
    y_pi = y_train[-N_PI:]

    # fit model
    m = clone(model).fit(X_train, y_train)

    # permutation importance on training-only slice
    pi = permutation_importance(
        m,
        X_pi,
        y_pi,
        scoring="neg_log_loss",
        n_repeats=n_repeats,
        random_state=42,
        n_jobs=-1,
    )

    # The importances follow the column order of X, i.e. safe_feature_cols
    # (not feature_cols, which may still contain dropped names).
    pi_df = pd.DataFrame({
        "feature": safe_feature_cols,
        "pi_mean": pi.importances_mean,
        "pi_std":  pi.importances_std,
    }).sort_values("pi_mean", ascending=False)

    # keep only features whose mean importance clears the threshold
    pi_cols = pi_df['feature'][pi_df['pi_mean'] > pi_threshold].to_list()

    if len(pi_cols) < min_keep:
        pi_cols = pi_df.head(min_keep)["feature"].tolist()
    print(f"Ran permutation importance for horizon {purge_days} | Len: {N_PI} | Old: {len(feature_cols)} | New: {len(pi_cols)}")
    
    return pi_cols

# Every feature family; most horizons let permutation importance prune this.
all_feature_cols = (
    ma_lag + ma_rel + ma_sma + ma_num + rsi_cols + macd_cols + volu_cols
    + atr_adx_cols + vola_cols + vix_skew_cols + experimental_slope_cols
)

for r in returns:
    for test_day in test_days:

        # Derived from this cell's own settings. Previously `runs` was never
        # assigned here and silently reused whatever an earlier cell left in
        # the kernel. With days_assessed = 0 no walk-back run is executed and
        # only the permutation-importance selection is produced.
        runs = int(days_assessed / test_day)

        # Per-horizon candidate pool and training window. The "initial" pools
        # and the notes record earlier hand-picked experiments.
        if r == 2:
            # initial: experimental_slope_cols + ma_lag + rsi_cols + macd_cols + volu_cols
            # notes: "initial+rsi+macd" worse | "initial+sma" worse
            base_cols = list(all_feature_cols)
            list_name = "initial+sma"
            train_years = 4
        elif r == 5:
            # initial: experimental_slope_cols + ma_lag + ma_num + rsi_cols + macd_cols + volu_cols
            # notes: "initial+atradx" worse | "initial+sma" worse | "initial+vixskew" much worse
            base_cols = list(all_feature_cols)
            list_name = "initial+vixskew"
            train_years = 5
        elif r == 10:
            # initial: atr_adx_cols + vola_cols + ma_num + volu_cols + ma_sma
            # notes: "initial+sma" better | "initial+sma+lag" worse
            base_cols = list(all_feature_cols)
            list_name = "initial+sma+lag"
            train_years = 5
        elif r == 20:
            # The only horizon still on a hand-picked pool.
            # notes: "initial+volu" worse | "initial+lag" much worse
            base_cols = atr_adx_cols + vola_cols + ma_num + ma_sma
            list_name = "initial+lag"
            train_years = 6
        else:
            # initial: atr_adx_cols + vola_cols + ma_num + ma_sma + volu_cols + rsi_cols + macd_cols
            # notes: "initial+volu" worse | "initial-rsimacd+volu" better
            base_cols = list(all_feature_cols)
            list_name = "initial-rsimacd+volu"
            train_years = 5
            # Remaining horizons switch to a smaller booster. This rebinds the
            # module-level `models`, so it stays in effect after the loop.
            models = {"xgboost-2": XGBClassifier(n_estimators=200, random_state=42, n_jobs=-1)}

        target_col = f"Return_{r}"
        # Trim the r most recent rows, whose forward outcome is not known yet.
        # Sort newest-first explicitly so the trim does not depend on the row
        # order df_main happens to arrive in.
        df_final = df_main.sort_values("Date", ascending=False).iloc[r:].copy()

        base_cols = base_cols + past_return_cols

        perm_cols = perm_list(
            df=df_final,
            feature_cols=base_cols,
            target_col=target_col,
            date_col="Date",
            train_years=train_years,
            test_days=test_day,
            step_days=test_day,
            purge_days=r, 
            fill_inf=0.0,
        )

        print(sorted(perm_cols))
        #for train_years in train_years_grid:
        df_scores = walkback_runs(
            df=df_final,
            feature_cols=perm_cols,
            target_col=target_col,
            date_col="Date",
            train_years=train_years,
            test_days=test_day,
            step_days=test_day,
            runs=runs,
            horizon_days=r,
            purge_days=r, 
            fill_inf=0.0,
        )

        df_scores["feature_set"] = "pi_.001_<10_.75_pry_trim" #list_name
        df_scores["horizon"] = r

        results.append(df_scores)

# pd.concat raises on an empty list (e.g. when `returns` is empty).
results_df = pd.concat(results, ignore_index=True) if results else pd.DataFrame()

Ran permutation importance for horizon 2 | Len: 735 | Old: 195 | New: 10
['ADX', 'Past_Return%_2', 'Past_Return%_5', 'VIX_1_change', 'VROC_10', 'VROC_5', 'Vol_Ratio_50_zscore', 'Zscore_50', 'skew_10_change', 'vol_5']
Ran permutation importance for horizon 2 | Len: 735 | Old: 195 | New: 10
['ADX', 'Past_Return%_2', 'VIX_rolling_std', 'VROC_10', 'VROC_5', 'Vol_Ratio_25_zscore', 'Vol_Ratio_50_zscore', 'Zscore_50', 'skew_1_change', 'skew_5_change']
Ran permutation importance for horizon 5 | Len: 918 | Old: 195 | New: 10
['ADL', 'ATR_14', 'BB_Mid', 'CMF_10', 'Max_120_Rows_Since', 'SMA_10_Lag200_min', 'Vol_Ratio_25_zscore', 'Vol_Ratio_50', 'num_days_100', 'num_days_200']
Ran permutation importance for horizon 5 | Len: 918 | Old: 195 | New: 10
['ATR_14', 'BB_Mid_raw', 'Min_240_Rows_Since', 'Price_Vol_Ratio_5', 'RSI_14_3', 'SMA_100_Lag100_min', 'Vol_Ratio_50_zscore', 'num_days_100', 'num_days_200', 'skew_rolling_std']
Ran permutation importance for horizon 10 | Len: 918 | Old: 195 | New: 10
['

KeyboardInterrupt: 

In [273]:
# Alphabetical view of the features returned by the most recent perm_list call.
sorted(perm_cols)

['Close_Rel_Max200',
 'Min_120_Rows_Since',
 'Min_240_Rows_Since',
 'SMA_100_Lag200_min',
 'SMA_10_Lag50_min',
 'SMA_200',
 'SMA_50_Lag100_max',
 'Vol_Ratio_100',
 'num_days_100',
 'num_days_200']

In [264]:
# Append the current grid results to the running log in best_models_perf.csv.
# The file must already exist; it is read, extended and written back in place.
# NOTE(review): not idempotent - re-running this cell appends the same rows again.
df = pd.read_csv("best_models_perf.csv")
#df.to_csv('best_models_backup.csv', index=False)
# Columns of results_df that are persisted.
cols = ['model', 'test_days', 'pred', 'acc', 'test_n', 'test_pos_n', 'train_n', 'test_start', 'test_end', 'train_years', 'feature_set', 'horizon']
df_new = results_df[cols].copy()
df_concat = pd.concat([df, df_new], ignore_index=True)
df_concat.to_csv('best_models_perf.csv', index=False)

In [186]:
# Write the selected columns of the current results_df to performance.csv,
# replacing any existing file of that name.
cols = ['model', 'test_days', 'pred', 'acc', 'test_n', 'test_pos_n', 'train_n', 'test_start', 'test_end', 'train_years', 'feature_set', 'horizon']
df_new = results_df[cols].copy()
df_new.to_csv('performance.csv', index=False)