In [None]:
import min_features, daily_return
import importlib
import pandas as pd
import numpy as np
from sklearn.base import clone
from sklearn.metrics import accuracy_score
from sklearn.inspection import permutation_importance
import warnings
from xgboost import XGBClassifier
warnings.filterwarnings("ignore", message="y_pred contains classes not in y_true")
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Re-import the two local helper modules so that edits made on disk are
# picked up without restarting the kernel.
importlib.reload(min_features)
importlib.reload(daily_return)

# "N" skips the minute-level feature merge; any other value enables it.
min_feats = "N"
# Horizons handed to daily_return.pull_daily.
returns = [1, 2, 3, 5, 10, 20, 30]

if min_feats == 'N':
    # Daily-only path: every column list is derived from the daily frame.
    df_daily, feature_sets = daily_return.pull_daily('QQQ', returns) 
    return_cols = [c for c in df_daily.columns if "Return_" in c]
    # Skip the first column, then keep everything whose name does not mention "return".
    daily_cols = [c for c in df_daily.columns[1:] if "return" not in c.lower()]
else:
    # Minute + daily path: inner-join both frames on Date, newest row first.
    df_min = min_features.min_features()
    df_daily, feature_sets = daily_return.pull_daily('QQQ', returns) 

    df_main = (
        df_min
        .merge(df_daily, how='inner', on='Date')
        .sort_values(by='Date', ascending=False)
    )

    return_cols = [c for c in df_main.columns if "Return_" in c]
    daily_cols = [c for c in df_daily.columns[1:] if "return" not in c.lower()]

    # Minute-frame columns tagged close_/post_/overnight_ are listed separately ...
    close_cols = [
        c for c in df_min.columns
        if ("close_" in c) or ("post_" in c) or ("overnight_" in c)
    ]
    # ... and min_cols is whatever remains, minus the first remaining column.
    min_cols = [c for c in df_min.columns if c not in close_cols][1:]

#top_models = pd.read_csv("top_performers2.csv")
print(f'Available Feature Sets: {feature_sets.keys()}')

Available Feature Sets: dict_keys(['ma', 'rsi', 'macd', 'volume', 'atr_adx', 'volatility', 'vix_skew', 'experimental_slope'])


In [None]:
# Add any new features
# Disabled experiment, kept for reference: a 10-row rolling sum of every Past_Return_ column.
#df_daily[[f"{c}_sum10" for c in df_daily.columns if c.startswith("Past_Return_")]] = (df_daily.sort_values(by="Date", ascending=True).filter(like="Past_Return_").rolling(10, min_periods=1).sum())

# Columns whose name starts with "Past_Return%".
past_perc_cols = [col for col in df_daily.columns if col.startswith("Past_Return%")]
# Columns whose name ends with "sum10" (only present when the block above is enabled).
past_sum_cols = [col for col in df_daily.columns if col.endswith("sum10")]
# Union of the two groups, in df_daily column order.
past_ret_cols = [
    col for col in df_daily.columns
    if col in past_perc_cols or col in past_sum_cols
]

# NOTE: this rebinds df_main, so a merged frame built by the minute-feature
# branch of the first cell is replaced by the daily-only frame here.
# Optional date cut-off that was used previously: [df_daily['Date'] <= '2026-01-21']
df_main = df_daily.copy()

In [None]:
# -----------------------------
# Feature Sets
# -----------------------------
ma_all_cols = feature_sets['ma']
# The moving-average family is split by case-insensitive substrings of the column name.
ma_lag = [col for col in ma_all_cols if "lag" in col.lower()]
ma_rel = [col for col in ma_all_cols if "rel_" in col.lower()]
ma_sma = [col for col in ma_all_cols if "sma_" in col.lower() and "lag" not in col.lower()]
ma_num = [col for col in ma_all_cols if any(tag in col.lower() for tag in ("num", "since"))]

# The remaining families are used exactly as delivered by pull_daily.
(
    rsi_cols,
    macd_cols,
    volu_cols,
    atr_adx_cols,
    vola_cols,
    vix_skew_cols,
    experimental_slope_cols,
) = (
    feature_sets[key]
    for key in ('rsi', 'macd', 'volume', 'atr_adx', 'volatility', 'vix_skew', 'experimental_slope')
)

# (label, columns) pairs, unzipped into the two parallel lists the run loop iterates over.
set_names, sets = map(list, zip(*[
    ("ma_lag", ma_lag),
    ("ma_rel", ma_rel),
    ("ma_sma", ma_sma),
    ("ma_num", ma_num),
    ("rsi_macd", rsi_cols + macd_cols),
    ("volu", volu_cols),
    ("atr_adx" + "vola", atr_adx_cols + vola_cols),   # label evaluates to "atr_adxvola" (no separator)
    ("vix_skew", vix_skew_cols),
    ("experimental_slope", experimental_slope_cols),
    ("past_ret_cols", past_ret_cols),
]))

# -----------------------------
# Handling of new features and permutation importance
# -----------------------------
# One of 'exclude_new', 'include_new', 'run_separately'.
pi_handling = 'include_new'
# Columns treated as "new" by the modes above, e.g. past_sum_cols.
new_features = []
if pi_handling not in ("exclude_new", "include_new", "run_separately"):
    raise ValueError(
        "Unknown permutation handling of new and hold features. "
        "Expected one of: exclude_new, include_new, run_separately."
    )

# -----------------------------
# Run grid (feature sets x horizon x train_years, etc.)
# -----------------------------
# Horizons evaluated by the run loop (replaces the list used for the data pull).
returns = [2, 5, 10, 20, 30]
# Previously swept here: train_years = [3, 5, 7]
days_assessed = 120   # divided by test_day to get the number of walk-back runs
test_day = 1          # passed to walkback_runs as both test_days and step_days
results = []
results_df = pd.DataFrame()
models = {}
models["xgboost-3"] = XGBClassifier(n_estimators=300, random_state=42, n_jobs=-1)

def _compute_dist(y):
    """Distribution stats for y in {0,1}."""
    n = int(len(y))
    n_pos = int((y == 1).sum())
    n_neg = int((y == 0).sum())
    return {
        "test_n": n,
        "test_pos_n": n_pos,
        "test_neg_n": n_neg,
        "test_pos_frac": (n_pos / n) if n else np.nan,
        "test_neg_frac": (n_neg / n) if n else np.nan,
    }

def walkback_runs(
    df,
    feature_cols,
    target_col,
    *,
    date_col="Date",
    train_years=6,
    test_days=5,
    step_days=5,
    runs=20,
    purge_days=None,       # None means no purge (0 rows)
    fill_inf=0.0,
):
    """
    Walk-back out-of-time evaluation with per-run feature selection.

    For run k = 0 .. runs-1, stepping backwards from the last row:
      - the test window is the `test_days` rows that end `k * step_days` rows
        before the end of the frame;
      - the training window is the `245 * train_years` rows that end
        `purge_days` rows before the test window starts (the gap keeps the
        forward-return labels of the last training rows from overlapping the
        test window);
      - `perm_list` selects the columns to keep, using the training window only;
      - a clone of each model is fitted on those columns and scored on the
        test window only.
    The loop stops as soon as a window would start before row 0.

    Parameters
    ----------
    df : DataFrame containing a "Date" column, `target_col` and every
        candidate feature column.
    feature_cols : candidate feature column names.
    target_col : name of the label column.
    date_col : column whose values label the window boundaries in the output
        and in the progress log. Rows are always ordered by the "Date"
        column (the same column `perm_list` orders by); when `date_col` is
        not a column of `df`, integer row positions are reported instead.
    train_years, test_days, step_days, runs : window geometry, see above.
    purge_days : rows dropped between train and test; None means 0.
    fill_inf : value substituted for +/-inf in the feature columns.

    Module-level names read at call time
    ------------------------------------
    models       : dict name -> unfitted estimator.
    pi_years     : iterable of `pi_year` values forwarded to `perm_list`.
    min_feats    : iterable of `min_feats` values forwarded to `perm_list`.
    pi_handling  : 'exclude_new' | 'include_new' | 'run_separately'.
    new_features : columns treated as "new" by `pi_handling`.

    Returns
    -------
    DataFrame with one row per (run, model, pi_year, min_feat). "pred" is the
    positive-class score of the FIRST test row only, snapped to the nearest
    0.05; "acc" is the accuracy over the whole test window.

    Raises
    ------
    ValueError
        If `pi_handling` is not one of the three known modes.
    """
    rows = []

    # Loop-invariant set-up: the ordered frame is never mutated below, so it
    # is built once rather than once per run.
    dfw = df.sort_values("Date").reset_index(drop=True)
    dates = dfw[date_col].to_numpy() if date_col in dfw.columns else None

    def _label(pos):
        # Window boundary as a date when available, otherwise as a row position.
        return dates[pos] if dates is not None else pos

    n = len(dfw)
    train_size = 245 * int(train_years)   # training window is sized at 245 rows per year
    test_size = int(test_days)
    step = int(step_days)
    purge = int(purge_days) if purge_days is not None else 0

    extra_cols = list(new_features)
    # Candidate columns with the "new" features taken out (used by two of the modes).
    core_cols = [c for c in feature_cols if c not in extra_cols]

    for k in range(runs):

        test_end = n - k * step
        test_start = test_end - test_size

        if test_start < 0:
            break

        train_end = test_start - purge
        train_start = train_end - train_size
        if train_start < 0 or train_end <= train_start:
            break

        y_all = dfw[target_col].to_numpy()
        # Feature selection only ever sees the training rows of this run.
        dfpi = dfw.iloc[train_start:train_end].copy()

        for model_name, model in models.items():

            for pi_year in pi_years:

                for min_feat in min_feats:

                    if pi_handling == 'exclude_new':

                        # Select among the core columns, then add every new feature unconditionally.
                        perm_cols = perm_list(
                            df=dfpi,
                            feature_cols=core_cols,
                            target_col=target_col,
                            model=model,
                            fill_inf=fill_inf,
                            pi_year=pi_year,
                            min_feats=min_feat
                        )

                        perm_cols = perm_cols + extra_cols
                        print(f"{len(core_cols)} | {len(perm_cols)} | {sorted(perm_cols)}")

                    elif pi_handling == 'run_separately':

                        # Core and new columns are each selected by their own perm_list pass.
                        perm_cols = perm_list(
                            df=dfpi,
                            feature_cols=core_cols,
                            target_col=target_col,
                            model=model,
                            fill_inf=fill_inf,
                            pi_year=pi_year,
                            min_feats=min_feat
                        )

                        if extra_cols:
                            new_perm_cols = perm_list(
                                df=dfpi,
                                feature_cols=extra_cols,
                                target_col=target_col,
                                model=model,
                                fill_inf=fill_inf,
                                pi_year=pi_year,
                                min_feats=min_feat,
                                feat_type="New"
                            )
                        else:
                            # Nothing to select from; skip fitting a model on zero columns.
                            new_perm_cols = []

                        print(f"{len(core_cols)} | {len(perm_cols)} | Original Cols: {sorted(perm_cols)}")
                        print(f"{len(extra_cols)} | {len(new_perm_cols)} | New Cols: {sorted(new_perm_cols)}")
                        perm_cols = perm_cols + new_perm_cols

                    elif pi_handling == 'include_new':

                        # New features compete with the rest in a single perm_list pass.
                        perm_cols = perm_list(
                            df=dfpi,
                            feature_cols=feature_cols,
                            target_col=target_col,
                            model=model,
                            fill_inf=fill_inf,
                            pi_year=pi_year,
                            min_feats=min_feat
                        )

                        print(f"{len(feature_cols)} | {len(perm_cols)} | All Cols: {sorted(perm_cols)}")

                    else:
                        raise ValueError(
                            "Unknown permutation handling of new and hold features. "
                            "Expected one of: exclude_new, include_new, run_separately."
                        )

                    # Drop any accidental return cols from features (belt+suspenders)
                    safe_feature_cols = [c for c in perm_cols if not c.startswith("Return")]

                    # Basic numeric cleaning, done on a derived frame so dfw stays untouched.
                    X_all = dfw[safe_feature_cols].replace([np.inf, -np.inf], fill_inf).to_numpy()

                    print(
                        f"Run {k+1}/{runs} | "
                        f"Train: {_label(train_start)} → {_label(train_end - 1)} | "
                        f"Test: {_label(test_start)} → {_label(test_end - 1)} | "
                        f"Train_n={train_end-train_start} | Test_n={test_end-test_start} | "
                        f"(PI Years: {pi_year} - Feats: {min_feat})"
                    )

                    X_train = X_all[train_start:train_end]
                    y_train = y_all[train_start:train_end]
                    X_test  = X_all[test_start:test_end]
                    y_test  = y_all[test_start:test_end]

                    dist = _compute_dist(y_test)

                    m = clone(model)
                    m.fit(X_train, y_train)

                    preds = m.predict(X_test)

                    # Score of the first test row only: prob(class=1) when the model
                    # exposes it, otherwise the decision value squashed to (0, 1).
                    proba = np.nan
                    if hasattr(m, "predict_proba"):
                        proba = float(m.predict_proba(X_test)[0, 1])
                    elif hasattr(m, "decision_function"):
                        s = float(m.decision_function(X_test)[0])
                        proba = float(1.0 / (1.0 + np.exp(-s)))
                    # Snap to the nearest 0.05 (two decimals); NaN is passed through.
                    proba = np.nan if np.isnan(proba) else round(round(proba / 0.05) * 0.05, 2)

                    rows.append({
                        "run": k + 1,
                        "model": model_name,
                        "test_days": test_days,
                        "pred": proba,
                        "acc": float(accuracy_score(y_test, preds)),
                        **dist,
                        "train_n": int(len(y_train)),
                        "train_start": _label(train_start),
                        "train_end": _label(train_end - 1),
                        "test_start": _label(test_start),
                        "test_end": _label(test_end - 1),
                        "train_years": train_years,
                        "n_features": len(safe_feature_cols),
                        "pi_size": pi_year,
                        "min_feats": min_feat
                    })

    return pd.DataFrame(rows)

def perm_list(
    df,
    feature_cols,
    target_col,
    model,
    *,
    fill_inf=0.0,
    pi_year=1,
    min_feats=6,
    feat_type=None
):
    """
    Select feature columns by permutation importance.

    A clone of `model` is fitted on all rows of `df` (ordered by the "Date"
    column); permutation importance (balanced accuracy, 10 repeats, seed 42)
    is then measured on the last `int(242 * pi_year)` rows of that same frame.

    Parameters
    ----------
    df : DataFrame with a "Date" column, `target_col` and the feature columns.
    feature_cols : candidate column names; names starting with "Return" are
        dropped before anything else happens.
    target_col : name of the label column.
    model : unfitted estimator; it is cloned, never fitted in place.
    fill_inf : value substituted for +/-inf in the feature columns.
    pi_year : size of the importance slice, in units of 242 rows.
    min_feats : minimum number of columns returned in the default mode.
    feat_type : "New" switches to the capped mode described below.

    Returns
    -------
    list of column names, most important first.
      - default mode: columns with mean importance > 0.03; if fewer than
        `min_feats` qualify, the top `min_feats` columns instead.
      - feat_type == "New": columns with mean importance > 0.01, at most 4.
    An empty list is returned when no candidate column remains.
    """
    # Drop any accidental return cols from features (belt+suspenders)
    safe_feature_cols = [c for c in feature_cols if not (c.startswith("Return"))]
    if not safe_feature_cols:
        # Nothing to rank; avoid fitting a model on a zero-column matrix.
        return []

    dfw = df.sort_values("Date").reset_index(drop=True)

    # Basic numeric cleaning
    X_train = dfw[safe_feature_cols].replace([np.inf, -np.inf], fill_inf).to_numpy()
    y_train = dfw[target_col].to_numpy()

    # Importance is measured on the most recent rows only. This uses 242 rows
    # per year while walkback_runs sizes its training window with 245.
    # A result of 0 selects the whole frame, because x[-0:] is x[:].
    N_PI = int(242 * pi_year)
    X_pi = X_train[-N_PI:]
    y_pi = y_train[-N_PI:]

    # fit model
    m = clone(model).fit(X_train, y_train)

    # permutation importance on training-only slice
    pi = permutation_importance(
        m,
        X_pi,
        y_pi,
        scoring="balanced_accuracy",   # or "accuracy", "neg_log_loss", etc.
        n_repeats=10,
        random_state=42,
        n_jobs=-1,
    )

    # The importances follow the column order of X_train, i.e. safe_feature_cols
    # (not feature_cols, which may still contain the dropped "Return*" names).
    pi_df = pd.DataFrame({
        "feature": safe_feature_cols,
        "pi_mean": pi.importances_mean,
        "pi_std":  pi.importances_std,
    }).sort_values("pi_mean", ascending=False)

    if feat_type != "New":

        # keep only features with PI > 0.03
        pi_cols = pi_df['feature'][pi_df['pi_mean'] > 0.03].to_list()

        # Too few survivors: fall back to the top `min_feats` by mean importance.
        if len(pi_cols) < min_feats:
            pi_cols = (
                pi_df.sort_values("pi_mean", ascending=False)
                    .head(min_feats)["feature"]
                    .tolist()
            )

    else:

        # keep only features with PI > 0.01
        pi_cols = pi_df['feature'][pi_df['pi_mean'] > .01].to_list()

        # Cap the number of "new" features at 4.
        if len(pi_cols) > 4:
            pi_cols = (
                pi_df.sort_values("pi_mean", ascending=False)
                    .head(4)["feature"]
                    .tolist()
            )

    return pi_cols

# Number of walk-back steps: one per `test_day` rows over the assessed span.
runs = int(days_assessed // test_day)

pi_handlings = ["include_new"] #["exclude_new", "include_new", "run_separately"]

# Grid settings shared by every horizon. `pi_years` and `min_feats` are read by
# walkback_runs as module-level names, so they are set once, before any call,
# instead of depending on which horizon happens to run first.
train_years = [4, 6]
pi_years = [1.5]
min_feats = [6]

# Per-horizon reference column lists and labels, carried over from the earlier
# if/elif chain as (top_3_cols, list_name). Nothing in this cell consumes them;
# they are still bound per horizon in case other cells read them.
_horizon_reference = {
    2: (ma_num + rsi_cols + macd_cols + past_ret_cols, "num_rsi_mac_pastr_6yr"),
    5: (ma_num + vix_skew_cols + past_ret_cols, "num_vix_skew_pastr_4yr"),
    10: (ma_lag + ma_rel + atr_adx_cols, "lag_rel_atradx_4yr"),
    20: (ma_sma + ma_rel + ma_num, "sma_rel_num_6yr"),
}
_horizon_reference_default = (ma_lag + ma_sma + atr_adx_cols, "lag_sma_atradx_4yr")

# NOTE: `results` is created in the configuration cell; re-running only this
# cell appends to whatever an earlier run already stored there.
for feature_set_cols, name in zip(sets, set_names):

    # Build a fresh list so the shared lists inside `sets` are never mutated,
    # and add each new feature at most once.
    base_cols = list(feature_set_cols) + [c for c in new_features if c not in feature_set_cols]

    # `pi_handling` is deliberately the loop variable: walkback_runs reads it
    # as a module-level name.
    for pi_handling in pi_handlings:

        for r in returns:

            top_3_cols, list_name = _horizon_reference.get(r, _horizon_reference_default)

            target_col = f"Return_{r}"
            # Trim the first r rows, whose outcome is not known yet
            # (assumes df_main is ordered newest-first -- TODO confirm).
            df_final = df_main.iloc[r:].copy()

            for train_year in train_years:

                print(f"Running for horizon {r} | {pi_handling}")

                df_scores = walkback_runs(
                    df=df_final,
                    feature_cols=base_cols,
                    target_col=target_col,
                    date_col="Date",
                    train_years=train_year,
                    test_days=test_day,
                    step_days=test_day,
                    runs=runs,
                    purge_days=r, 
                    fill_inf=0.0,
                )

                df_scores["feature_set"] = f"{name}_ba"
                df_scores["horizon"] = r

                results.append(df_scores)

results_df = pd.concat(results, ignore_index=True)
#results_df.to_csv('test.csv', index=False)
#results_df.to_csv("baseline.csv", index=False)

Running for horizon 2 | include_new
60 | 36 | All Cols: ['SMA_100_Lag10_min', 'SMA_100_Lag150_min', 'SMA_100_Lag200_min', 'SMA_100_Lag25_min', 'SMA_100_Lag50_min', 'SMA_10_Lag100_max', 'SMA_10_Lag100_min', 'SMA_10_Lag10_max', 'SMA_10_Lag10_min', 'SMA_10_Lag150_max', 'SMA_10_Lag150_min', 'SMA_10_Lag200_max', 'SMA_10_Lag200_min', 'SMA_10_Lag25_max', 'SMA_10_Lag25_min', 'SMA_10_Lag50_max', 'SMA_10_Lag50_min', 'SMA_200_Lag100_min', 'SMA_200_Lag150_min', 'SMA_200_Lag200_min', 'SMA_200_Lag50_min', 'SMA_25_Lag100_min', 'SMA_25_Lag10_max', 'SMA_25_Lag10_min', 'SMA_25_Lag150_max', 'SMA_25_Lag150_min', 'SMA_25_Lag200_min', 'SMA_25_Lag25_max', 'SMA_25_Lag25_min', 'SMA_25_Lag50_min', 'SMA_50_Lag100_min', 'SMA_50_Lag10_max', 'SMA_50_Lag10_min', 'SMA_50_Lag150_min', 'SMA_50_Lag25_min', 'SMA_50_Lag50_min']
Run 1/120 | Train: 2022-03-03 → 2026-01-28 | Test: 2026-02-02 → 2026-02-02 | Train_n=980 | Test_n=1 | (PI Years: 1.5 - Feats: 100)
60 | 30 | All Cols: ['SMA_100_Lag10_min', 'SMA_100_Lag200_min', 'S

In [557]:
# Stack the stored baseline and this session's results (restricted to a common
# column list) and write the combined table to a new file.
df = pd.read_csv("baseline.csv")
cols = [
    'model',
    'test_days',
    'pred',
    'acc',
    'test_n',
    'test_pos_n',
    'train_n',
    'test_start',
    'test_end',
    'train_years',
    'feature_set',
    'horizon',
    'pi_size',
    'min_feats',
]
df_new = results_df.loc[:, cols].copy()
df_concat = pd.concat([df.loc[:, cols], df_new], ignore_index=True)
df_concat.to_csv('baseline2.csv', index=False)