Hull Tactical — Final GPU-ready Notebook

This notebook includes the patched pipeline with sorting, train-only median imputation, lagged-column handling, is_scored support, purged+embargo CV, Optuna tuning, stacking, and GPU support for LightGBM/CatBoost.

Instructions:
1. (Optional) Install dependencies.
2. Edit TRAIN_CSV/TEST_CSV paths in the run cell.
3. Run cells top-to-bottom.

In [None]:
# Optional dependency install for a fresh environment. The pip line below is
# commented out on purpose; remove the leading '#' to run it inside a notebook.
#!pip install numpy pandas scikit-learn lightgbm catboost optuna joblib nbformat
print('If running on a fresh environment, uncomment and run the pip install line.')

In [None]:
import os, warnings, math
from datetime import datetime
import joblib

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
import optuna

# Silence every Python warning for the rest of the session. Note that this
# also hides deprecation warnings emitted by pandas / LightGBM / CatBoost.
warnings.filterwarnings('ignore')

# Optional dependency: keep `lgb` as None when the import fails so later code
# can test for availability instead of crashing at import time.
try:
    import lightgbm as lgb
except Exception as e:
    lgb = None
    print('Warning: lightgbm import failed:', e)

# Same optional-import pattern for CatBoost.
try:
    from catboost import CatBoostRegressor
except Exception as e:
    CatBoostRegressor = None
    print('Warning: catboost import failed:', e)

# Column names and pipeline defaults shared by the cells below.
TARGET_COL = 'forward_returns'  # regression target column
TIME_COL = 'date_id'  # column used to sort rows chronologically
DEFAULT_N_SPLITS = 5  # number of validation blocks
DEFAULT_PURGE_DAYS = 30  # rows dropped from training just before each validation block
DEFAULT_EMBARGO_DAYS = 2  # embargo length handed to the split helper
DEFAULT_N_TRIALS = 40  # Optuna trials per study
DEFAULT_TOP_K = 120  # upper bound on features kept by SelectKBest
DEFAULT_TRANSACTION_COST = 0.0003  # cost charged per unit of position change in the backtest
RANDOM_STATE = 42  # seed shared by samplers and models

print('Environment ready. LGB:', lgb is not None, 'CatBoost:', CatBoostRegressor is not None)

In [None]:
def modified_sharpe(returns, market_returns, rf=0.0):
    """Return a Sharpe-like ratio penalised by relative volatility.

    The plain ratio ``mean(excess) / std(excess)`` is divided by a penalty
    equal to strategy volatility over market volatility, clipped to
    ``[0.5, 2.0]``.

    Args:
        returns: Array-like of strategy returns (must support ``returns - rf``).
        market_returns: Array-like of market returns; only its standard
            deviation is used.
        rf: Risk-free rate subtracted from every strategy return.

    Returns:
        The penalised ratio as a float-like scalar.
    """
    eps = 1e-8  # guards both divisions against zero volatility
    excess_returns = returns - rf
    market_sigma = np.std(market_returns)
    strategy_sigma = np.std(excess_returns)
    vol_penalty = np.clip(strategy_sigma / (market_sigma + eps), 0.5, 2.0)
    plain_sharpe = np.mean(excess_returns) / (strategy_sigma + eps)
    return plain_sharpe / vol_penalty


def compute_rsi(series, window=14):
    """Compute an exponentially smoothed Relative Strength Index.

    Args:
        series: pandas Series of prices (or any level series).
        window: Span passed to the exponential moving average.

    Returns:
        pandas Series of RSI values, same index as ``series``.
    """
    def smooth(component):
        # Recursive (adjust=False) exponential average with the given span.
        return component.ewm(span=window, adjust=False).mean()

    # The first difference is undefined; treat it as "no move".
    step = series.diff().fillna(0)
    avg_gain = smooth(step.clip(lower=0))
    avg_loss = smooth(-step.clip(upper=0))
    # Small constant keeps the ratio finite when there were no losses.
    relative_strength = avg_gain / (avg_loss + 1e-10)
    return 100 - (100 / (1 + relative_strength))


def feature_engineer(df, price_col):
    """Return a copy of ``df`` with features derived from ``price_col``.

    Added columns:
      * ``ret_1``: one-step percentage change of the price (0 where undefined).
      * ``ret_lag_{k}`` / ``price_lag_{k}`` for k in (1, 2, 3, 5, 7, 10).
      * ``ma_{w}``, ``std_{w}``, ``rsi_{w}``, ``pctile_{w}`` for
        w in (3, 5, 10, 20, 30, 60).
      * ``mom_10_30`` (``ma_10 - ma_30``) and ``vol_ratio_10_30``
        (``std_10 / std_30``).
      * ``dow`` / ``month`` / ``day`` when a ``date`` column is present
        (-1 where the date cannot be parsed).

    NaNs in pass-through columns (and in engineered columns where the price
    itself is NaN) are intentionally left in place: ``train_stack_and_save``
    imputes them with medians fitted on the training data. Filling everything
    with 0 here would also overwrite the target column and bypass that
    imputation.

    Args:
        df: Input frame, assumed to be in chronological row order.
        price_col: Name of the numeric column used as the price series.

    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    df = df.copy()
    price = df[price_col]

    # pct_change produces +/-inf when the previous price is 0; treat those
    # like the undefined first-row return so no infinity reaches the imputer.
    df['ret_1'] = price.pct_change().replace([np.inf, -np.inf], np.nan).fillna(0)

    for lag in (1, 2, 3, 5, 7, 10):
        df[f'ret_lag_{lag}'] = df['ret_1'].shift(lag).fillna(0)
        # .bfill() is the supported spelling of fillna(method='bfill'),
        # which newer pandas releases deprecate.
        df[f'price_lag_{lag}'] = price.shift(lag).bfill()

    for w in (3, 5, 10, 20, 30, 60):
        df[f'ma_{w}'] = price.rolling(window=w, min_periods=1).mean()
        df[f'std_{w}'] = df['ret_1'].rolling(window=w, min_periods=1).std().fillna(0)
        df[f'rsi_{w}'] = compute_rsi(price, window=w)
        # Percentile rank of the latest price within its trailing window.
        df[f'pctile_{w}'] = price.rolling(window=w, min_periods=1).apply(
            lambda x: pd.Series(x).rank(pct=True).iloc[-1]
        )

    # ma_10/ma_30 and std_10/std_30 always exist after the loop above.
    df['mom_10_30'] = df['ma_10'] - df['ma_30']
    df['vol_ratio_10_30'] = df['std_10'] / df['std_30'].replace(0, 1e-8)

    if 'date' in df.columns:
        d = pd.to_datetime(df['date'], errors='coerce')
        df['dow'] = d.dt.dayofweek.fillna(-1).astype(int)
        df['month'] = d.dt.month.fillna(-1).astype(int)
        df['day'] = d.dt.day.fillna(-1).astype(int)

    return df

# Progress marker so the notebook output shows this cell has been executed.
print('Feature engineering functions defined.')

In [None]:
def purged_embargo_splits(n_samples, n_splits=DEFAULT_N_SPLITS, purge=DEFAULT_PURGE_DAYS, embargo=DEFAULT_EMBARGO_DAYS):
    """Yield ``(train_idx, val_idx)`` pairs for purged K-fold CV with an embargo.

    Rows ``0..n_samples-1`` are assumed to be in chronological order and are
    cut into ``n_splits`` contiguous validation blocks. For each block the
    training set is every row outside the block except:

      * the ``purge`` rows immediately before it, and
      * the ``embargo`` rows immediately after it.

    The last block absorbs the remainder of ``n_samples // n_splits`` so every
    row is validated exactly once.

    Args:
        n_samples: Number of rows to split.
        n_splits: Number of validation blocks.
        purge: Rows removed from training just before each validation block.
            Negative values are treated as 0.
        embargo: Rows removed from training just after each validation block.
            Negative values are treated as 0.

    Yields:
        Tuples of integer index arrays ``(train_idx, val_idx)``. Either array
        may be empty for tiny inputs; callers are expected to skip such folds.
    """
    idx = np.arange(n_samples)
    block_size = max(1, n_samples // n_splits)
    purge = max(0, purge)
    embargo = max(0, embargo)
    for i in range(n_splits):
        val_start = i * block_size
        if i == n_splits - 1:
            # Give leftover rows to the final block instead of dropping them.
            val_end = n_samples
        else:
            val_end = min(n_samples, val_start + block_size)
        before_end = max(0, val_start - purge)
        after_start = min(n_samples, val_end + embargo)
        train_idx = np.concatenate([idx[:before_end], idx[after_start:]])
        val_idx = idx[val_start:val_end]
        yield train_idx, val_idx


def backtest_with_cost_from_preds(preds, realized_returns, top_q=0.2, tc=DEFAULT_TRANSACTION_COST):
    """Backtest a long/flat rule that buys the top ``top_q`` share of predictions.

    Args:
        preds: 1-D array of model predictions.
        realized_returns: Array of realised returns aligned with ``preds``.
        top_q: Fraction of rows (by prediction) that trigger a long signal.
        tc: Cost charged per unit of absolute position change.

    Returns:
        Tuple ``(strat_return, pos, cost)``: net strategy returns, the 0/1
        position held on each row, and the cost paid on each row.
    """
    cutoff = np.quantile(preds, 1 - top_q)
    # Delay the 0/1 signal by one row so each position comes from the previous
    # row's prediction; the first row is always flat.
    pos = np.roll((preds >= cutoff).astype(int), 1)
    pos[0] = 0
    # Cost is proportional to the absolute change in position.
    cost = np.abs(np.diff(pos, prepend=0)) * tc
    return pos * realized_returns - cost, pos, cost

# Progress marker so the notebook output shows this cell has been executed.
print('Split and backtest utilities ready.')

In [None]:
def objective(trial, X, y, ret_market, splits, use_gpu):
    """Optuna objective: negative modified Sharpe of a 50/50 LightGBM + CatBoost blend.

    For every usable fold the two models are fitted on the training rows,
    their predictions on the validation rows are averaged, and the blend is
    turned into strategy returns by ``backtest_with_cost_from_preds``. The
    strategy returns of all folds are pooled and scored with
    ``modified_sharpe`` against the market returns of the same rows.

    Args:
        trial: Optuna trial used to sample hyper-parameters.
        X: 2-D numpy feature matrix.
        y: 1-D numpy target array aligned with ``X``.
        ret_market: Market returns aligned with ``X`` (array-like).
        splits: Iterable of ``(train_idx, val_idx)`` index arrays.
        use_gpu: When true, request GPU training from both libraries.

    Returns:
        ``-modified_sharpe`` (the study minimises).

    Raises:
        RuntimeError: LightGBM or CatBoost could not be imported.
        optuna.exceptions.TrialPruned: Fewer than 10 validated rows overall.
    """
    # Fail fast instead of discovering the missing library inside the loop.
    if lgb is None:
        raise RuntimeError('lightgbm not available.')
    if CatBoostRegressor is None:
        raise RuntimeError('catboost not available.')

    lgb_params = {
        'num_leaves': trial.suggest_int('num_leaves', 31, 127),
        'learning_rate': trial.suggest_float('lgb_lr', 0.01, 0.2),
        'n_estimators': trial.suggest_int('lgb_estimators', 200, 800),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'random_state': RANDOM_STATE,
        'n_jobs': -1,
    }
    cat_iters = trial.suggest_int('cat_iters', 300, 1000)
    cat_depth = trial.suggest_int('cat_depth', 4, 8)
    cat_lr = trial.suggest_float('cat_lr', 0.01, 0.1)
    top_q = trial.suggest_float('top_q', 0.05, 0.5)

    ret_market = np.asarray(ret_market)
    rets_all = []
    market_all = []
    for train_idx, val_idx in splits:
        # Folds too small to train or evaluate on are skipped.
        if len(train_idx) < 10 or len(val_idx) < 10:
            continue
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        lgb_args = dict(lgb_params)
        if use_gpu:
            lgb_args['device'] = 'gpu'
            lgb_args['gpu_platform_id'] = 0
            lgb_args['gpu_device_id'] = 0
        mdl_lgb = lgb.LGBMRegressor(**lgb_args)
        try:
            # Callback API: the only early-stopping route in LightGBM >= 4,
            # where fit() no longer accepts early_stopping_rounds/verbose.
            callbacks = [lgb.early_stopping(stopping_rounds=50, verbose=False)]
            if hasattr(lgb, 'log_evaluation'):
                callbacks.append(lgb.log_evaluation(period=0))
            mdl_lgb.fit(X_train, y_train, eval_set=[(X_val, y_val)], callbacks=callbacks)
        except TypeError:
            # Very old builds: fall back to the legacy keyword arguments.
            mdl_lgb.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=50, verbose=False)

        cat_args = {'iterations': cat_iters, 'depth': cat_depth, 'learning_rate': cat_lr, 'verbose': False, 'random_state': RANDOM_STATE}
        if use_gpu:
            cat_args['task_type'] = 'GPU'
        mdl_cat = CatBoostRegressor(**cat_args)
        mdl_cat.fit(X_train, y_train, eval_set=(X_val, y_val), use_best_model=True, verbose=False)

        pred = 0.5 * mdl_lgb.predict(X_val) + 0.5 * mdl_cat.predict(X_val)
        strat_ret, _, _ = backtest_with_cost_from_preds(pred, y_val, top_q=top_q)
        rets_all.extend(strat_ret)
        # Keep the market returns of exactly the rows that were validated;
        # slicing from the start of ret_market misaligns when folds are skipped.
        market_all.extend(ret_market[val_idx])

    if len(rets_all) < 10:
        raise optuna.exceptions.TrialPruned()
    shr = modified_sharpe(np.array(rets_all), np.array(market_all))
    return -shr

In [None]:
# Patched train_stack_and_save function
def train_stack_and_save(
    train_df,
    test_df=None,
    outdir='outputs',
    n_splits=DEFAULT_N_SPLITS,
    purge_days=DEFAULT_PURGE_DAYS,
    embargo_days=DEFAULT_EMBARGO_DAYS,
    n_trials=DEFAULT_N_TRIALS,
    top_k=DEFAULT_TOP_K,
    use_gpu=True,
):
    """Tune, train and persist the LightGBM + CatBoost stack; optionally score ``test_df``.

    Pipeline: sort by ``TIME_COL`` -> ``feature_engineer`` -> median imputation
    fitted on train only -> ``SelectKBest`` -> Optuna search over
    ``objective`` -> final base models fitted on all rows -> Ridge meta-model
    fitted on out-of-fold base predictions -> ``models.joblib`` written to
    ``outdir`` (plus submission CSVs when ``test_df`` is given).

    Args:
        train_df: Training frame containing ``TARGET_COL``.
        test_df: Optional frame to score; may carry ``lagged_*`` columns and
            an ``is_scored`` flag.
        outdir: Directory for the saved artifact and submissions (created if
            missing).
        n_splits: Number of CV validation blocks.
        purge_days: Purge length passed to ``purged_embargo_splits``.
        embargo_days: Embargo length passed to ``purged_embargo_splits``.
        n_trials: Number of Optuna trials.
        top_k: Maximum number of features kept by ``SelectKBest``.
        use_gpu: Request GPU training when both libraries imported.

    Returns:
        Dict with the fitted models (``lgb``, ``cat``, ``meta``), ``scaler``,
        ``imputer``, selected ``features``, ``best_optuna`` params,
        ``use_gpu`` and the in-sample ``train_sharpe``.
    """
    os.makedirs(outdir, exist_ok=True)

    # enforce chronological order
    if TIME_COL in train_df.columns:
        train_df = train_df.sort_values(TIME_COL).reset_index(drop=True)
    else:
        train_df = train_df.reset_index(drop=True)
    if test_df is not None:
        if TIME_COL in test_df.columns:
            test_df = test_df.sort_values(TIME_COL).reset_index(drop=True)
        else:
            test_df = test_df.reset_index(drop=True)

    # choose price column: prefer 'E1', else the first numeric non-metadata column
    numeric_cols = train_df.select_dtypes(include=[np.number]).columns.tolist()
    if 'E1' in train_df.columns:
        price_col = 'E1'
    else:
        candidate = [c for c in numeric_cols if c not in [TIME_COL, TARGET_COL]]
        price_col = candidate[0] if candidate else numeric_cols[0]

    # feature engineer
    train_df = feature_engineer(train_df, price_col)
    test_df = feature_engineer(test_df, price_col) if test_df is not None else None

    # exclude metadata
    exclude = {TIME_COL, TARGET_COL, 'market_forward_excess_returns', 'risk_free_rate', 'date', 'is_scored'}
    features = [c for c in train_df.select_dtypes(include=[np.number]).columns if c not in exclude]

    # prefer lagged columns in test when present
    if test_df is not None:
        prefix = 'lagged_'
        for col in list(test_df.columns):
            if col.startswith(prefix):
                # Strip only the leading prefix (str.replace would also remove
                # later occurrences inside the name).
                base = col[len(prefix):]
                if base in features and base not in test_df.columns:
                    test_df[base] = test_df[col]

    # median imputer fit on train only
    X_full = train_df[features].copy()
    y_full = train_df[TARGET_COL].values
    ret_market = train_df['market_forward_excess_returns'].values if 'market_forward_excess_returns' in train_df.columns else y_full

    imputer = SimpleImputer(strategy='median')
    imputer.fit(X_full)
    X_full_imp = pd.DataFrame(imputer.transform(X_full), columns=features, index=X_full.index)
    if test_df is not None:
        # Features absent from test become all-NaN and therefore train medians.
        for c in features:
            if c not in test_df.columns:
                test_df[c] = np.nan
        test_df[features] = pd.DataFrame(imputer.transform(test_df[features]), columns=features, index=test_df.index)

    # feature selection (univariate, fitted on the full training set)
    k = min(top_k, X_full_imp.shape[1])
    skb = SelectKBest(score_func=f_regression, k=k)
    skb.fit(X_full_imp.values, y_full)
    selected_idx = skb.get_support(indices=True)
    features = [features[i] for i in selected_idx]

    X = X_full_imp[features].values
    n = len(X)
    splits = list(purged_embargo_splits(n, n_splits=n_splits, purge=purge_days, embargo=embargo_days))

    # gpu checks (library availability only; no hardware probe is done here)
    use_gpu_effective = use_gpu
    if use_gpu:
        if lgb is None or CatBoostRegressor is None:
            print('GPU requested but LGB/CatBoost not available - falling back to CPU.')
            use_gpu_effective = False

    # optuna
    sampler = optuna.samplers.TPESampler(seed=RANDOM_STATE)
    pruner = optuna.pruners.MedianPruner()
    study = optuna.create_study(direction='minimize', sampler=sampler, pruner=pruner)
    print(f"{datetime.now()} - Starting Optuna tuning ({n_trials} trials)...")
    study.optimize(lambda t: objective(t, X, y_full, ret_market, splits, use_gpu_effective), n_trials=n_trials, show_progress_bar=True)

    print('Best params:', study.best_params)
    print('Best modified sharpe (train):', -study.best_value)
    best = study.best_params

    # final models
    lgb_final_params = {
        'num_leaves': best.get('num_leaves', 64),
        'learning_rate': best.get('lgb_lr', 0.05),
        'n_estimators': best.get('lgb_estimators', 300),
        'min_child_samples': best.get('min_child_samples', 20),
        'subsample': best.get('subsample', 0.8),
        'colsample_bytree': best.get('colsample_bytree', 0.8),
        'random_state': RANDOM_STATE,
        'n_jobs': -1,
    }
    if use_gpu_effective:
        lgb_final_params['device'] = 'gpu'
        lgb_final_params['gpu_platform_id'] = 0
        lgb_final_params['gpu_device_id'] = 0

    final_lgb = lgb.LGBMRegressor(**lgb_final_params)
    final_cat_args = {
        'iterations': best.get('cat_iters', 600),
        'depth': best.get('cat_depth', 6),
        'learning_rate': best.get('cat_lr', 0.03),
        'verbose': False,
        'random_state': RANDOM_STATE,
    }
    if use_gpu_effective:
        final_cat_args['task_type'] = 'GPU'
    final_cat = CatBoostRegressor(**final_cat_args)

    # The scaler is fitted on all training rows and reused inside the folds.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    final_lgb.fit(X_scaled, y_full)
    final_cat.fit(X_scaled, y_full)

    # OOF stacking
    oof_lgb = np.zeros(n, dtype=float)
    oof_cat = np.zeros(n, dtype=float)
    has_oof = np.zeros(n, dtype=bool)  # rows that actually received an OOF prediction
    for train_idx, val_idx in splits:
        if len(train_idx) < 10 or len(val_idx) < 10:
            continue
        Xtr = scaler.transform(X[train_idx])
        Xv = scaler.transform(X[val_idx])
        ytr = y_full[train_idx]
        yv = y_full[val_idx]

        # Fresh, unfitted copies configured exactly like the final models
        # (GPU settings are already part of these parameter sets).
        lgb_local = lgb.LGBMRegressor(**final_lgb.get_params())
        cat_local = CatBoostRegressor(**final_cat_args)

        try:
            # Callback API: the only early-stopping route in LightGBM >= 4.
            callbacks = [lgb.early_stopping(stopping_rounds=50, verbose=False)]
            if hasattr(lgb, 'log_evaluation'):
                callbacks.append(lgb.log_evaluation(period=0))
            lgb_local.fit(Xtr, ytr, eval_set=[(Xv, yv)], callbacks=callbacks)
        except TypeError:
            # Very old builds: fall back to the legacy keyword arguments.
            lgb_local.fit(Xtr, ytr, eval_set=[(Xv, yv)], early_stopping_rounds=50, verbose=False)

        try:
            cat_local.fit(Xtr, ytr, eval_set=(Xv, yv), use_best_model=True, verbose=False)
        except Exception as exc:
            # Best effort: keep the fold, but say why early stopping was lost.
            print('CatBoost fit with eval_set failed; refitting without it:', exc)
            cat_local.fit(Xtr, ytr, verbose=False)

        oof_lgb[val_idx] = lgb_local.predict(Xv)
        oof_cat[val_idx] = cat_local.predict(Xv)
        has_oof[val_idx] = True

    # Fit the meta-model only on rows with genuine OOF predictions; rows from
    # skipped folds would otherwise enter as all-zero features.
    meta_rows = has_oof if has_oof.any() else np.ones(n, dtype=bool)
    meta_X = np.column_stack([oof_lgb, oof_cat])
    meta = Ridge(alpha=1.0, random_state=RANDOM_STATE)
    meta.fit(meta_X[meta_rows], y_full[meta_rows])

    # The meta-model was trained on [lgb_pred, cat_pred]; feed it the same
    # two columns at prediction time.
    lgb_pred_full = final_lgb.predict(X_scaled)
    cat_pred_full = final_cat.predict(X_scaled)
    final_meta_pred = meta.predict(np.column_stack([lgb_pred_full, cat_pred_full]))
    top_q = best.get('top_q', 0.2)
    train_strat, _, _ = backtest_with_cost_from_preds(final_meta_pred, y_full, top_q=top_q, tc=DEFAULT_TRANSACTION_COST)
    # In-sample figure: the base models have seen every one of these rows.
    final_sharpe = modified_sharpe(train_strat, ret_market)
    print(f'Final full-sample modified Sharpe: {final_sharpe:.4f}')

    artifact = {'lgb': final_lgb, 'cat': final_cat, 'meta': meta, 'scaler': scaler, 'features': features, 'best_optuna': best, 'use_gpu': use_gpu_effective, 'train_sharpe': final_sharpe, 'imputer': imputer}
    joblib.dump(artifact, os.path.join(outdir, 'models.joblib'))
    print('Saved models to', os.path.join(outdir, 'models.joblib'))

    if test_df is not None:
        X_test = test_df[features].values
        X_test_scaled = scaler.transform(X_test)
        lgb_pred_test = final_lgb.predict(X_test_scaled)
        cat_pred_test = final_cat.predict(X_test_scaled)
        pred_meta_test = meta.predict(np.column_stack([lgb_pred_test, cat_pred_test]))
        if TIME_COL in test_df.columns:
            sub = test_df[[TIME_COL]].copy()
        elif 'id' in test_df.columns:
            sub = test_df[['id']].copy()
        else:
            sub = pd.DataFrame({'id': np.arange(len(test_df))})
        sub['prediction'] = pred_meta_test
        if 'is_scored' in test_df.columns:
            sub.to_csv(os.path.join(outdir, 'submission_full.csv'), index=False)
            sub_scored = sub[test_df['is_scored'] == 1].reset_index(drop=True)
            sub_scored.to_csv(os.path.join(outdir, 'submission_scored_only.csv'), index=False)
            print('Wrote submission_full.csv and submission_scored_only.csv (scored rows only).')
        else:
            sub.to_csv(os.path.join(outdir, 'submission.csv'), index=False)
            print('Wrote submission.csv')

    return artifact

In [None]:
# ===== Optuna HPO with turnover penalty (heavy) =====
import os, time, traceback
import optuna
import joblib
import numpy as np
from copy import deepcopy
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import StandardScaler

# CONFIG - tune these before running
OUTDIR = "outputs"
os.makedirs(OUTDIR, exist_ok=True)
study_db = "sqlite:///optuna_turnover.db"   # persistent study (can resume)
n_trials = 20     # small value for a quick test run; raise to 100-300 for final runs
n_splits = 5
purge_days = 30
embargo_days = 2
top_k = 120         # num features selected with univariate filter
# NOTE(review): the objective below is -sharpe + coef * mean turnover; with
# coef=200 the turnover term will likely dominate the Sharpe term — confirm scale.
turnover_penalty_coef = 200.0   # tune this: higher => prefer lower turnover
# NOTE(review): has_nvidia_gpu is not defined in the cells visible here —
# confirm it exists in the session before running this cell.
use_gpu_flag = has_nvidia_gpu()  # your helper function from notebook

print("HPO config:", dict(n_trials=n_trials, n_splits=n_splits, top_k=top_k, use_gpu=use_gpu_flag))

# Prepare training matrix (impute medians & select top_k)
# NOTE(review): train_df must already exist as a global; this cell neither
# loads it, sorts it by TIME_COL, nor calls feature_engineer, so the row-index
# splits below assume it is already in chronological order. `pd` is likewise
# taken from an earlier cell's import.
exclude = {TIME_COL, TARGET_COL, "market_forward_excess_returns", "risk_free_rate", "date", "is_scored"}
candidates = [c for c in train_df.select_dtypes(include=[np.number]).columns if c not in exclude]
X_full_df = train_df[candidates].copy()
y_full = train_df[TARGET_COL].values
# Fall back to the target itself when the market-return column is absent.
ret_market_full = train_df["market_forward_excess_returns"].values if "market_forward_excess_returns" in train_df.columns else y_full

# Impute with medians fitted on the training frame only
imputer = SimpleImputer(strategy="median")
imputer.fit(X_full_df)
X_full_imp = pd.DataFrame(imputer.transform(X_full_df), columns=candidates, index=X_full_df.index)

# Feature pre-selection: keep at most top_k columns by univariate F-score
k_sel = min(top_k, X_full_imp.shape[1])
skb = SelectKBest(score_func=f_regression, k=k_sel)
skb.fit(X_full_imp.values, y_full)
sel_idx = skb.get_support(indices=True)
features_opt = [candidates[i] for i in sel_idx]
X = X_full_imp[features_opt].values
ret_market = ret_market_full

print("Prepared X with shape:", X.shape, "features:", len(features_opt))

# Splits (index-based purged+embargo)
def purged_embargo_splits_local(n_samples, n_splits=n_splits, purge=purge_days, embargo=embargo_days):
    """Yield ``(train_idx, val_idx)`` pairs using this cell's split settings.

    Thin wrapper around ``purged_embargo_splits`` (defined in the utilities
    cell) so the HPO cell and ``train_stack_and_save`` always share one split
    implementation. How ``purge`` and ``embargo`` are applied is decided
    entirely by that function.

    Args:
        n_samples: Number of rows to split.
        n_splits: Number of validation blocks (defaults to this cell's config).
        purge: Purge length forwarded to ``purged_embargo_splits``.
        embargo: Embargo length forwarded to ``purged_embargo_splits``.

    Yields:
        Tuples of integer index arrays ``(train_idx, val_idx)``.
    """
    yield from purged_embargo_splits(n_samples, n_splits=n_splits, purge=purge, embargo=embargo)

# Materialise the folds once so every Optuna trial iterates the same splits.
splits_local = list(purged_embargo_splits_local(len(X), n_splits=n_splits, purge=purge_days, embargo=embargo_days))

# Objective with turnover penalty
def objective_turnover(trial):
    """Optuna objective: ``-modified_sharpe + turnover_penalty_coef * mean turnover``.

    Reads the module-level ``X``, ``y_full``, ``ret_market``, ``splits_local``,
    ``use_gpu_flag`` and ``turnover_penalty_coef``. For each usable fold a
    LightGBM and a CatBoost regressor are fitted, blended with the sampled
    ``ensemble_w`` and backtested; strategy returns are pooled across folds
    and turnover is averaged over folds.

    Args:
        trial: Optuna trial used to sample hyper-parameters. The user attrs
            ``sharpe``, ``avg_turnover`` and ``elapsed`` are set on success.

    Returns:
        The value to minimise, as a float.

    Raises:
        optuna.exceptions.TrialPruned: Fewer than 10 validated rows overall.
        Exception: Any other failure is printed and re-raised so Optuna
            records the trial as failed.
    """
    t0 = time.time()
    try:
        # Imported once per trial instead of once per fold.
        import lightgbm as lgb_local_mod
        from catboost import CatBoostRegressor as CatLocal

        # LGB params (expanded)
        lgb_params = {
            "num_leaves": trial.suggest_int("num_leaves", 31, 512),
            "learning_rate": trial.suggest_float("lgb_lr", 1e-4, 0.2, log=True),
            "n_estimators": trial.suggest_int("lgb_estimators", 100, 1500),
            "max_depth": trial.suggest_int("max_depth", 3, 16),
            "min_child_samples": trial.suggest_int("min_child_samples", 5, 200),
            "subsample": trial.suggest_float("subsample", 0.4, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0),
            "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
            "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
            "random_state": RANDOM_STATE,
            "n_jobs": -1,
        }

        # Cat params (expanded)
        cat_iters = trial.suggest_int("cat_iters", 200, 2000)
        cat_depth = trial.suggest_int("cat_depth", 3, 12)
        cat_lr = trial.suggest_float("cat_lr", 1e-3, 0.2, log=True)

        # signal selection and ensemble weight
        top_q = trial.suggest_float("top_q", 0.01, 0.4)
        ensemble_w = trial.suggest_float("ensemble_w", 0.0, 1.0)

        market = np.asarray(ret_market)
        rets_all = []
        market_all = []
        turnovers = []

        for train_idx, val_idx in splits_local:
            # Folds too small to train or evaluate on are skipped.
            if len(train_idx) < 10 or len(val_idx) < 10:
                continue
            Xtr, Xv = X[train_idx], X[val_idx]
            ytr, yv = y_full[train_idx], y_full[val_idx]

            # LightGBM
            lgb_args = dict(lgb_params)
            if use_gpu_flag:
                # these params may be ignored depending on build
                lgb_args["device"] = "gpu"
                lgb_args["gpu_platform_id"] = 0
                lgb_args["gpu_device_id"] = 0
            mdl_l = lgb_local_mod.LGBMRegressor(**lgb_args)
            try:
                # Callback API: the only early-stopping route in LightGBM >= 4,
                # where fit() no longer accepts early_stopping_rounds/verbose.
                callbacks = [lgb_local_mod.early_stopping(stopping_rounds=50, verbose=False)]
                if hasattr(lgb_local_mod, "log_evaluation"):
                    callbacks.append(lgb_local_mod.log_evaluation(period=0))
                mdl_l.fit(Xtr, ytr, eval_set=[(Xv, yv)], callbacks=callbacks)
            except TypeError:
                # Very old builds: fall back to the legacy keyword arguments.
                mdl_l.fit(Xtr, ytr, eval_set=[(Xv, yv)], early_stopping_rounds=50, verbose=False)

            # CatBoost
            cat_args = {"iterations": int(cat_iters), "depth": int(cat_depth), "learning_rate": float(cat_lr), "verbose": False, "random_state": RANDOM_STATE}
            if use_gpu_flag:
                cat_args["task_type"] = "GPU"
            mdl_c = CatLocal(**cat_args)
            try:
                mdl_c.fit(Xtr, ytr, eval_set=(Xv, yv), use_best_model=True, verbose=False)
            except Exception as exc:
                # Best effort: keep the fold, but say why early stopping was lost.
                print("CatBoost fit with eval_set failed; refitting without it:", exc)
                mdl_c.fit(Xtr, ytr, verbose=False)

            pred_v = ensemble_w * mdl_l.predict(Xv) + (1.0 - ensemble_w) * mdl_c.predict(Xv)
            strat_v, pos_v, cost_v = backtest_with_cost_from_preds(pred_v, yv, top_q=top_q, tc=DEFAULT_TRANSACTION_COST)
            rets_all.extend(strat_v)
            # Keep the market returns of exactly the rows that were validated;
            # slicing from the start of ret_market misaligns when folds are skipped.
            market_all.extend(market[val_idx])
            turnovers.append(np.abs(np.diff(pos_v, prepend=0)).mean())

        if len(rets_all) < 10:
            raise optuna.exceptions.TrialPruned()

        rets_all = np.array(rets_all)
        avg_turn = float(np.mean(turnovers)) if len(turnovers) else 0.0
        shr = modified_sharpe(rets_all, np.array(market_all))
        # objective: minimize -sharpe + penalty * turnover
        objective_value = float(-shr + turnover_penalty_coef * avg_turn)

        # log user attrs
        trial.set_user_attr("sharpe", float(shr))
        trial.set_user_attr("avg_turnover", float(avg_turn))
        trial.set_user_attr("elapsed", time.time() - t0)

        return objective_value
    except optuna.exceptions.TrialPruned:
        # Pruning is normal control flow, not an error worth a traceback.
        raise
    except Exception:
        # log and re-raise so Optuna records the failure
        traceback.print_exc()
        raise

# Create persistent study so you can resume
study = optuna.create_study(storage=study_db, study_name="turnover_study", load_if_exists=True, direction="minimize", sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE), pruner=optuna.pruners.MedianPruner())
print("Starting Optuna (will save to):", study_db)
study.optimize(objective_turnover, n_trials=n_trials, show_progress_bar=True)

print("Study finished. Best value (objective):", study.best_value)
print("Best params:", study.best_params)
print("Best trial attrs:", study.best_trial.user_attrs)

# Train final models on full data using best params
best = study.best_params
lgb_final_params = dict(
    num_leaves=best.get("num_leaves", 64),
    learning_rate=best.get("lgb_lr", 0.05),
    n_estimators=best.get("lgb_estimators", 300),
    max_depth=best.get("max_depth", -1),
    min_child_samples=best.get("min_child_samples", 20),
    subsample=best.get("subsample", 0.8),
    colsample_bytree=best.get("colsample_bytree", 0.8),
    # The search tunes lambda_l1/lambda_l2; reg_alpha/reg_lambda are the
    # scikit-learn API names for the same L1/L2 penalties, so carry them over
    # instead of silently training the final model without regularisation.
    reg_alpha=best.get("lambda_l1", 0.0),
    reg_lambda=best.get("lambda_l2", 0.0),
    random_state=RANDOM_STATE,
    n_jobs=-1,
)
if use_gpu_flag:
    lgb_final_params.update(device="gpu", gpu_platform_id=0, gpu_device_id=0)
lgb_final = lgb.LGBMRegressor(**lgb_final_params)

cat_final_params = dict(
    iterations=int(best.get("cat_iters", 600)),
    depth=int(best.get("cat_depth", 6)),
    learning_rate=float(best.get("cat_lr", 0.03)),
    verbose=False,
    random_state=RANDOM_STATE,
)
if use_gpu_flag:
    cat_final_params["task_type"] = "GPU"
cat_final = CatBoostRegressor(**cat_final_params)

# Fit on full X
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print("Fitting final LGB on full data...")
# No eval_set is passed, so there is nothing for a `verbose` flag to silence
# (and LightGBM >= 4 rejects that keyword).
lgb_final.fit(X_scaled, y_full)
print("Fitting final CatBoost on full data...")
cat_final.fit(X_scaled, y_full, verbose=False)

# Build simple ensemble and (optional) meta with small LGB if desired
ensemble_w = best.get("ensemble_w", 0.5)
cat_pred_full = cat_final.predict(X_scaled)  # computed once, reused below
base_pred_full = ensemble_w * lgb_final.predict(X_scaled) + (1 - ensemble_w) * cat_pred_full

# Optional: small LGB meta trained on base preds.
# Inputs are [blended prediction, CatBoost prediction]; they are in-sample
# predictions, so the diagnostic below is optimistic.
meta_features = np.vstack([base_pred_full, cat_pred_full]).T
meta_lgb = lgb.LGBMRegressor(num_leaves=31, n_estimators=200, random_state=RANDOM_STATE)
meta_lgb.fit(meta_features, y_full)

# diagnostics
final_strat, _, _ = backtest_with_cost_from_preds(meta_lgb.predict(meta_features), y_full, top_q=best.get("top_q", 0.2), tc=DEFAULT_TRANSACTION_COST)
print("Final train modified Sharpe (meta_lgb):", modified_sharpe(final_strat, ret_market))

# Save artifact. The fitted imputer is included so the same preprocessing can
# be replayed on new data before scaling and prediction.
artifact = {
    "lgb": lgb_final,
    "cat": cat_final,
    "meta": meta_lgb,
    "scaler": scaler,
    "imputer": imputer,
    "features": features_opt,
    "study_db": study_db,
    "optuna_best": best,
}
joblib.dump(artifact, os.path.join(OUTDIR, "models_optuna_turnover.joblib"))
print("Saved final artifact to:", os.path.join(OUTDIR, "models_optuna_turnover.joblib"))


In [None]:
import joblib
# Load the artifact that train_stack_and_save writes as 'models.joblib' (the
# HPO cell above saves 'models_optuna_turnover.joblib' instead, which is not
# read here). joblib.load unpickles, so only open files you produced yourself.
art = joblib.load(os.path.join(OUTDIR, 'models.joblib'))
print('Saved artifact keys:', list(art.keys()))
print('Train modified Sharpe:', art.get('train_sharpe'))
print('Selected features (sample):', art['features'][:20])
# Preview whichever submission files exist in OUTDIR.
for f in ['submission_full.csv', 'submission_scored_only.csv', 'submission.csv']:
    p = os.path.join(OUTDIR, f)
    if os.path.exists(p):
        print('\n', f, '->', p)
        # `display` is not imported in this file; presumably the IPython/Jupyter builtin.
        display(pd.read_csv(p).head())