In [1]:
# %%colab
from google.colab import drive
drive.mount('/content/drive')

# ============================================================
# Time-aware multi-model training (LGBM, HGB, RF, ET)
# - fast, time-respecting split
# - quick random search
# - per-(model,target) immediate save
# - robust load on re-runs
# - aggregated APPLY outputs + metrics table
# ============================================================
import numpy as np
import pandas as pd
import warnings, os, json, contextlib, joblib, tempfile, time
from pathlib import Path
from tqdm.auto import tqdm
from dataclasses import dataclass

from sklearn.metrics import (
    average_precision_score, roc_auc_score, f1_score, balanced_accuracy_score,
    precision_score, recall_score, confusion_matrix, precision_recall_curve
)
from sklearn.ensemble import (
    RandomForestClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
)
import lightgbm as lgb

warnings.filterwarnings("ignore")

# -------------------------
# User config
# -------------------------
MODE              = "train"   # "auto" (load if exists else train) | "train" | "load"
RUN_MODELS        = ["lgbm", "hgb", "rf", "et"]  # choose any subset
SAVE_MODELS       = True
MODEL_DIR         = "drive/MyDrive/Colab Notebooks/stock/models"
MODEL_TAG         = "multi_model_timeaware_v3"    # bump tag for new runs
ARTIFACTS_VERSION = 4

PARQUET_PATH      = "drive/MyDrive/Colab Notebooks/stock/expanded_stock_reduced.parquet"
TARGETS           = ["target_up20", "target_dd5"]

APPLY_DAYS        = 5
VAL_DAYS          = 90
VALID_POS         = 0.90     # place validation near the end of (train+valid) window
HORIZON           = 30
PURGE_DAYS        = HORIZON

# Tuning trials (kept small for speed)
N_TRIALS = {
    "lgbm": 12,
    "hgb":  10,
    "rf":    8,
    "et":    8,
}
EARLY_STOPPING_LGBM = 100

# Global downsampling caps (speed)
MAX_TRAIN_ROWS    = 180_000
MAX_VALID_ROWS    = 60_000
MAX_POS_SAMPLES   = None
NEG_POS_RATIO     = 5.0

# RF/ET-specific caps (tighter to avoid stalls)
RF_TUNE_MAX_ROWS  = 60_000
ET_TUNE_MAX_ROWS  = 80_000

ADD_TICKER_DUMMIES= True
TOPK_PER_DAY      = 20
np.random.seed(42)

# -------------------------
# Suppress LightGBM stdout/stderr
# -------------------------
@contextlib.contextmanager
def suppress_output():
    with open(os.devnull, "w") as devnull:
        with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
            yield

# -------------------------
# Paths & atomic persistence
# -------------------------
def ensure_dir(p): Path(p).mkdir(parents=True, exist_ok=True)
def _meta_path(): return os.path.join(MODEL_DIR, f"{MODEL_TAG}_meta.json")

def _model_path(model_key, target):
    ext = ".txt" if model_key == "lgbm" else ".joblib"
    return os.path.join(MODEL_DIR, f"{MODEL_TAG}_{model_key}_{target}{ext}")

def _atomic_save_json(obj, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with tempfile.NamedTemporaryFile('w', delete=False, dir=os.path.dirname(path)) as tf:
        json.dump(obj, tf, indent=2)
        tmp = tf.name
    os.replace(tmp, path)

def _atomic_save_lgbm(booster, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    tmp = path + ".tmp"
    booster.save_model(tmp)
    os.replace(tmp, path)

def _atomic_save_joblib(model, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    tmp = path + ".tmp"
    joblib.dump(model, tmp)
    os.replace(tmp, path)

def upsert_meta_for_model(meta, model_key, target, best_info, feature_cols_all):
    meta.setdefault("artifacts_version", ARTIFACTS_VERSION)
    meta.setdefault("model_tag", MODEL_TAG)
    meta.setdefault("models", [])
    if model_key not in meta["models"]:
        meta["models"].append(model_key)
    meta.setdefault("feature_cols", list(feature_cols_all))
    meta.setdefault("add_ticker_dummies", bool(ADD_TICKER_DUMMIES))
    meta.setdefault("targets", {})
    meta["targets"].setdefault(target, {})
    meta["targets"][target][model_key] = {
        "best_iter": best_info.get("best_iter"),
        "threshold": float(best_info["thr"]),
        "valid_report": best_info["rep"],
        "params": {k: (float(v) if isinstance(v, (np.floating, float))
                       else int(v) if isinstance(v, (np.integer, int))
                       else v)
                   for k, v in best_info["params"].items()}
    }
    return meta

def save_model_and_meta(model_key, target, model_obj, meta):
    if not SAVE_MODELS:
        return
    path = _model_path(model_key, target)
    if model_key == "lgbm":
        _atomic_save_lgbm(model_obj, path)
    else:
        _atomic_save_joblib(model_obj, path)
    _atomic_save_json(meta, _meta_path())
    print(f"[saved] {model_key}/{target} → {path}")

def load_artifacts(run_models, targets):
    meta = None
    if os.path.exists(_meta_path()):
        with open(_meta_path(), "r") as f:
            meta = json.load(f)
    loaded = {}
    for model_key in run_models:
        for target in targets:
            path = _model_path(model_key, target)
            obj = None
            if os.path.exists(path):
                if model_key == "lgbm":
                    with suppress_output():
                        obj = lgb.Booster(model_file=path)
                else:
                    obj = joblib.load(path)
            loaded[(model_key, target)] = obj
    ok = sum(1 for v in loaded.values() if v is not None)
    tot = len(run_models) * len(targets)
    print(f"[load] models loaded: {ok}/{tot}")
    return meta, loaded

# -------------------------
# Metrics & threshold
# -------------------------
def choose_threshold_by_f1(y_valid, p_valid):
    prec, rec, thr = precision_recall_curve(y_valid, p_valid)
    if len(thr) == 0:
        return 0.5
    f1_vals = 2 * (prec[:-1] * rec[:-1]) / np.clip(prec[:-1] + rec[:-1], 1e-12, None)
    idx = int(np.nanargmax(f1_vals))
    return float(thr[idx])

def metrics_report(y_true, proba, thr):
    pred = (proba >= thr).astype(int)
    cm   = confusion_matrix(y_true, pred)
    tn, fp, fn, tp = cm.ravel() if cm.size == 4 else (0,0,0,0)
    return {
        "AP":           float(average_precision_score(y_true, proba)),
        "ROC_AUC":      float(roc_auc_score(y_true, proba)),
        "F1":           float(f1_score(y_true, pred, zero_division=0)),
        "Precision":    float(precision_score(y_true, pred, zero_division=0)),
        "Recall":       float(recall_score(y_true, pred, zero_division=0)),
        "BalancedAcc":  float(balanced_accuracy_score(y_true, pred)),
        "Confusion":    [int(tn), int(fp), int(fn), int(tp)],
    }

def print_report(name, model_key, rep, thr):
    tn, fp, fn, tp = rep["Confusion"]
    print(f"\n[{name} | {model_key}] VALID metrics")
    print(f"  AP:           {rep['AP']:.4f}")
    print(f"  ROC-AUC:      {rep['ROC_AUC']:.4f}")
    print(f"  F1:           {rep['F1']:.4f}")
    print(f"  Precision:    {rep['Precision']:.4f}")
    print(f"  Recall:       {rep['Recall']:.4f}")
    print(f"  BalancedAcc:  {rep['BalancedAcc']:.4f}")
    print(f"  Confusion:    tn={tn}  fp={fp}  fn={fn}  tp={tp}")
    print(f"  Threshold:    {thr:.4f}")

# -------------------------
# Speedy undersampling
# -------------------------
def undersample_indices(y, max_rows=None, max_pos=None, neg_pos_ratio=None, seed=42):
    rng = np.random.default_rng(seed)
    idx_all = np.arange(len(y))
    pos_idx = idx_all[y == 1]
    neg_idx = idx_all[y == 0]

    if max_pos is not None and len(pos_idx) > max_pos:
        pos_idx = rng.choice(pos_idx, size=max_pos, replace=False)
    if neg_pos_ratio is not None and len(neg_idx) > neg_pos_ratio * max(1, len(pos_idx)):
        neg_keep = int(neg_pos_ratio * max(1, len(pos_idx)))
        neg_keep = max(1, min(neg_keep, len(neg_idx)))
        neg_idx  = rng.choice(neg_idx, size=neg_keep, replace=False)

    keep = np.concatenate([pos_idx, neg_idx])
    rng.shuffle(keep)
    if max_rows is not None and len(keep) > max_rows:
        keep = keep[:max_rows]
    return np.sort(keep)

def undersample_for_tuning(X_train, y_train, X_valid, y_valid):
    keep_tr = undersample_indices(
        y_train, max_rows=MAX_TRAIN_ROWS, max_pos=MAX_POS_SAMPLES,
        neg_pos_ratio=NEG_POS_RATIO, seed=42
    ) if (MAX_TRAIN_ROWS is not None and len(y_train) > MAX_TRAIN_ROWS) else np.arange(len(y_train))
    keep_va = undersample_indices(
        y_valid, max_rows=MAX_VALID_ROWS, max_pos=None,
        neg_pos_ratio=None, seed=123
    ) if (MAX_VALID_ROWS is not None and len(y_valid) > MAX_VALID_ROWS) else np.arange(len(y_valid))
    return X_train[keep_tr], y_train[keep_tr], X_valid[keep_va], y_valid[keep_va]

# -------------------------
# Load reduced parquet & build (date×ticker)
# -------------------------
df = pd.read_parquet(PARQUET_PATH)
assert isinstance(df.columns, pd.MultiIndex), "Parquet needs MultiIndex columns (feature/target, ticker)."
if not np.issubdtype(df.index.dtype, np.datetime64):
    df.index = pd.to_datetime(df.index)
df = df.sort_index()

lvl0 = df.columns.get_level_values(0).astype(str)
lvl1 = df.columns.get_level_values(1).astype(str)
assert all(t in set(lvl0) for t in TARGETS), "Required targets missing."

is_target = lvl0.str.startswith("target_")
all_feature_names = sorted(set(lvl0[~is_target]))
tickers = sorted(set(lvl1[(lvl0 == TARGETS[0])]) & set(lvl1[(lvl0 == TARGETS[1])]))

rows = []
for tk in tqdm(tickers, desc="Stacking tickers"):
    X_tk = df.xs(tk, level=1, axis=1)
    X_tk = X_tk.loc[:, [c for c in X_tk.columns if not str(c).startswith("target_")]] \
             .reindex(columns=all_feature_names, fill_value=0.0)
    y_up = df[('target_up20', tk)].astype('int8')
    y_dd = df[('target_dd5',  tk)].astype('int8')
    block = X_tk.copy()
    block['target_up20'] = y_up.values
    block['target_dd5']  = y_dd.values
    block['ticker']      = tk
    rows.append(block)

LONG = pd.concat(rows, axis=0).sort_index()

if ADD_TICKER_DUMMIES:
    dummies = pd.get_dummies(LONG['ticker'], prefix='tk', dtype=np.uint8)
    LONG = pd.concat([LONG.drop(columns=['ticker']), dummies], axis=1)
else:
    LONG = LONG.drop(columns=['ticker'])

feature_cols_all = [c for c in LONG.columns if c not in TARGETS]
LONG[feature_cols_all] = (LONG[feature_cols_all]
                          .apply(pd.to_numeric, errors='coerce')
                          .replace([np.inf, -np.inf], np.nan)
                          .fillna(0.0)
                          .astype('float32'))
for t in TARGETS: LONG[t] = LONG[t].astype('int8')

print("Long shape:", LONG.shape)

# -------------------------
# Time-aware split
# -------------------------
dates = np.array(sorted(LONG.index.unique()))
assert len(dates) >= (APPLY_DAYS + VAL_DAYS + PURGE_DAYS + 5), "History too short."

apply_dates    = dates[-APPLY_DAYS:]
preapply_dates = dates[:-APPLY_DAYS]
n_pre          = len(preapply_dates)

center = int(round(VALID_POS * (n_pre - 1)))
v_start = max(PURGE_DAYS, center - VAL_DAYS // 2)
v_end   = min(n_pre, v_start + VAL_DAYS)
v_start = max(PURGE_DAYS, v_end - VAL_DAYS)

valid_dates = preapply_dates[v_start:v_end]
train_end   = max(0, v_start - PURGE_DAYS)
train_dates = preapply_dates[:train_end]

assert len(train_dates) > 0
assert np.max(train_dates) < np.min(valid_dates) < np.min(apply_dates)

mask_apply = LONG.index.isin(apply_dates)
mask_valid = LONG.index.isin(valid_dates)
mask_train = LONG.index.isin(train_dates)

print(f"Dates — TRAIN: {train_dates[0].date()} → {train_dates[-1].date()} "
      f"| VALID: {valid_dates[0].date()} → {valid_dates[-1].date()} "
      f"| APPLY(last {APPLY_DAYS}): {apply_dates[0].date()} → {apply_dates[-1].date()} "
      f"| PURGE={PURGE_DAYS}d, HORIZON={HORIZON}d")

# Matrices (before tuning downsampling)
X_train_full = LONG.loc[mask_train, feature_cols_all].values
X_valid_full = LONG.loc[mask_valid, feature_cols_all].values
X_pre_full   = np.vstack([X_train_full, X_valid_full])

y_train_full = {t: LONG.loc[mask_train, t].values.astype('int8') for t in TARGETS}
y_valid_full = {t: LONG.loc[mask_valid, t].values.astype('int8') for t in TARGETS}
y_pre_full   = {t: np.concatenate([y_train_full[t], y_valid_full[t]]) for t in TARGETS}

# ============================================================
# TRAIN and/or LOAD
# ============================================================
meta_loaded, models_loaded = (None, {})
if MODE in ("auto", "load"):
    meta_loaded, models_loaded = load_artifacts(RUN_MODELS, TARGETS)

need_train = (MODE == "train")
if MODE == "auto":
    for mk in RUN_MODELS:
        for t in TARGETS:
            if models_loaded.get((mk, t)) is None:
                need_train = True
                break

@dataclass
class TrialResult:
    params: dict
    best_iter: int | None
    valid_ap: float
    thr: float
    report: dict
    model_obj: object

def to_py(x):
    if isinstance(x, (np.floating,)): return float(x)
    if isinstance(x, (np.integer,)):  return int(x)
    return x

# -------------------------
# Hyperparam samplers (fast ranges)
# -------------------------
def sample_lgbm_params(base_spw):
    return {
        "objective": "binary",
        "boosting_type": "gbdt",
        "learning_rate": float(10**np.random.uniform(-2.0, -0.7)),  # ~[0.01,0.20]
        "num_leaves": int(np.random.randint(24, 128)),
        "max_depth": int(np.random.choice([-1,5,6,7,8])),
        "min_child_samples": int(np.random.randint(10, 60)),
        "subsample": float(np.random.choice(np.linspace(0.7, 1.0, 4))),
        "colsample_bytree": float(np.random.choice(np.linspace(0.6, 1.0, 5))),
        "reg_lambda": float(10**np.random.uniform(-3, 1)),
        "scale_pos_weight": float(np.random.choice([0.5*base_spw, 1.0*base_spw, 1.5*base_spw])),
        "n_estimators": 1800,
        "verbosity": -1,
        "random_state": 42,
        "n_jobs": -1
    }

def sample_hgb_params():
    if np.random.rand() < 0.5:
        max_depth = np.random.choice([None, 5, 7, 9])
        max_leaf_nodes = None
    else:
        max_depth = None
        max_leaf_nodes = int(np.random.choice([31, 63, 127, 255]))
    return {
        "loss": "log_loss",
        "learning_rate": float(10**np.random.uniform(-2.0, -0.7)),
        "max_depth": max_depth,
        "max_leaf_nodes": max_leaf_nodes,
        "min_samples_leaf": int(np.random.choice([10, 20, 30, 50])),
        "l2_regularization": float(10**np.random.uniform(-4, 1)),
        "max_bins": int(np.random.choice([64, 128])),
        "class_weight": "balanced",
        "max_iter": int(np.random.choice([100, 200, 300])),
        "random_state": 42,
        "early_stopping": False,
        "verbose": 0
    }

def sample_rf_params():
    # SPEED: fewer trees, limited depth, bagging via max_samples
    params = {
        "n_estimators": int(np.random.choice([80, 120, 160])),
        "max_depth": int(np.random.choice([8, 12, 16])),
        "max_features": "sqrt",
        "min_samples_split": int(np.random.choice([2, 5, 10])),
        "min_samples_leaf": int(np.random.choice([1, 2, 4])),
        "bootstrap": True,
        "class_weight": "balanced_subsample",
        "n_jobs": -1,
        "random_state": 42,
        "verbose": 0,
        "warm_start": False,
        # sklearn >=1.1: greatly speeds each tree by sampling rows
        "max_samples": float(np.random.choice([0.4, 0.6])),
    }
    return params

def sample_et_params():
    params = {
        "n_estimators": int(np.random.choice([300, 400, 500])),
        "max_depth": int(np.random.choice([10, 14, 18])),
        "max_features": "sqrt",
        "min_samples_split": int(np.random.choice([2, 5, 10])),
        "min_samples_leaf": int(np.random.choice([1, 2, 4])),
        "bootstrap": False,
        "class_weight": "balanced",
        "n_jobs": -1,
        "random_state": 42,
        "verbose": 0,
        "warm_start": False,
    }
    return params

# -------------------------
# Tuning loops
# -------------------------
def tune_lgbm(X_tr, y_tr, X_va, y_va, n_trials):
    pos = int(y_tr.sum()); neg = int(len(y_tr) - pos)
    if pos == 0 or neg == 0:
        return None
    base_spw = max(1.0, neg / max(1, pos))
    dtrain = lgb.Dataset(X_tr, label=y_tr, free_raw_data=False)
    dvalid = lgb.Dataset(X_va, label=y_va, reference=dtrain, free_raw_data=False)
    best = None
    pbar = tqdm(range(n_trials), desc="Tuning lgbm", leave=True)
    for _ in pbar:
        params = sample_lgbm_params(base_spw)
        with suppress_output():
            booster = lgb.train(
                params,
                train_set=dtrain,
                valid_sets=[dvalid],
                valid_names=["valid"],
                num_boost_round=params["n_estimators"],
                callbacks=[lgb.early_stopping(stopping_rounds=EARLY_STOPPING_LGBM, verbose=False),
                           lgb.log_evaluation(period=0)],
            )
        with suppress_output():
            p = booster.predict(X_va, num_iteration=booster.best_iteration)
        thr = choose_threshold_by_f1(y_va, p)
        rep = metrics_report(y_va, p, thr)
        if (best is None) or (rep["AP"] > best.valid_ap):
            best = TrialResult(params=params, best_iter=booster.best_iteration,
                               valid_ap=rep["AP"], thr=thr, report=rep, model_obj=booster)
        pbar.set_postfix(AP=f"{rep['AP']:.3f}", F1=f"{rep['F1']:.3f}", AUC=f"{rep['ROC_AUC']:.3f}")
    return best

def _cap_rows_for_tree_models(X, y, cap):
    if len(y) <= cap:
        return X, y
    keep = undersample_indices(y, max_rows=cap, neg_pos_ratio=NEG_POS_RATIO, seed=101)
    return X[keep], y[keep]

def tune_hgb(X_tr, y_tr, X_va, y_va, n_trials):
    best = None
    # HGB is already fast; small cap for safety
    X_tr2, y_tr2 = _cap_rows_for_tree_models(X_tr, y_tr, 120_000)
    pbar = tqdm(range(n_trials), desc="Tuning hgb", leave=True)
    for _ in pbar:
        params = sample_hgb_params()
        clf = HistGradientBoostingClassifier(**params)
        clf.fit(X_tr2, y_tr2)
        p = clf.predict_proba(X_va)[:,1]
        thr = choose_threshold_by_f1(y_va, p)
        rep = metrics_report(y_va, p, thr)
        if (best is None) or (rep["AP"] > best.valid_ap):
            best = TrialResult(params=params, best_iter=params["max_iter"],
                               valid_ap=rep["AP"], thr=thr, report=rep, model_obj=clf)
        pbar.set_postfix(AP=f"{rep['AP']:.3f}", F1=f"{rep['F1']:.3f}", AUC=f"{rep['ROC_AUC']:.3f}")
    return best

def _instantiate_rf(params):
    # allow running on older sklearn (no max_samples)
    try:
        clf = RandomForestClassifier(**params)
    except TypeError:
        params = params.copy()
        params.pop("max_samples", None)
        clf = RandomForestClassifier(**params)
    return clf

def tune_rf(X_tr, y_tr, X_va, y_va, n_trials):
    # STRONGER cap to avoid stalling first trial
    X_tr2, y_tr2 = _cap_rows_for_tree_models(X_tr, y_tr, RF_TUNE_MAX_ROWS)
    best = None
    pbar = tqdm(range(n_trials), desc="Tuning rf", leave=True)
    for _ in pbar:
        params = sample_rf_params()
        clf = _instantiate_rf(params)
        start = time.time()
        clf.fit(X_tr2, y_tr2)
        p = clf.predict_proba(X_va)[:,1]
        thr = choose_threshold_by_f1(y_va, p)
        rep = metrics_report(y_va, p, thr)
        dur = time.time() - start
        if (best is None) or (rep["AP"] > best.valid_ap):
            best = TrialResult(params=params, best_iter=None,
                               valid_ap=rep["AP"], thr=thr, report=rep, model_obj=clf)
        pbar.set_postfix(AP=f"{rep['AP']:.3f}", F1=f"{rep['F1']:.3f}", AUC=f"{rep['ROC_AUC']:.3f}", s=f"{dur:.1f}")
    return best

def tune_et(X_tr, y_tr, X_va, y_va, n_trials):
    X_tr2, y_tr2 = _cap_rows_for_tree_models(X_tr, y_tr, ET_TUNE_MAX_ROWS)
    best = None
    pbar = tqdm(range(n_trials), desc="Tuning et", leave=True)
    for _ in pbar:
        params = sample_et_params()
        clf = ExtraTreesClassifier(**params)
        start = time.time()
        clf.fit(X_tr2, y_tr2)
        p = clf.predict_proba(X_va)[:,1]
        thr = choose_threshold_by_f1(y_va, p)
        rep = metrics_report(y_va, p, thr)
        dur = time.time() - start
        if (best is None) or (rep["AP"] > best.valid_ap):
            best = TrialResult(params=params, best_iter=None,
                               valid_ap=rep["AP"], thr=thr, report=rep, model_obj=clf)
        pbar.set_postfix(AP=f"{rep['AP']:.3f}", F1=f"{rep['F1']:.3f}", AUC=f"{rep['ROC_AUC']:.3f}", s=f"{dur:.1f}")
    return best

# -------------------------
# Final fit using PRE (train+valid)
# -------------------------
def fit_full_lgbm(best, X_pre, y_pre):
    pos2, neg2 = int(y_pre.sum()), int(len(y_pre) - y_pre.sum())
    spw2 = max(1.0, neg2 / max(1, pos2)) if pos2 > 0 else 1.0
    params = best.params.copy()
    params["scale_pos_weight"] = float(spw2)
    with suppress_output():
        booster = lgb.train(
            params,
            train_set=lgb.Dataset(X_pre, label=y_pre),
            num_boost_round=best.best_iter,
            valid_sets=[],
            callbacks=[lgb.log_evaluation(period=0)],
        )
    return booster

def fit_full_hgb(best, X_pre, y_pre):
    params = best.params.copy()
    params["max_iter"] = int(best.best_iter) if best.best_iter is not None else params.get("max_iter", 200)
    clf = HistGradientBoostingClassifier(**params)
    clf.fit(X_pre, y_pre)
    return clf

def fit_full_sklearn(best, X_pre, y_pre, kind):
    params = best.params.copy()
    if kind == "rf":
        clf = _instantiate_rf(params)
    elif kind == "et":
        clf = ExtraTreesClassifier(**params)
    else:
        raise ValueError(kind)
    clf.fit(X_pre, y_pre)
    return clf

# -------------------------
# Train / Load orchestration
# -------------------------
meta = {
    "artifacts_version": ARTIFACTS_VERSION,
    "model_tag": MODEL_TAG,
    "models": RUN_MODELS.copy(),
    "feature_cols": list(feature_cols_all),
    "add_ticker_dummies": bool(ADD_TICKER_DUMMIES),
    "dates": {
        "train_start": str(pd.Timestamp(train_dates[0]).date()), "train_end": str(pd.Timestamp(train_dates[-1]).date()),
        "valid_start": str(pd.Timestamp(valid_dates[0]).date()), "valid_end": str(pd.Timestamp(valid_dates[-1]).date()),
        "apply_start": str(pd.Timestamp(apply_dates[0]).date()), "apply_end": str(pd.Timestamp(apply_dates[-1]).date()),
        "purge_days": int(PURGE_DAYS), "horizon": int(HORIZON), "val_days": int(VAL_DAYS),
        "valid_pos": float(VALID_POS)
    },
    "targets": {t: {} for t in TARGETS}
}
final_models = {}
metrics_list = []

if (MODE in ("load", "auto")) and not need_train:
    meta = meta_loaded if meta_loaded is not None else meta

if need_train:
    for target in TARGETS:
        X_tr, y_tr = X_train_full, y_train_full[target]
        X_va, y_va = X_valid_full, y_valid_full[target]
        X_tr, y_tr, X_va, y_va = undersample_for_tuning(X_tr, y_tr, X_va, y_va)

        for model_key in RUN_MODELS:
            n_trials = N_TRIALS.get(model_key, 8)

            if model_key == "lgbm":
                best = tune_lgbm(X_tr, y_tr, X_va, y_va, n_trials)
                if best is None:
                    meta["targets"].setdefault(target, {})[model_key] = None
                    final_models[(model_key, target)] = None
                    continue
                model_full = fit_full_lgbm(best, X_pre_full, y_pre_full[target])
                with suppress_output():
                    p_va = best.model_obj.predict(X_va, num_iteration=best.best_iter)
                thr = best.thr; rep = best.report

            elif model_key == "hgb":
                best = tune_hgb(X_tr, y_tr, X_va, y_va, n_trials)
                model_full = fit_full_hgb(best, X_pre_full, y_pre_full[target])
                p_va = best.model_obj.predict_proba(X_va)[:, 1]
                thr = best.thr; rep = best.report

            elif model_key == "rf":
                best = tune_rf(X_tr, y_tr, X_va, y_va, n_trials)
                model_full = fit_full_sklearn(best, X_pre_full, y_pre_full[target], "rf")
                p_va = best.model_obj.predict_proba(X_va)[:, 1]
                thr = best.thr; rep = best.report

            elif model_key == "et":
                best = tune_et(X_tr, y_tr, X_va, y_va, n_trials)
                model_full = fit_full_sklearn(best, X_pre_full, y_pre_full[target], "et")
                p_va = best.model_obj.predict_proba(X_va)[:, 1]
                thr = best.thr; rep = best.report

            else:
                raise ValueError(f"Unknown model_key: {model_key}")

            # Print VALID metrics
            print_report(target, model_key, rep, thr)

            # keep in memory
            final_models[(model_key, target)] = model_full

            # Save this model immediately + meta
            best_info = {
                "best_iter": int(best.best_iter) if best.best_iter is not None else None,
                "thr": float(thr),
                "rep": rep,
                "params": {k: to_py(v) for k,v in best.params.items()}
            }
            meta = upsert_meta_for_model(meta, model_key, target, best_info, feature_cols_all)
            save_model_and_meta(model_key, target, model_full, meta)

            # Collect VALID metrics
            row = {"model": model_key, "target": target, "threshold": float(thr)}
            row.update({k: rep[k] for k in ["AP","ROC_AUC","F1","Precision","Recall","BalancedAcc"]})
            tn, fp, fn, tp = rep["Confusion"]
            row.update({"tn":tn,"fp":fp,"fn":fn,"tp":tp})
            metrics_list.append(row)

else:
    # LOAD: align feature order and evaluate VALID
    saved_cols = meta["feature_cols"]
    def align_columns(mat_df, cols_expected):
        for c in cols_expected:
            if c not in mat_df.columns:
                mat_df[c] = 0.0
        extra = [c for c in mat_df.columns if c not in cols_expected and c not in TARGETS and not c.startswith("tk_")]
        if extra:
            mat_df = mat_df.drop(columns=extra)
        return mat_df[cols_expected]

    valid_frame = align_columns(LONG.loc[mask_valid].copy(), saved_cols)
    X_valid_aln  = valid_frame.values

    for target in TARGETS:
        for model_key in RUN_MODELS:
            model_obj = models_loaded.get((model_key, target))
            tgt_meta  = meta.get("targets", {}).get(target, {}).get(model_key, None)
            if model_obj is None or tgt_meta is None:
                print(f"[{model_key}/{target}] missing; train first.")
                continue
            thr = float(tgt_meta["threshold"])
            best_iter = tgt_meta.get("best_iter")
            if model_key == "lgbm":
                with suppress_output():
                    p_va = model_obj.predict(X_valid_aln, num_iteration=best_iter if best_iter else model_obj.best_iteration)
            else:
                p_va = model_obj.predict_proba(X_valid_aln)[:,1]
            y_va = LONG.loc[mask_valid, target].values.astype('int8')
            rep = metrics_report(y_va, p_va, thr)
            print_report(target, model_key, rep, thr)

            row = {"model": model_key, "target": target, "threshold": float(thr)}
            row.update({k: rep[k] for k in ["AP","ROC_AUC","F1","Precision","Recall","BalancedAcc"]})
            tn, fp, fn, tp = rep["Confusion"]
            row.update({"tn":tn,"fp":fp,"fn":fn,"tp":tp})
            metrics_list.append(row)

    # use loaded for APPLY
    final_models = {k: v for k, v in models_loaded.items() if v is not None}

# ============================================================
# APPLY (last 5 days) — aggregate outputs
# ============================================================
saved_cols = meta["feature_cols"]
def align_columns(df_like, cols_expected):
    for c in cols_expected:
        if c not in df_like.columns:
            df_like[c] = 0.0
    extra = [c for c in df_like.columns if c not in cols_expected and c not in TARGETS and not c.startswith("tk_")]
    if extra:
        df_like = df_like.drop(columns=extra)
    return df_like[cols_expected]

apply_df = LONG.loc[mask_apply].copy()
apply_df_aln = align_columns(apply_df.copy(), saved_cols)

# recover tickers from dummies for display
if ADD_TICKER_DUMMIES:
    tk_cols = [c for c in apply_df.columns if c.startswith("tk_")]
    def recover_ticker(row):
        if not tk_cols: return "N/A"
        idx = int(np.argmax(row[tk_cols].values))
        return tk_cols[idx].replace("tk_", "")
    apply_df["ticker_rec"] = apply_df.apply(recover_ticker, axis=1)
else:
    apply_df["ticker_rec"] = "N/A"

# predictions per model × target
for target in TARGETS:
    for model_key in RUN_MODELS:
        meta_t = meta.get("targets", {}).get(target, {}).get(model_key, None)
        model  = final_models.get((model_key, target))
        colp   = f"proba_{target}__{model_key}"
        coly   = f"pred_{target}__{model_key}"
        if (meta_t is None) or (model is None):
            apply_df[colp] = 0.0
            apply_df[coly] = 0
            continue
        thr = float(meta_t["threshold"])
        best_iter = meta_t.get("best_iter")
        if model_key == "lgbm":
            with suppress_output():
                p = model.predict(apply_df_aln.values, num_iteration=best_iter if best_iter else model.best_iteration)
        else:
            p = model.predict_proba(apply_df_aln.values)[:,1]
        apply_df[colp] = p
        apply_df[coly] = (p >= thr).astype(int)

# aggregated outputs & metrics df
cols_keep = ["ticker_rec"]
for target in TARGETS:
    for mk in RUN_MODELS:
        cols_keep += [f"proba_{target}__{mk}", f"pred_{target}__{mk}"]
APPLY_AGG = apply_df[cols_keep].copy()

METRICS_DF = pd.DataFrame(metrics_list).sort_values(["target","AP"], ascending=[True, False])

print("\n=== Metrics comparison (VALID) ===")
display(METRICS_DF)

print("\n=== Aggregated APPLY outputs (last 5 days) — head ===")
display(APPLY_AGG.head(50))

# Optional: Top-K per day (example: LGBM on target_up20)
apply_days = sorted(apply_df.index.unique())
def top_per_day(df_in, prob_col, k=TOPK_PER_DAY):
    rows=[]
    for d in apply_days:
        day = df_in.loc[df_in.index==d]
        rows.append(day.sort_values(prob_col, ascending=False).head(k).assign(date=d))
    return pd.concat(rows)

if "lgbm" in RUN_MODELS:
    print("\nTop-K by LGBM for target_up20:")
    display(top_per_day(apply_df, "proba_target_up20__lgbm")[["date","ticker_rec","proba_target_up20__lgbm","pred_target_up20__lgbm"]])

Mounted at /content/drive


Stacking tickers:   0%|          | 0/147 [00:00<?, ?it/s]

Long shape: (969759, 316)
Dates — TRAIN: 2005-01-03 → 2023-10-24 | VALID: 2023-11-24 → 2024-02-21 | APPLY(last 5): 2025-10-29 → 2025-11-02 | PURGE=30d, HORIZON=30d


Tuning lgbm:   0%|          | 0/12 [00:00<?, ?it/s]


[target_up20 | lgbm] VALID metrics
  AP:           0.4649
  ROC-AUC:      0.8250
  F1:           0.4555
  Precision:    0.4113
  Recall:       0.5105
  BalancedAcc:  0.7059
  Confusion:    tn=10504  fp=1151  fn=771  tp=804
  Threshold:    0.2890
[saved] lgbm/target_up20 → drive/MyDrive/Colab Notebooks/stock/models/multi_model_timeaware_v3_lgbm_target_up20.txt


Tuning hgb:   0%|          | 0/10 [00:00<?, ?it/s]


[target_up20 | hgb] VALID metrics
  AP:           0.4799
  ROC-AUC:      0.8373
  F1:           0.4812
  Precision:    0.4392
  Recall:       0.5321
  BalancedAcc:  0.7201
  Confusion:    tn=10585  fp=1070  fn=737  tp=838
  Threshold:    0.7032
[saved] hgb/target_up20 → drive/MyDrive/Colab Notebooks/stock/models/multi_model_timeaware_v3_hgb_target_up20.joblib


Tuning rf:   0%|          | 0/8 [00:00<?, ?it/s]


[target_up20 | rf] VALID metrics
  AP:           0.4056
  ROC-AUC:      0.8253
  F1:           0.4612
  Precision:    0.3529
  Recall:       0.6654
  BalancedAcc:  0.7502
  Confusion:    tn=9733  fp=1922  fn=527  tp=1048
  Threshold:    0.5069
[saved] rf/target_up20 → drive/MyDrive/Colab Notebooks/stock/models/multi_model_timeaware_v3_rf_target_up20.joblib


Tuning et:   0%|          | 0/8 [00:00<?, ?it/s]


[target_up20 | et] VALID metrics
  AP:           0.4836
  ROC-AUC:      0.8350
  F1:           0.4698
  Precision:    0.4233
  Recall:       0.5276
  BalancedAcc:  0.7152
  Confusion:    tn=10523  fp=1132  fn=744  tp=831
  Threshold:    0.6698
[saved] et/target_up20 → drive/MyDrive/Colab Notebooks/stock/models/multi_model_timeaware_v3_et_target_up20.joblib


Tuning lgbm:   0%|          | 0/12 [00:00<?, ?it/s]


[target_dd5 | lgbm] VALID metrics
  AP:           0.7498
  ROC-AUC:      0.8495
  F1:           0.7425
  Precision:    0.6966
  Recall:       0.7948
  BalancedAcc:  0.7865
  Confusion:    tn=6276  fp=1788  fn=1060  tp=4106
  Threshold:    0.6270
[saved] lgbm/target_dd5 → drive/MyDrive/Colab Notebooks/stock/models/multi_model_timeaware_v3_lgbm_target_dd5.txt


Tuning hgb:   0%|          | 0/10 [00:00<?, ?it/s]


[target_dd5 | hgb] VALID metrics
  AP:           0.7436
  ROC-AUC:      0.8502
  F1:           0.7433
  Precision:    0.6516
  Recall:       0.8651
  BalancedAcc:  0.7844
  Confusion:    tn=5675  fp=2389  fn=697  tp=4469
  Threshold:    0.6499
[saved] hgb/target_dd5 → drive/MyDrive/Colab Notebooks/stock/models/multi_model_timeaware_v3_hgb_target_dd5.joblib


Tuning rf:   0%|          | 0/8 [00:00<?, ?it/s]


[target_dd5 | rf] VALID metrics
  AP:           0.7532
  ROC-AUC:      0.8580
  F1:           0.7600
  Precision:    0.6746
  Recall:       0.8701
  BalancedAcc:  0.8006
  Confusion:    tn=5896  fp=2168  fn=671  tp=4495
  Threshold:    0.6474
[saved] rf/target_dd5 → drive/MyDrive/Colab Notebooks/stock/models/multi_model_timeaware_v3_rf_target_dd5.joblib


Tuning et:   0%|          | 0/8 [00:00<?, ?it/s]


[target_dd5 | et] VALID metrics
  AP:           0.7225
  ROC-AUC:      0.8469
  F1:           0.7561
  Precision:    0.6732
  Recall:       0.8622
  BalancedAcc:  0.7970
  Confusion:    tn=5902  fp=2162  fn=712  tp=4454
  Threshold:    0.6906
[saved] et/target_dd5 → drive/MyDrive/Colab Notebooks/stock/models/multi_model_timeaware_v3_et_target_dd5.joblib

=== Metrics comparison (VALID) ===


Unnamed: 0,model,target,threshold,AP,ROC_AUC,F1,Precision,Recall,BalancedAcc,tn,fp,fn,tp
6,rf,target_dd5,0.647419,0.753218,0.858049,0.759997,0.674621,0.870112,0.800632,5896,2168,671,4495
4,lgbm,target_dd5,0.627045,0.749784,0.849482,0.742495,0.696641,0.794812,0.786543,6276,1788,1060,4106
5,hgb,target_dd5,0.649872,0.743646,0.850244,0.743347,0.651648,0.865079,0.784412,5675,2389,697,4469
7,et,target_dd5,0.690619,0.722489,0.846886,0.756069,0.673216,0.862176,0.797035,5902,2162,712,4454
3,et,target_up20,0.66981,0.483576,0.835024,0.469757,0.423332,0.527619,0.715247,10523,1132,744,831
1,hgb,target_up20,0.703248,0.479915,0.837332,0.481194,0.439203,0.532063,0.720129,10585,1070,737,838
0,lgbm,target_up20,0.288982,0.4649,0.824987,0.455524,0.411253,0.510476,0.70586,10504,1151,771,804
2,rf,target_up20,0.506918,0.405592,0.825296,0.461166,0.352862,0.665397,0.750245,9733,1922,527,1048



=== Aggregated APPLY outputs (last 5 days) — head ===


Unnamed: 0_level_0,ticker_rec,proba_target_up20__lgbm,pred_target_up20__lgbm,proba_target_up20__hgb,pred_target_up20__hgb,proba_target_up20__rf,pred_target_up20__rf,proba_target_up20__et,pred_target_up20__et,proba_target_dd5__lgbm,pred_target_dd5__lgbm,proba_target_dd5__hgb,pred_target_dd5__hgb,proba_target_dd5__rf,pred_target_dd5__rf,proba_target_dd5__et,pred_target_dd5__et
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2025-10-29,HCTR11.SA,0.159178,0,0.163183,0,0.19737,0,0.593108,0,0.443626,0,0.52344,0,0.583356,0,0.698866,1
2025-10-29,SOL-USD,0.297239,1,0.725816,1,0.589879,1,0.671515,1,0.738883,1,0.894869,1,0.912273,1,0.761871,1
2025-10-29,B3SA3.SA,0.277697,0,0.307546,0,0.462647,0,0.652774,0,0.67037,1,0.874743,1,0.742848,1,0.722945,1
2025-10-29,PVBI11.SA,0.171424,0,0.072443,0,0.090293,0,0.419979,0,0.515848,0,0.54067,0,0.576213,0,0.66498,0
2025-10-29,BBSE3.SA,0.153,0,0.117814,0,0.241237,0,0.626869,0,0.544768,0,0.51891,0,0.616493,0,0.710651,1
2025-10-29,VGHF11.SA,0.159178,0,0.042539,0,0.087854,0,0.56766,0,0.325632,0,0.356438,0,0.431614,0,0.6363,0
2025-10-29,TUPY3.SA,0.254115,0,0.608985,0,0.56388,1,0.664034,0,0.685555,1,0.79178,1,0.802366,1,0.723585,1
2025-10-29,EMBR3.SA,0.268438,0,0.287297,0,0.334377,0,0.641679,0,0.694869,1,0.84115,1,0.753018,1,0.74823,1
2025-10-29,KNHY11.SA,0.151722,0,0.073408,0,0.065569,0,0.479248,0,0.361245,0,0.212638,0,0.329364,0,0.531484,0
2025-10-29,EGIE3.SA,0.220689,0,0.26508,0,0.255525,0,0.628785,0,0.623589,0,0.694978,1,0.72311,1,0.722816,1



Top-K by LGBM for target_up20:


Unnamed: 0_level_0,date,ticker_rec,proba_target_up20__lgbm,pred_target_up20__lgbm
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-29,2025-10-29,^SSE,0.343929,1
2025-10-29,2025-10-29,DOGE-USD,0.325967,1
2025-10-29,2025-10-29,RAIZ4.SA,0.324825,1
2025-10-29,2025-10-29,AZUL4.SA,0.323762,1
2025-10-29,2025-10-29,TCSA3.SA,0.321868,1
...,...,...,...,...
2025-11-02,2025-11-02,NATU3.SA,0.297322,1
2025-11-02,2025-11-02,POMO4.SA,0.297071,1
2025-11-02,2025-11-02,CSNA3.SA,0.296195,1
2025-11-02,2025-11-02,BEEF3.SA,0.294642,1


In [2]:
print("Última data no PARQUET:", LONG.index.max())
print("Faixa do APPLY:", apply_dates[0], "→", apply_dates[-1])
print("Qtd linhas por dia no APPLY:")
print(apply_df.groupby(apply_df.index.date).size().tail(10))


Última data no PARQUET: 2025-11-02 00:00:00
Faixa do APPLY: 2025-10-29 00:00:00 → 2025-11-02 00:00:00
Qtd linhas por dia no APPLY:
2025-10-29    147
2025-10-30    147
2025-10-31    147
2025-11-01    147
2025-11-02    147
dtype: int64
