In [1]:
# %%colab
# ============================================================
# Ensemblagem time-safe melhorada:
#  - Mantém: ens_weighted (AP otimizado, CV purgado) e stacked LR
#  - Remove: rank-percentile e stacked LightGBM (piores nos seus VALID)
#  - Adiciona:
#     * ens_weighted_recency  (AP com pesos por recência)
#     * ens_logit_weighted    (blend no espaço do logit)
#     * ens_trimmed_mean      (média podada contra outliers)
#     * ens_selection         (Caruana ensemble selection, forward com repetição)
#  - Continua: calibração isotônica OOF (CV purgado), threshold por F1
# ============================================================
# Mount Google Drive inside the Colab runtime; the model and parquet paths
# configured further down live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

import os, json, joblib, warnings, contextlib
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import lightgbm as lgb
from sklearn.linear_model import LogisticRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import (average_precision_score, roc_auc_score, f1_score,
                             precision_score, recall_score, balanced_accuracy_score,
                             confusion_matrix)
from sklearn.preprocessing import StandardScaler

# Silence every warning category for a quieter notebook log
# (note that this also hides warnings that could be informative).
warnings.filterwarnings("ignore")
np.set_printoptions(suppress=True, floatmode="fixed", precision=6)

# ------------------------- Config -------------------------
# NOTE(review): these two paths are relative while Drive is mounted at
# /content/drive — presumably the working directory is /content; confirm.
MODEL_DIR    = "drive/My Drive/Colab Notebooks/stock/models"
PARQUET_PATH = "drive/My Drive/Colab Notebooks/stock/expanded_stock_reduced.parquet"
MODEL_TAG    = "multi_model_timeaware_v3"

TARGETS        = ["target_up20", "target_dd5"]
APPLY_DAYS     = 5

# Outer split (same settings as the earlier runs)
VALID_DAYS     = 180
VALID_POS      = 0.90
HORIZON        = 30
PURGE_DAYS     = HORIZON
MIN_TRAIN_DAYS = 45

# Inner CV (used for calibration and stacking)
TEST_LEN_DAYS = 15
STEP_DAYS     = 3
MIN_META_ROWS = 2000
MIN_META_POS  = 20
MIN_META_NEG  = 20

ADD_TICKER_DUMMIES = True
TOPK_PER_DAY       = 20
ENSEMBLE_WEIGHT_TRIALS = 300
RECENCY_ALPHA      = 3.0     # larger => more weight on the end of VALID
RNG = np.random.default_rng(42)

# ------------------ Utilidades ------------------
@contextlib.contextmanager
def suppress_output():
    """Silence Python-level stdout and stderr inside the ``with`` block.

    Both streams are redirected to ``os.devnull``. The redirections and the
    devnull handle are released when the block exits, including on error.
    """
    with open(os.devnull, "w") as devnull, \
         contextlib.redirect_stdout(devnull), \
         contextlib.redirect_stderr(devnull):
        yield

def robust_choose_thr_by_f1(y_true, proba):
    """Pick the decision threshold that maximises F1 on the given scores.

    Candidate thresholds are the unique values of 199 evenly spaced quantiles
    (1%..99%) of ``proba``; a prediction is positive when ``proba >= thr``.
    Ties keep the lowest threshold (first one reaching the best F1).

    Args:
        y_true: Binary labels (cast to int8).
        proba: Scores/probabilities aligned with ``y_true``.

    Returns:
        The best threshold as a float, or 0.5 when there is nothing to search
        over (empty input or a constant score vector).
    """
    p = np.asarray(proba, dtype=np.float64)
    y = np.asarray(y_true, dtype=np.int8)
    # Empty input has no quantiles; a constant score cannot separate classes.
    if p.size == 0 or len(np.unique(p)) == 1:
        return 0.5
    candidates = np.unique(np.quantile(p, np.linspace(0.01, 0.99, 199)))
    best_f1, best_thr = -1.0, 0.5
    for thr in candidates:
        f1 = f1_score(y, p >= thr, zero_division=0)
        if f1 > best_f1:
            best_f1, best_thr = f1, float(thr)
    return best_thr

def metrics_report(y_true, proba, thr):
    """Compute the VALID metric bundle for one set of scores at threshold ``thr``.

    Args:
        y_true: Binary labels (cast to int8).
        proba: Scores aligned with ``y_true`` (cast to float64).
        thr: Decision threshold; a row is predicted positive when ``proba >= thr``.

    Returns:
        Dict with keys AP, ROC_AUC, F1, Precision, Recall, BalancedAcc (floats)
        and Confusion (``[tn, fp, fn, tp]`` as ints). ROC_AUC is NaN when
        ``roc_auc_score`` raises.
    """
    labels = np.asarray(y_true, dtype=np.int8)
    scores = np.asarray(proba, dtype=np.float64)
    hard = scores >= thr

    # Fixing labels=[0, 1] guarantees a 2x2 matrix even if one class is absent.
    counts = confusion_matrix(labels, hard, labels=[0,1]).ravel()
    tn, fp, fn, tp = counts

    try:
        roc = roc_auc_score(labels, scores)
    except Exception:
        roc = float("nan")

    report = {}
    report["AP"] = float(average_precision_score(labels, scores))
    report["ROC_AUC"] = float(roc)
    report["F1"] = float(f1_score(labels, hard, zero_division=0))
    report["Precision"] = float(precision_score(labels, hard, zero_division=0))
    report["Recall"] = float(recall_score(labels, hard, zero_division=0))
    report["BalancedAcc"] = float(balanced_accuracy_score(labels, hard))
    report["Confusion"] = [int(tn), int(fp), int(fn), int(tp)]
    return report

def print_report(label, model_key, rep, thr):
    """Print a metric dict (as built by ``metrics_report``) in a fixed layout.

    Args:
        label: Target name shown in the header.
        model_key: Model/ensemble name shown in the header.
        rep: Dict with AP, ROC_AUC, F1, Precision, Recall, BalancedAcc and
            Confusion (``[tn, fp, fn, tp]``).
        thr: Threshold the metrics were computed at.
    """
    tn, fp, fn, tp = rep["Confusion"]
    lines = [
        f"\n[{label} | {model_key}] VALID metrics",
        f"  AP:           {rep['AP']:.6f}",
        f"  ROC-AUC:      {rep['ROC_AUC']:.6f}",
        f"  F1:           {rep['F1']:.6f}",
        f"  Precision:    {rep['Precision']:.6f}",
        f"  Recall:       {rep['Recall']:.6f}",
        f"  BalancedAcc:  {rep['BalancedAcc']:.6f}",
        f"  Confusion:    tn={tn}  fp={fp}  fn={fn}  tp={tp}",
        f"  Threshold:    {thr:.6f}",
    ]
    # One write instead of nine; the emitted text is unchanged.
    print("\n".join(lines))

def _meta_path(tag):
    """Return the path of the ``<tag>_meta.json`` file inside MODEL_DIR."""
    filename = f"{tag}_meta.json"
    return os.path.join(MODEL_DIR, filename)
def _model_path(tag, model_key, target):
    """Return the path of a saved base model inside MODEL_DIR.

    The ``lgbm`` model key uses a ``.txt`` file; every other key uses ``.joblib``.
    """
    suffix = ".joblib" if model_key != "lgbm" else ".txt"
    filename = f"{tag}_{model_key}_{target}{suffix}"
    return os.path.join(MODEL_DIR, filename)

# ------------------------- Load metadata / models -------------------------
# The run metadata lists which base models were trained and the feature
# column order they expect.
with open(_meta_path(MODEL_TAG), "r") as f:
    META = json.load(f)
RUN_MODELS     = META["models"]                  # ['lgbm','hgb','rf','et']
SAVED_FEATURES = META["feature_cols"]
print(f"[load] model_tag={META.get('model_tag')} | models in meta: {RUN_MODELS}")

# MODELS maps (model_key, target) -> loaded model, or None when the file is missing.
MODELS = {}
loaded = 0
for t in TARGETS:
    for mk in RUN_MODELS:
        p = _model_path(MODEL_TAG, mk, t)
        if os.path.exists(p):
            if mk == "lgbm":
                # LightGBM boosters are loaded from their text dump, with
                # Python-level output silenced.
                with suppress_output():
                    MODELS[(mk, t)] = lgb.Booster(model_file=p)
            else:
                # NOTE(review): joblib.load unpickles the file; only load
                # artifacts from a trusted location.
                MODELS[(mk, t)] = joblib.load(p)
            loaded += 1
        else:
            MODELS[(mk, t)] = None
print(f"[load] models loaded: {loaded}/{len(TARGETS)*len(RUN_MODELS)}")

# -------------------- Load the reduced parquet --------------------
# Expected layout: datetime index, 2-level columns (level 0 = feature/target
# name, level 1 = ticker).
df = pd.read_parquet(PARQUET_PATH)
if not isinstance(df.columns, pd.MultiIndex):
    raise ValueError("Parquet deve ter MultiIndex (nível 0: feature/target; nível 1: ticker).")
if not np.issubdtype(df.index.dtype, np.datetime64):
    df.index = pd.to_datetime(df.index)
df = df.sort_index()

lvl0 = df.columns.get_level_values(0).astype(str)
lvl1 = df.columns.get_level_values(1).astype(str)
is_target  = lvl0.str.startswith("target_")
feat_names = sorted(set(lvl0[~is_target]))
# Keep only tickers that have both target columns.
tickers    = sorted(set(lvl1[(lvl0 == TARGETS[0])]) & set(lvl1[(lvl0 == TARGETS[1])]))

# Reshape wide -> long: one block of rows per ticker, all sharing the same
# feature columns (features a ticker lacks are filled with 0.0).
rows = []
for tk in tqdm(tickers, desc="Stacking tickers"):
    X_tk = df.xs(tk, level=1, axis=1)
    X_tk = X_tk.loc[:, [c for c in X_tk.columns if not str(c).startswith("target_")]] \
             .reindex(columns=feat_names, fill_value=0.0)
    # NOTE(review): the target names are hard-coded here instead of using TARGETS,
    # and the int8 cast presumably relies on the targets having no NaN — confirm.
    y_up = df[('target_up20', tk)].astype('int8')
    y_dd = df[('target_dd5',  tk)].astype('int8')
    blk = X_tk.copy()
    blk['target_up20'] = y_up.values
    blk['target_dd5']  = y_dd.values
    blk['ticker']      = tk
    rows.append(blk)

LONG = pd.concat(rows, axis=0).sort_index()
if ADD_TICKER_DUMMIES:
    # One-hot encode the ticker as tk_<ticker> columns.
    dummies = pd.get_dummies(LONG['ticker'], prefix='tk', dtype=np.uint8)
    LONG = pd.concat([LONG.drop(columns=['ticker']), dummies], axis=1)
else:
    LONG = LONG.drop(columns=['ticker'])

# Force every feature to a finite float32: non-numeric, NaN and +/-inf become 0.0.
feature_cols_all = [c for c in LONG.columns if c not in TARGETS]
LONG[feature_cols_all] = (LONG[feature_cols_all]
                          .apply(pd.to_numeric, errors='coerce')
                          .replace([np.inf, -np.inf], np.nan)
                          .fillna(0.0).astype('float32'))
for t in TARGETS:
    LONG[t] = LONG[t].astype('int8')

print("Long shape:", LONG.shape)

# -------------- Outer split (TRAIN / VALID / APPLY) --------------
# All positions below index into the sorted array of unique dates.
dates_all = np.array(sorted(LONG.index.unique()))
apply_dates    = dates_all[-APPLY_DAYS:]   # the last APPLY_DAYS dates
preapply_dates = dates_all[:-APPLY_DAYS]
n_pre = len(preapply_dates)

# Centre the VALID window at the VALID_POS fraction of the pre-APPLY dates,
# keeping at least PURGE_DAYS + MIN_TRAIN_DAYS dates before it and clamping
# its end to the last pre-APPLY date.
center = int(round(VALID_POS * (n_pre - 1)))
v_start = max(PURGE_DAYS + MIN_TRAIN_DAYS, center - VALID_DAYS // 2)
v_end   = min(n_pre, v_start + VALID_DAYS)
v_start = max(PURGE_DAYS + MIN_TRAIN_DAYS, v_end - VALID_DAYS)

valid_dates = preapply_dates[v_start:v_end]
# TRAIN ends PURGE_DAYS dates before VALID starts.
train_end   = max(0, v_start - PURGE_DAYS)
train_dates = preapply_dates[:train_end]

mask_apply = LONG.index.isin(apply_dates)
mask_valid = LONG.index.isin(valid_dates)
mask_train = LONG.index.isin(train_dates)

# NOTE(review): indexing [0]/[-1] below raises IndexError if any of the three
# date ranges is empty.
print(f"Dates — TRAIN: {pd.Timestamp(train_dates[0]).date()} → {pd.Timestamp(train_dates[-1]).date()} "
      f"| VALID: {pd.Timestamp(valid_dates[0]).date()} → {pd.Timestamp(valid_dates[-1]).date()} "
      f"| APPLY(last {APPLY_DAYS}): {pd.Timestamp(apply_dates[0]).date()} → {pd.Timestamp(apply_dates[-1]).date()} "
      f"| PURGE={PURGE_DAYS}d, HORIZON={HORIZON}d, MIN_TRAIN_DAYS={MIN_TRAIN_DAYS}d")

def align_columns(df_like, cols_expected):
    """Return a new frame whose columns are exactly ``cols_expected``, in order.

    Columns missing from ``df_like`` are created and filled with 0.0; columns
    not listed in ``cols_expected`` are dropped. The input frame is left
    untouched (no columns are added to the caller's object).

    Args:
        df_like: DataFrame with unique column labels.
        cols_expected: Iterable of column labels in the desired order.

    Returns:
        A new DataFrame with the same rows as ``df_like`` and columns
        ``cols_expected``.
    """
    # A single reindex does add / drop / reorder at once and avoids the
    # one-column-at-a-time inserts that fragment the frame.
    return df_like.reindex(columns=list(cols_expected), fill_value=0.0)

# Feature matrices in the column order the saved models expect.
valid_frame = align_columns(LONG.loc[mask_valid].copy(), META["feature_cols"])
apply_frame = align_columns(LONG.loc[mask_apply].copy(), META["feature_cols"])

X_valid = valid_frame.values
X_apply = apply_frame.values
valid_rows_dates = LONG.index[mask_valid]
valid_unique     = np.array(sorted(pd.Index(valid_rows_dates).unique()))

# Keep only rows with a non-missing target, per target (guards against NaN targets).
# NOTE(review): the target columns of LONG are int8 at this point, so notna()
# is True for every row and these masks select everything.
y_valid_raw = {t: LONG.loc[mask_valid, t] for t in TARGETS}
valid_masks = {t: y_valid_raw[t].notna().values for t in TARGETS}
y_valid = {t: y_valid_raw[t][valid_masks[t]].values.astype('int8') for t in TARGETS}
valid_rows_dates_by_target = {t: valid_rows_dates[valid_masks[t]] for t in TARGETS}

# ------------------ Splits purgados (dentro VALID) ------------------
def make_purged_rolling_splits(unique_dates, test_len_days=15, step_days=3, purge_days=30):
    """Build rolling (train_dates, test_dates) folds with a purge gap.

    For a test window starting at position ``s`` the training part is
    ``unique_dates[:s - purge_days]`` (possibly empty) and the test part is up
    to ``test_len_days`` dates starting at ``s``. Test starts advance by
    ``step_days`` from ``purge_days`` onwards.

    Args:
        unique_dates: Sorted sliceable sequence of dates.
        test_len_days: Number of dates per test window.
        step_days: Distance between consecutive test-window starts.
        purge_days: Number of dates dropped between train end and test start.

    Returns:
        List of ``(train_dates, test_dates)`` tuples. When there are at most
        ``purge_days + 5`` dates, or no rolling fold has a non-empty test part,
        a single fold whose test part is the tail of the sequence is returned.
    """
    n_dates = len(unique_dates)

    def train_before(test_start):
        # Everything that ends at least purge_days before the test window.
        return unique_dates[:max(0, test_start - purge_days)]

    # Too short for rolling windows: one fold testing on the last dates.
    if n_dates <= purge_days + 5:
        test_start = max(0, n_dates - test_len_days)
        return [(train_before(test_start), unique_dates[test_start:])]

    stop_excl = max(purge_days + 1, n_dates - test_len_days + 1)
    folds = []
    for test_start in range(purge_days, stop_excl, step_days):
        test_stop = min(n_dates, test_start + test_len_days)
        test_part = unique_dates[test_start:test_stop]
        if len(test_part) > 0:
            folds.append((train_before(test_start), test_part))
    if folds:
        return folds

    test_start = max(purge_days, n_dates - test_len_days)
    return [(train_before(test_start), unique_dates[test_start:])]

def mask_rows_by_dates(row_dates, train_dates, test_dates):
    """Return positional row indices whose date is in the train / test date sets.

    Args:
        row_dates: Object exposing ``.isin`` (e.g. a pandas Index), one date per row.
        train_dates: Dates that define the training rows.
        test_dates: Dates that define the test rows.

    Returns:
        ``(train_idx, test_idx)`` as integer position arrays.
    """
    in_train = row_dates.isin(train_dates)
    in_test = row_dates.isin(test_dates)
    return np.flatnonzero(in_train), np.flatnonzero(in_test)

# Purged rolling folds over the unique VALID dates; reused by calibration,
# weight search, ensemble selection and stacking.
CV_SPLITS = make_purged_rolling_splits(valid_unique,
                                       test_len_days=TEST_LEN_DAYS,
                                       step_days=STEP_DAYS,
                                       purge_days=PURGE_DAYS)

# ------------------ Score the base models ------------------
# BASE_VALID / BASE_APPLY map target -> model_key -> clipped probability vector.
BASE_VALID = {t: {} for t in TARGETS}
BASE_APPLY = {t: {} for t in TARGETS}

print("\nScoring base models on VALID/APPLY:")
for target in TARGETS:
    for mk in RUN_MODELS:
        mdl = MODELS.get((mk, target))
        meta_t = META.get("targets", {}).get(target, {}).get(mk, None)
        # Skip combinations with no saved model or no metadata entry.
        if mdl is None or meta_t is None:
            print(f"  - {mk}/{target}: missing (skipping)")
            continue
        best_iter = meta_t.get("best_iter")
        mask_t = valid_masks[target]
        if mask_t.sum() == 0:
            print(f"  - {mk}/{target}: no valid rows after filtering (skipping)")
            continue
        X_valid_t = X_valid[mask_t]
        row_dates_t = valid_rows_dates_by_target[target]
        y_valid_t = y_valid[target]

        with suppress_output():
            if mk == "lgbm":
                # Use the iteration stored in the metadata; fall back to the
                # booster's own best_iteration when it is missing/falsy.
                p_va = mdl.predict(X_valid_t, num_iteration=best_iter or mdl.best_iteration)
                p_ap = mdl.predict(X_apply,  num_iteration=best_iter or mdl.best_iteration)
            else:
                p_va = mdl.predict_proba(X_valid_t)[:, 1]
                p_ap = mdl.predict_proba(X_apply)[:, 1]

        # Clip away exact 0/1 so later logit transforms stay finite.
        BASE_VALID[target][mk] = np.clip(p_va.astype('float64'), 1e-6, 1-1e-6)
        BASE_APPLY[target][mk] = np.clip(p_ap.astype('float64'), 1e-6, 1-1e-6)
        print(f"  - {mk}/{target}: VALID/APPLY done")

# ------------------ Calibração isotônica OOF ------------------
def oof_isotonic(y_true, proba, row_dates, cv_splits,
                 min_rows=MIN_META_ROWS, min_pos=MIN_META_POS, min_neg=MIN_META_NEG):
    """Calibrate scores with isotonic regression, out-of-fold over purged splits.

    For every fold a calibrator is fitted on the fold's training rows and
    applied to its test rows. Folds are skipped when the test part is empty,
    the training part has fewer than ``min_rows`` rows, or it has fewer than
    ``min_pos`` positives / ``min_neg`` negatives. Rows covered by several
    folds keep the value written by the last such fold; rows never covered
    keep their raw score.

    Returns:
        ``(oof, iso_full, cover_rate)``: the calibrated vector, a calibrator
        fitted on all rows (``None`` when only one class is present), and the
        fraction of rows calibrated by at least one fold.
    """
    labels = np.asarray(y_true, dtype=np.int8)
    scores = np.asarray(proba, dtype=np.float64)
    calibrated = np.full_like(scores, np.nan, dtype=float)
    seen = np.zeros(len(scores), dtype=bool)

    for fit_dates, eval_dates in cv_splits:
        fit_idx, eval_idx = mask_rows_by_dates(row_dates, fit_dates, eval_dates)
        if len(eval_idx) == 0 or len(fit_idx) < min_rows:
            continue
        fit_labels = labels[fit_idx]
        n_pos = int(fit_labels.sum())
        n_neg = int(len(fit_labels) - n_pos)
        if n_pos < min_pos or n_neg < min_neg:
            continue
        fold_iso = IsotonicRegression(out_of_bounds="clip")
        fold_iso.fit(scores[fit_idx], fit_labels)
        calibrated[eval_idx] = fold_iso.transform(scores[eval_idx])
        seen[eval_idx] = True

    cover_rate = seen.mean()
    # Uncovered rows fall back to the uncalibrated score.
    untouched = ~seen
    calibrated[untouched] = scores[untouched]

    # Calibrator for APPLY: fitted on everything, only if both classes occur.
    iso_full = None
    if 0 < labels.sum() < len(labels):
        iso_full = IsotonicRegression(out_of_bounds="clip")
        iso_full.fit(scores, labels)
    return calibrated, iso_full, cover_rate

def apply_iso(iso, proba):
    """Apply calibrator ``iso`` to ``proba``; pass ``proba`` through when ``iso`` is None."""
    if iso is None:
        return proba
    return iso.transform(proba)

# CAL_VALID / CAL_APPLY map target -> model_key -> calibrated probability vector.
CAL_VALID = {t: {} for t in TARGETS}
CAL_APPLY = {t: {} for t in TARGETS}

print("\nCalibrating base models (isotonic, purged CV):")
for target in TARGETS:
    y_va = y_valid[target]
    if len(y_va) == 0:
        print(f"[{target}] No valid rows for calibration — skipping")
        continue
    for mk, p_va in BASE_VALID[target].items():
        # VALID gets out-of-fold calibrated scores; APPLY uses the calibrator
        # fitted on all of VALID (or the raw scores when that one is None).
        oof, iso_full, cov = oof_isotonic(y_va, p_va, valid_rows_dates_by_target[target], CV_SPLITS)
        p_ap = BASE_APPLY[target][mk]
        CAL_VALID[target][mk] = np.clip(oof, 1e-6, 1-1e-6)
        CAL_APPLY[target][mk] = np.clip(apply_iso(iso_full, p_ap), 1e-6, 1-1e-6)
        print(f"  - {mk}/{target}: OOF coverage={cov*100:.1f}%")

# --------- Meta-features (para stacking LR) ---------
def build_meta_matrix(pred_dict):
    """Build the stacking feature matrix from per-model prediction vectors.

    Columns, in order: the M raw predictions, then row-wise mean, std, min,
    max, range (max - min), the count of models with prediction >= 0.5, and
    the M predictions centred on the row mean.

    Returns:
        ``(Xmeta, keys)`` where ``Xmeta`` has shape (n, 2*M + 6) and ``keys``
        is the model order used for the columns.
    """
    names = list(pred_dict)
    preds = np.vstack([pred_dict[name] for name in names]).T  # (n, M)
    per_row = {"axis": 1, "keepdims": True}
    avg = preds.mean(**per_row)
    spread = preds.std(**per_row)
    lowest = preds.min(**per_row)
    highest = preds.max(**per_row)
    votes = np.sum((preds >= 0.5), **per_row)
    blocks = [preds, avg, spread, lowest, highest, highest - lowest, votes, preds - avg]
    return np.hstack(blocks), names

# ----------------- Funções de ensemble -----------------
def equal_weight_avg(preds_dict):
    """Return the plain (equal-weight) average of all prediction vectors, per row."""
    stacked = np.vstack(list(preds_dict.values()))  # (M, n)
    return stacked.T.mean(axis=1)

def cv_weight_search(y_true, preds_dict, row_dates, cv_splits, trials=300, seed=42,
                     sample_weight=None, logit_space=False):
    """Random search for blend weights (>= 0, sum to 1) maximising mean fold AP.

    Each trial draws uniform random weights, normalises them, blends the base
    predictions on the test rows of every fold and averages the per-fold
    average precision. The weights with the highest mean AP win.

    Args:
        y_true: Binary labels, one per row.
        preds_dict: Mapping model_key -> prediction vector.
        row_dates: Per-row dates (must expose ``.isin``).
        cv_splits: List of ``(train_dates, test_dates)``; only the test part is used.
        trials: Number of random weight vectors to evaluate.
        seed: Seed for the weight sampler.
        sample_weight: Optional per-row weights passed to the AP metric
            (e.g. to emphasise the end of VALID).
        logit_space: If True, blend the logits and map the result back through
            a sigmoid instead of blending probabilities directly.

    Returns:
        ``(best_w, keys)``: the best weight vector and the model order it
        refers to. Falls back to equal weights when no trial could be scored.
    """
    keys = list(preds_dict.keys())
    base = np.vstack([preds_dict[k] for k in keys]).T  # (n, M)
    if logit_space:
        eps = 1e-6
        clipped = base.clip(eps, 1 - eps)
        base = np.log(clipped / (1 - clipped))

    # Positional indexing below must not depend on a pandas index.
    y_arr = np.asarray(y_true)
    sw_arr = None if sample_weight is None else np.asarray(sample_weight)

    # The test rows of each fold do not depend on the candidate weights, so
    # resolve them once instead of once per trial.
    fold_rows = []
    for tr_dates, te_dates in cv_splits:
        _, te_idx = mask_rows_by_dates(row_dates, tr_dates, te_dates)
        if len(te_idx) > 0:
            fold_rows.append(te_idx)

    rng = np.random.default_rng(seed)
    best_w, best_score = None, -1.0
    pbar = tqdm(range(trials), desc="  weight search (purged CV)", leave=False)
    for _ in pbar:
        raw = rng.random(len(keys)) + 1e-8
        w = raw / raw.sum()
        if not fold_rows:
            continue
        ap_scores = []
        for te_idx in fold_rows:
            blend = base[te_idx].dot(w)
            if logit_space:
                # back from logit to probability
                blend = 1.0 / (1.0 + np.exp(-blend))
            if sw_arr is not None:
                ap = average_precision_score(y_arr[te_idx], blend, sample_weight=sw_arr[te_idx])
            else:
                ap = average_precision_score(y_arr[te_idx], blend)
            ap_scores.append(ap)
        score = float(np.mean(ap_scores))
        if score > best_score:
            best_score, best_w = score, w.copy()
        pbar.set_postfix(AP=f"{score:.3f}", best=f"{best_score:.3f}")
    if best_w is None:
        best_w = np.ones(len(keys))/len(keys)
    return best_w, keys

def recency_weights(row_dates, alpha=3.0):
    """Return per-row exponential recency weights.

    Each row's date is mapped to its rank among the sorted unique dates, the
    rank is scaled to (approximately) [0, 1], and the weight is
    ``exp(alpha * scaled_rank)`` — so later dates get larger weights.
    """
    ordered = np.array(sorted(pd.Index(row_dates).unique()))
    rank_of = dict(zip(ordered, range(len(ordered))))
    ranks = np.array([rank_of[d] for d in row_dates])
    # The tiny epsilon keeps the division defined when there is a single date.
    span = len(ordered) - 1 + 1e-9
    scaled = ranks / span
    return np.exp(alpha * scaled)

def logit_blend(preds_dict, weights):
    """Blend predictions as a weighted average in logit space.

    Args:
        preds_dict: Mapping model_key -> probability vector; the iteration
            order of the mapping must match the order of ``weights``.
        weights: One non-negative weight per model; normalised to sum to 1
            on a private copy (the caller's array is never modified).

    Returns:
        Blended probabilities, ``sigmoid(sum_k w_k * logit(p_k))``.
    """
    keys = list(preds_dict.keys())
    # np.array copies, so the normalisation below cannot leak into the caller.
    w = np.array(weights, dtype=float)
    w = w / w.sum()
    eps = 1e-6
    # Clip so the logit stays finite for probabilities of exactly 0 or 1.
    clipped = np.vstack([np.asarray(preds_dict[k], dtype=float).clip(eps, 1 - eps)
                         for k in keys]).T
    logits = np.log(clipped / (1 - clipped))
    agg = logits.dot(w)
    return 1.0 / (1.0 + np.exp(-agg))

def trimmed_mean_blend(preds_dict, trim_k=1):
    """Average the models per row after dropping the ``trim_k`` lowest and highest.

    Args:
        preds_dict: Mapping model_key -> prediction vector (all the same length).
        trim_k: Number of extreme values removed on each side, per row.

    Returns:
        Vector of trimmed means. Falls back to the plain mean when
        ``trim_k <= 0`` or when there are at most ``2 * trim_k`` models
        (nothing would be left after trimming).
    """
    mat = np.vstack(list(preds_dict.values()))  # (M, n)
    n_models = mat.shape[0]
    if trim_k <= 0 or n_models <= 2 * trim_k:
        return mat.mean(axis=0)
    # Sort each column once and slice off the extremes; this replaces a
    # Python-level loop over every row. NaN sorts last, and nanmean ignores
    # any NaN that survives the trim.
    ordered = np.sort(mat, axis=0)
    return np.nanmean(ordered[trim_k:n_models - trim_k], axis=0)

def ensemble_selection_forward(y_true, preds_dict, row_dates, cv_splits, steps=32, tol=1e-4):
    """Caruana-style forward ensemble selection (with replacement) on mean fold AP.

    Starting from an empty ensemble, each step tries adding every base model
    to the running average and keeps the best one, provided it improves the
    mean average precision over the folds' test rows by more than ``tol``.
    A model may be picked several times; its final weight is its pick count
    divided by the total number of picks.

    Args:
        y_true: Binary labels, one per row.
        preds_dict: Mapping model_key -> prediction vector.
        row_dates: Per-row dates (must expose ``.isin``).
        cv_splits: List of ``(train_dates, test_dates)``; only the test part is used.
        steps: Maximum number of selection steps.
        tol: Minimum score gain required to accept a step.

    Returns:
        ``(w, keys)``: weight vector and the model order it refers to. Equal
        weights are returned when no model was ever selected.
    """
    keys = list(preds_dict.keys())
    base = {k: np.asarray(preds_dict[k], dtype=float) for k in keys}
    y_arr = np.asarray(y_true)

    # Fold membership never changes during the search: resolve it once.
    fold_rows = []
    for tr_dates, te_dates in cv_splits:
        _, te_idx = mask_rows_by_dates(row_dates, tr_dates, te_dates)
        if len(te_idx) > 0:
            fold_rows.append(te_idx)

    def score_of(pred):
        if not fold_rows:
            return -1.0
        return float(np.mean([average_precision_score(y_arr[i], pred[i]) for i in fold_rows]))

    current = np.zeros(len(y_arr), dtype=float)
    counts = dict.fromkeys(keys, 0)
    best_score = -1.0

    for step in range(steps):
        # Find the best candidate of this step first; comparing candidates
        # against a bar that moves inside the loop would make the choice
        # depend on key order (a later model would need to win by `tol`).
        step_score, step_pred, step_key = -np.inf, None, None
        for k in keys:
            trial = (current * (step/(step+1))) + (base[k] / (step+1))
            sc = score_of(trial)
            if sc > step_score:
                step_score, step_pred, step_key = sc, trial, k
        # Stop as soon as the best addition no longer beats the ensemble by tol.
        if step_key is None or not (step_score > best_score + tol):
            break
        best_score = step_score
        current = step_pred
        counts[step_key] += 1

    total = sum(counts.values())
    if total == 0:  # fallback
        w = np.ones(len(keys))/len(keys)
    else:
        w = np.array([counts[k] for k in keys], dtype=float) / total
    return w, keys

def purged_stacked_lr(y_true, preds_dict, row_dates, cv_splits,
                      min_rows=MIN_META_ROWS, min_pos=MIN_META_POS, min_neg=MIN_META_NEG):
    """Stack the base predictions with a logistic regression, out-of-fold.

    Meta-features come from ``build_meta_matrix``. For every usable fold a
    scaler + class-balanced logistic regression is fitted on the training rows
    and used to score the test rows. Folds are skipped when the test part is
    empty, the training part has fewer than ``min_rows`` rows, or fewer than
    ``min_pos`` positives / ``min_neg`` negatives. Rows never scored fall
    back to the equal-weight average of the base predictions.

    Returns:
        ``(oof, (scaler, clf))``: the out-of-fold probabilities and a final
        scaler/classifier pair fitted on all rows (for APPLY).
    """
    meta_X, keys = build_meta_matrix(preds_dict)
    labels = np.asarray(y_true, dtype=np.int8)
    oof = np.full(len(labels), np.nan, dtype=float)
    covered = np.zeros(len(labels), dtype=bool)

    def fit_meta(X_fit, y_fit):
        # Standardise, then fit the meta-learner on the standardised features.
        scaler = StandardScaler(with_mean=True, with_std=True)
        X_scaled = scaler.fit_transform(X_fit)
        model = LogisticRegression(solver="lbfgs", max_iter=500, class_weight="balanced", C=1.0)
        model.fit(X_scaled, y_fit)
        return scaler, model

    for fit_dates, eval_dates in cv_splits:
        fit_idx, eval_idx = mask_rows_by_dates(row_dates, fit_dates, eval_dates)
        if len(eval_idx) == 0 or len(fit_idx) < min_rows:
            continue
        fit_labels = labels[fit_idx]
        n_pos = int(fit_labels.sum())
        n_neg = int(len(fit_labels) - n_pos)
        if n_pos < min_pos or n_neg < min_neg:
            continue
        scaler, model = fit_meta(meta_X[fit_idx], fit_labels)
        oof[eval_idx] = model.predict_proba(scaler.transform(meta_X[eval_idx]))[:, 1]
        covered[eval_idx] = True

    print(f"[stackLR] OOF coverage: {covered.mean()*100:.1f}%")
    fallback = equal_weight_avg(preds_dict)
    missing = ~covered
    oof[missing] = fallback[missing]

    # final meta-model (for APPLY), fitted on every row
    return oof, fit_meta(meta_X, labels)

# ------------------ Ensembling (no rank blend, no stacked GBM) ------------------
print("\nCalibrated bases ready → building improved ensembles ...")
# The ensembles are built on the calibrated base predictions.
PRED_SOURCE = {"valid": CAL_VALID, "apply": CAL_APPLY}

# ALL_METRICS: one flat row per (target, ensemble).
# ENSEMBLE_VALID: target -> ensemble -> (valid_proba, threshold, report).
# ENSEMBLE_APPLY: target -> ensemble -> apply_proba.
ALL_METRICS = []
ENSEMBLE_VALID = {t: {} for t in TARGETS}
ENSEMBLE_APPLY = {t: {} for t in TARGETS}

def add_entry(target, key, proba_va, thr, rpt, proba_ap):
    """Register one ensemble's results in the module-level result containers.

    Stores ``(proba_va, thr, rpt)`` in ENSEMBLE_VALID, ``proba_ap`` in
    ENSEMBLE_APPLY, and appends a flat metrics row to ALL_METRICS.
    """
    ENSEMBLE_VALID[target][key] = (proba_va, thr, rpt)
    ENSEMBLE_APPLY[target][key] = proba_ap

    tn, fp, fn, tp = rpt["Confusion"]
    row = {"target": target, "model": key, "threshold": thr}
    for metric in ("AP", "ROC_AUC", "F1", "Precision", "Recall", "BalancedAcc"):
        row[metric] = rpt[metric]
    row.update(tn=tn, fp=fp, fn=fn, tp=tp)
    ALL_METRICS.append(row)

# Build six ensembles per target. Every ensemble follows the same pattern:
# fit/choose it on VALID, pick an F1 threshold on the VALID scores, report
# VALID metrics, compute the APPLY scores, then register everything.
print("\n=== Ensembles (purged inside VALID; refined) ===")
for target in TARGETS:
    y_va = y_valid[target]
    if len(y_va) == 0:
        print(f"[{target}] No valid rows for ensemble — skipping")
        continue
    base_va = PRED_SOURCE["valid"][target]
    base_ap = PRED_SOURCE["apply"][target]
    keys = list(base_va.keys())
    if not keys:
        print(f"[{target}] No base predictions — skipping.")
        continue

    # 1) Weighted linear blend (weights tuned on fold AP) — the previous best baseline
    print(f"\n[{target}] 1) ens_weighted — CV-optimized weights (AP)")
    w_lin, keys_w = cv_weight_search(y_va, base_va, valid_rows_dates_by_target[target], CV_SPLITS,
                                     trials=ENSEMBLE_WEIGHT_TRIALS, seed=42, sample_weight=None, logit_space=False)
    M_va = np.vstack([base_va[k] for k in keys_w]).T
    M_ap = np.vstack([base_ap[k] for k in keys_w]).T
    p_va_w = M_va.dot(w_lin)
    p_ap_w = M_ap.dot(w_lin)
    thr_w  = robust_choose_thr_by_f1(y_va, p_va_w)
    rpt_w  = metrics_report(y_va, p_va_w, thr_w)
    print(f"  best weights: {dict(zip(keys_w, [float(x) for x in w_lin]))}")
    print_report(target, "ens_weighted", rpt_w, thr_w)
    add_entry(target, "ens_weighted", p_va_w, thr_w, rpt_w, p_ap_w)

    # 2) Weighted linear blend with recency-weighted AP
    print(f"[{target}] 2) ens_weighted_recency — CV-optimized weights (AP, recency-weighted)")
    sw = recency_weights(valid_rows_dates_by_target[target], alpha=RECENCY_ALPHA)
    w_rec, keys_r = cv_weight_search(y_va, base_va, valid_rows_dates_by_target[target], CV_SPLITS,
                                     trials=ENSEMBLE_WEIGHT_TRIALS, seed=123, sample_weight=sw, logit_space=False)
    M_va_r = np.vstack([base_va[k] for k in keys_r]).T
    M_ap_r = np.vstack([base_ap[k] for k in keys_r]).T
    p_va_r = M_va_r.dot(w_rec)
    p_ap_r = M_ap_r.dot(w_rec)
    thr_r  = robust_choose_thr_by_f1(y_va, p_va_r)
    rpt_r  = metrics_report(y_va, p_va_r, thr_r)
    print(f"  best weights (recency): {dict(zip(keys_r, [float(x) for x in w_rec]))}")
    print_report(target, "ens_weighted_recency", rpt_r, thr_r)
    add_entry(target, "ens_weighted_recency", p_va_r, thr_r, rpt_r, p_ap_r)

    # 3) Logit-weighted blend (weights searched and applied in logit space)
    print(f"[{target}] 3) ens_logit_weighted — weight search no espaço do logit")
    w_logit, keys_l = cv_weight_search(y_va, base_va, valid_rows_dates_by_target[target], CV_SPLITS,
                                       trials=ENSEMBLE_WEIGHT_TRIALS, seed=7, sample_weight=None, logit_space=True)
    # base_va iterates in the same key order as keys_l, so the weights line up.
    p_va_log = logit_blend(base_va, w_logit)
    # APPLY: restrict/reorder the apply predictions to the searched keys.
    base_ap_sub = {k: base_ap[k] for k in keys_l}
    p_ap_log = logit_blend(base_ap_sub, w_logit)
    thr_log = robust_choose_thr_by_f1(y_va, p_va_log)
    rpt_log = metrics_report(y_va, p_va_log, thr_log)
    print(f"  best weights (logit): {dict(zip(keys_l, [float(x) for x in w_logit]))}")
    print_report(target, "ens_logit_weighted", rpt_log, thr_log)
    add_entry(target, "ens_logit_weighted", p_va_log, thr_log, rpt_log, p_ap_log)

    # 4) Trimmed mean (robust to a single outlying model per row)
    print(f"[{target}] 4) ens_trimmed_mean — média podada (trim_k=1)")
    p_va_tm = trimmed_mean_blend(base_va, trim_k=1)
    thr_tm  = robust_choose_thr_by_f1(y_va, p_va_tm)
    rpt_tm  = metrics_report(y_va, p_va_tm, thr_tm)
    print_report(target, "ens_trimmed_mean", rpt_tm, thr_tm)
    # APPLY:
    p_ap_tm = trimmed_mean_blend(base_ap, trim_k=1)
    add_entry(target, "ens_trimmed_mean", p_va_tm, thr_tm, rpt_tm, p_ap_tm)

    # 5) Ensemble selection (Caruana) — forward selection with replacement
    print(f"[{target}] 5) ens_selection — forward selection (AP, CV purgado)")
    w_sel, keys_s = ensemble_selection_forward(y_va, base_va, valid_rows_dates_by_target[target], CV_SPLITS,
                                               steps=32, tol=1e-4)
    M_va_s = np.vstack([base_va[k] for k in keys_s]).T
    M_ap_s = np.vstack([base_ap[k] for k in keys_s]).T
    p_va_s = M_va_s.dot(w_sel)
    p_ap_s = M_ap_s.dot(w_sel)
    thr_s  = robust_choose_thr_by_f1(y_va, p_va_s)
    rpt_s  = metrics_report(y_va, p_va_s, thr_s)
    print(f"  selected weights: {dict(zip(keys_s, [float(x) for x in w_sel]))}")
    print_report(target, "ens_selection", rpt_s, thr_s)
    add_entry(target, "ens_selection", p_va_s, thr_s, rpt_s, p_ap_s)

    # 6) Stacked logistic regression on meta-features — kept from the previous version
    print(f"[{target}] 6) ens_stackLR — stacking LR (purged)")
    oof_lr, (scLR, clfLR) = purged_stacked_lr(y_va, base_va, valid_rows_dates_by_target[target], CV_SPLITS,
                                              min_rows=MIN_META_ROWS, min_pos=MIN_META_POS, min_neg=MIN_META_NEG)
    # VALID metrics use the out-of-fold scores; APPLY uses the final
    # scaler/classifier pair returned by purged_stacked_lr.
    thr_lr = robust_choose_thr_by_f1(y_va, oof_lr)
    rpt_lr = metrics_report(y_va, oof_lr, thr_lr)
    print_report(target, "ens_stackLR", rpt_lr, thr_lr)
    Xmeta_ap, _ = build_meta_matrix(base_ap)
    Xmeta_ap_s = scLR.transform(Xmeta_ap)
    p_ap_lr = clfLR.predict_proba(Xmeta_ap_s)[:, 1]
    add_entry(target, "ens_stackLR", oof_lr, thr_lr, rpt_lr, p_ap_lr)

# -------------------- Metrics table --------------------
# One row per (target, ensemble), best AP first within each target.
METRICS_DF = pd.DataFrame(ALL_METRICS).sort_values(["target","AP"], ascending=[True, False])
print("\n=== Ensemble metrics comparison (VALID; purged) ===")
print(METRICS_DF.to_string(index=False))

# -------------------- APPLY (last 5 days) --------------------
APPLY_OUT = LONG.loc[mask_apply, TARGETS].copy()
if ADD_TICKER_DUMMIES:
    # Recover the ticker name from the one-hot tk_<ticker> columns: the column
    # holding the row maximum, with the "tk_" text removed.
    tk_cols = [c for c in LONG.columns if c.startswith("tk_")]
    APPLY_OUT["ticker_rec"] = LONG.loc[mask_apply, tk_cols].idxmax(axis=1).str.replace("tk_", "", regex=False)
else:
    APPLY_OUT["ticker_rec"] = "N/A"

# Export probability and thresholded prediction for every ensemble built above;
# the threshold is the one chosen on VALID for that ensemble.
for target in TARGETS:
    for ens in ENSEMBLE_APPLY[target].keys():
        APPLY_OUT[f"proba_{target}__{ens}"] = ENSEMBLE_APPLY[target][ens]
        thr = ENSEMBLE_VALID[target][ens][1]
        APPLY_OUT[f"pred_{target}__{ens}"]  = (ENSEMBLE_APPLY[target][ens] >= thr).astype(int)

print("\n=== APPLY (last 5 days) — head (first 50 rows) ===")
print(APPLY_OUT.head(50).to_string())

def top_per_day(df_apply, prob_col, k=TOPK_PER_DAY):
    """Return, for each index date, the ``k`` rows with the highest ``prob_col``.

    The result concatenates the per-date selections (groups taken on index
    level 0) and gains a leading ``date`` column holding the group key.
    """
    def best_of(day, day_rows):
        ranked = day_rows.sort_values(prob_col, ascending=False)
        picked = ranked.head(k).copy()
        picked.insert(0, "date", day)
        return picked

    return pd.concat([best_of(day, day_rows) for day, day_rows in df_apply.groupby(level=0)])

# Print the daily top-k list of the ens_weighted ensemble for both targets
# (skipped for a target whose probability column was not produced).
for target, ens in [(TARGETS[0], "ens_weighted"), (TARGETS[1], "ens_weighted")]:
    col = f"proba_{target}__{ens}"
    if col in APPLY_OUT.columns:
        print(f"\nTop-{TOPK_PER_DAY} per day — {ens} on {target}")
        print(top_per_day(APPLY_OUT, col).to_string())

Mounted at /content/drive
[load] model_tag=multi_model_timeaware_v3 | models in meta: ['lgbm', 'hgb', 'rf', 'et']
[load] models loaded: 8/8


Stacking tickers:   0%|          | 0/147 [00:00<?, ?it/s]

Long shape: (969759, 316)
Dates — TRAIN: 2005-01-03 → 2023-09-09 | VALID: 2023-10-10 → 2024-04-06 | APPLY(last 5): 2025-10-29 → 2025-11-02 | PURGE=30d, HORIZON=30d, MIN_TRAIN_DAYS=45d

Scoring base models on VALID/APPLY:
  - lgbm/target_up20: VALID/APPLY done
  - hgb/target_up20: VALID/APPLY done
  - rf/target_up20: VALID/APPLY done
  - et/target_up20: VALID/APPLY done
  - lgbm/target_dd5: VALID/APPLY done
  - hgb/target_dd5: VALID/APPLY done
  - rf/target_dd5: VALID/APPLY done
  - et/target_dd5: VALID/APPLY done

Calibrating base models (isotonic, purged CV):
  - lgbm/target_up20: OOF coverage=75.0%
  - hgb/target_up20: OOF coverage=75.0%
  - rf/target_up20: OOF coverage=75.0%
  - et/target_up20: OOF coverage=75.0%
  - lgbm/target_dd5: OOF coverage=75.0%
  - hgb/target_dd5: OOF coverage=75.0%
  - rf/target_dd5: OOF coverage=75.0%
  - et/target_dd5: OOF coverage=75.0%

Calibrated bases ready → building improved ensembles ...

=== Ensembles (purged inside VALID; refined) ===

[target_up

  weight search (purged CV):   0%|          | 0/300 [00:00<?, ?it/s]

  best weights: {'lgbm': 0.0016985464357251168, 'hgb': 0.11462318314794669, 'rf': 0.8824171102854566, 'et': 0.0012611601308715754}

[target_up20 | ens_weighted] VALID metrics
  AP:           0.739004
  ROC-AUC:      0.927722
  F1:           0.691457
  Precision:    0.661334
  Recall:       0.724454
  BalancedAcc:  0.831987
  Confusion:    tn=21375  fp=1376  fn=1022  tp=2687
  Threshold:    0.575229
[target_up20] 2) ens_weighted_recency — CV-optimized weights (AP, recency-weighted)


  weight search (purged CV):   0%|          | 0/300 [00:00<?, ?it/s]

  best weights (recency): {'lgbm': 0.022910336619996474, 'hgb': 0.008107905500381644, 'rf': 0.8228939186383311, 'et': 0.1460878392412908}

[target_up20 | ens_weighted_recency] VALID metrics
  AP:           0.736237
  ROC-AUC:      0.922247
  F1:           0.690747
  Precision:    0.671160
  Recall:       0.711513
  BalancedAcc:  0.827340
  Confusion:    tn=21458  fp=1293  fn=1070  tp=2639
  Threshold:    0.580183
[target_up20] 3) ens_logit_weighted — weight search no espaço do logit


  weight search (purged CV):   0%|          | 0/300 [00:00<?, ?it/s]

  best weights (logit): {'lgbm': 0.0040471726700526175, 'hgb': 0.2553253533100351, 'rf': 0.6590974631397676, 'et': 0.08153001088014475}

[target_up20 | ens_logit_weighted] VALID metrics
  AP:           0.732513
  ROC-AUC:      0.925146
  F1:           0.685682
  Precision:    0.715962
  Recall:       0.657859
  BalancedAcc:  0.807656
  Confusion:    tn=21783  fp=968  fn=1269  tp=2440
  Threshold:    0.641091
[target_up20] 4) ens_trimmed_mean — média podada (trim_k=1)

[target_up20 | ens_trimmed_mean] VALID metrics
  AP:           0.699356
  ROC-AUC:      0.910022
  F1:           0.659829
  Precision:    0.688967
  Recall:       0.633055
  BalancedAcc:  0.793232
  Confusion:    tn=21691  fp=1060  fn=1361  tp=2348
  Threshold:    0.624435
[target_up20] 5) ens_selection — forward selection (AP, CV purgado)
  selected weights: {'lgbm': 0.0, 'hgb': 0.0, 'rf': 1.0, 'et': 0.0}

[target_up20 | ens_selection] VALID metrics
  AP:           0.739100
  ROC-AUC:      0.926713
  F1:           0.6932

  weight search (purged CV):   0%|          | 0/300 [00:00<?, ?it/s]

  best weights: {'lgbm': 0.013233930042403903, 'hgb': 0.21808173381455218, 'rf': 0.12517859313051222, 'et': 0.6435057430125316}

[target_dd5 | ens_weighted] VALID metrics
  AP:           0.690117
  ROC-AUC:      0.803507
  F1:           0.716015
  Precision:    0.609254
  Recall:       0.868142
  BalancedAcc:  0.746156
  Confusion:    tn=9860  fp=5937  fn=1406  tp=9257
  Threshold:    0.332204
[target_dd5] 2) ens_weighted_recency — CV-optimized weights (AP, recency-weighted)


  weight search (purged CV):   0%|          | 0/300 [00:00<?, ?it/s]

  best weights (recency): {'lgbm': 0.01683337975517008, 'hgb': 0.028031569865505174, 'rf': 0.35046464908877467, 'et': 0.60467040129055}

[target_dd5 | ens_weighted_recency] VALID metrics
  AP:           0.685600
  ROC-AUC:      0.800196
  F1:           0.713577
  Precision:    0.611572
  Recall:       0.856419
  BalancedAcc:  0.744631
  Confusion:    tn=9997  fp=5800  fn=1531  tp=9132
  Threshold:    0.344562
[target_dd5] 3) ens_logit_weighted — weight search no espaço do logit


  weight search (purged CV):   0%|          | 0/300 [00:00<?, ?it/s]

  best weights (logit): {'lgbm': 0.03929933856404872, 'hgb': 0.07587866571239577, 'rf': 0.4074134435265421, 'et': 0.47740855219701345}

[target_dd5 | ens_logit_weighted] VALID metrics
  AP:           0.686413
  ROC-AUC:      0.803141
  F1:           0.714219
  Precision:    0.609905
  Recall:       0.861577
  BalancedAcc:  0.744804
  Confusion:    tn=9921  fp=5876  fn=1476  tp=9187
  Threshold:    0.330470
[target_dd5] 4) ens_trimmed_mean — média podada (trim_k=1)

[target_dd5 | ens_trimmed_mean] VALID metrics
  AP:           0.677308
  ROC-AUC:      0.802217
  F1:           0.712664
  Precision:    0.613025
  Recall:       0.850980
  BalancedAcc:  0.744190
  Confusion:    tn=10069  fp=5728  fn=1589  tp=9074
  Threshold:    0.342590
[target_dd5] 5) ens_selection — forward selection (AP, CV purgado)
  selected weights: {'lgbm': 0.0, 'hgb': 0.0, 'rf': 0.3333333333333333, 'et': 0.6666666666666666}

[target_dd5 | ens_selection] VALID metrics
  AP:           0.685884
  ROC-AUC:      0.79877

In [2]:
# ============================================================
# Melhor ensemble por Precision — versão MultiIndex-safe (v2)
# ============================================================
import os, json, joblib, datetime as dt
import numpy as np, pandas as pd

# ---------------- Config ----------------
# Reuse MODEL_TAG when an earlier cell already defined it; otherwise fall back to the default tag.
MODEL_TAG = globals().get("MODEL_TAG", "multi_model_timeaware_v3")
# Directory that receives the artifacts written further down; created if it does not exist yet.
SAVE_DIR  = f"/content/drive/My Drive/Colab Notebooks/stock/models/{MODEL_TAG}/ensembles_best_precision"
os.makedirs(SAVE_DIR, exist_ok=True)

# To force the name of the metrics table, define it before running this cell, e.g.:
# METRICS_DF_NAME = "ens_cmp_df"
# When it is not already defined it stays None.
METRICS_DF_NAME = globals().get("METRICS_DF_NAME", None)

# Dicionários (usa o que existir no ambiente)
def _pick_first_existing(*names):
    """Return the value bound to the first of ``names`` found in the module globals.

    Names are tried in the order given; ``None`` is returned when none of
    them is defined.
    """
    env = globals()
    return next((env[key] for key in names if key in env), None)

# Each lookup tries several alternative variable names and yields None when none of them is defined.
ENS_OBJECTS    = _pick_first_existing("ENS_OBJECTS","ENSEMBLES","ENSEMBLE_OBJECTS")
ENS_THRESHOLDS = _pick_first_existing("ENS_THRESHOLDS","ENSEMBLE_THRESHOLDS","THRESHOLDS_ENS","THRESHOLDS")
CALIBRATORS    = _pick_first_existing("CALIBRATORS","BASE_CALIBRATORS","CALIBS")
BASE_MODELS    = _pick_first_existing("BASE_MODELS","BASES","BASE_MODELS_BY_TARGET")
# Base-model names; defaults to the four listed here when BASE_NAMES is not already defined.
BASE_NAMES     = globals().get("BASE_NAMES", ['lgbm','hgb','rf','et'])

# Minimum desired recall per target (optional); 0.00 disables the filter.
MIN_RECALL = {
    "target_up20": 0.00,  # e.g. 0.70
    "target_dd5":  0.00,  # e.g. 0.85
}

# ---------------- Helpers ----------------
def _flatten_cols(cols):
    """Return column labels as a flat list of strings.

    For a ``pd.MultiIndex`` the levels of each entry are stringified and
    joined with ``"__"``; for anything else each label is passed through
    ``str``.
    """
    if not isinstance(cols, pd.MultiIndex):
        return list(map(str, cols))
    return ["__".join(str(level) for level in key) for key in cols.to_list()]

def _score_metrics_df(df: pd.DataFrame) -> float:
    """Heuristic score of how much ``df`` looks like the ensemble-metrics table.

    The score adds up:
      * 1 point per essential column present and 0.2 per optional one
        (matched on the flattened column names);
      * up to 2 points for the fraction of ``model`` values that start
        with ``"ens_"``;
      * up to 1 point for the row count (``len(df) / 2000``, capped).

    Returns -1 when the column names cannot be flattened.
    """
    try:
        names = set(_flatten_cols(df.columns))
    except Exception:
        return -1

    essential = {"target","model","Precision","Recall","F1","threshold"}
    optional  = {"AP","ROC_AUC","BalancedAcc","tn","fp","fn","tp"}
    score = len(essential & names) + 0.2*len(optional & names)

    if "model" in names:
        # Best effort: any failure while reading the column (for example
        # with MultiIndex columns) just skips this bonus.
        try:
            labels = df["model"].astype(str)
            if len(labels) > 0:
                score += 2.0 * (labels.str.startswith("ens_")).mean()
        except Exception:
            pass

    size_bonus = min(len(df)/2000.0, 1.0)
    return score + size_bonus

def _find_col(df, name):
    """Return the first column label of ``df`` equal to ``name`` ignoring case, else None.

    Labels are compared through ``str(label).lower()``; the label is
    returned as stored in ``df.columns``.
    """
    wanted = name.lower()
    return next((col for col in df.columns if str(col).lower() == wanted), None)

def _autodetect_metrics_df():
    """Locate the ensemble-metrics DataFrame among the notebook globals.

    Lookup order:
      1. the name held in ``METRICS_DF_NAME``, if set and bound to a DataFrame;
      2. a fixed list of commonly used variable names;
      3. a sweep over every non-empty DataFrame in ``globals()``: among the
         frames that contain all essential columns (case-insensitive, after
         flattening), the one with the highest ``_score_metrics_df`` wins.

    Returns:
        ``(df, name)`` where ``df`` is a copy of the chosen frame with
        flattened column names and ``name`` is the variable it was found
        under, or ``(None, None)`` when nothing qualifies.
    """
    env = globals()

    def _flat_copy(frame):
        # Work on a copy so the caller's frame keeps its original columns.
        out = frame.copy()
        out.columns = _flatten_cols(out.columns)
        return out

    # 1) Forced name (only honoured when it really is a DataFrame,
    #    consistent with the check applied to the common names below).
    if METRICS_DF_NAME and isinstance(env.get(METRICS_DF_NAME), pd.DataFrame):
        return _flat_copy(env[METRICS_DF_NAME]), METRICS_DF_NAME

    # 2) Common names
    for cand in ["ens_cmp_df","df_ens_cmp","ENS_CMP_DF","ens_metrics_df","df_ens_metrics","METRICS_ENS_DF"]:
        if isinstance(env.get(cand), pd.DataFrame):
            return _flat_copy(env[cand]), cand

    # 3) General sweep. Frames lacking an essential column are discarded
    #    *before* ranking, so a high-scoring but unusable frame can no longer
    #    hide a valid metrics table.
    need = {c.lower() for c in ["target","model","Precision","Recall","F1","threshold"]}
    best_name, best_obj, best_score = None, None, -1.0
    for name, obj in list(env.items()):
        if not isinstance(obj, pd.DataFrame) or len(obj) == 0:
            continue
        try:
            flat_lower = {c.lower() for c in _flatten_cols(obj.columns)}
            if not need <= flat_lower:
                continue
            score = _score_metrics_df(obj)
        except Exception:
            # Best effort: an odd frame must not abort the whole sweep.
            continue
        if score > best_score:
            best_name, best_obj, best_score = name, obj, score

    if best_obj is None:
        return None, None
    return _flat_copy(best_obj), best_name

# ---------------- Detectar DF de métricas ----------------
# Locate the metrics table; on failure, list every DataFrame in the
# environment so the user can pick one through METRICS_DF_NAME.
ens_cmp_df, detected_name = _autodetect_metrics_df()
if ens_cmp_df is None:
    print("⚠️ Não consegui localizar a tabela de métricas de ensembles.")
    print("DataFrames no ambiente (primeiras colunas):")
    # list(globals().items()) snapshots the (name, value) pairs: the loop
    # variables are themselves module globals, so iterating the live dict
    # would fail once they get bound.
    for name, obj in list(globals().items()):
        if isinstance(obj, pd.DataFrame):
            try:
                cols_preview = _flatten_cols(obj.columns)[:10]
            except Exception:
                cols_preview = list(obj.columns)[:10]
            print(f" - {name}: cols={cols_preview}")
    raise NameError("Defina METRICS_DF_NAME = 'NOME_DO_DF' e reexecute.")

print(f"✅ Tabela de métricas detectada: {detected_name} (linhas={len(ens_cmp_df)})")

# Normalise column access: resolve each metric name to the actual column
# label in ens_cmp_df (case-insensitive); a handle is None when the column is absent.
C_TARGET    = _find_col(ens_cmp_df, "target")
C_MODEL     = _find_col(ens_cmp_df, "model")
C_PREC      = _find_col(ens_cmp_df, "Precision")
C_REC       = _find_col(ens_cmp_df, "Recall")
C_F1        = _find_col(ens_cmp_df, "F1")
C_THR       = _find_col(ens_cmp_df, "threshold")
C_AP        = _find_col(ens_cmp_df, "AP")
C_ROC       = _find_col(ens_cmp_df, "ROC_AUC")
C_BAL       = _find_col(ens_cmp_df, "BalancedAcc")
C_TN        = _find_col(ens_cmp_df, "tn")
C_FP        = _find_col(ens_cmp_df, "fp")
C_FN        = _find_col(ens_cmp_df, "fn")
C_TP        = _find_col(ens_cmp_df, "tp")

# These six columns are mandatory; the remaining handles are optional.
need_all = [C_TARGET,C_MODEL,C_PREC,C_REC,C_F1,C_THR]
if any(c is None for c in need_all):
    raise ValueError("A tabela detectada não contém colunas essenciais (target/model/Precision/Recall/F1/threshold).")

# Work on a copy so ens_cmp_df itself is left untouched.
df = ens_cmp_df.copy()

# Keep only ensemble rows (model name starting with "ens_")
df[C_MODEL] = df[C_MODEL].astype(str)
df = df[df[C_MODEL].str.startswith("ens_")].reset_index(drop=True)
if df.empty:
    raise ValueError("A tabela de métricas não contém linhas com 'model' iniciando por 'ens_'. Verifique seu pipeline.")

# Ensure numeric dtypes; values that cannot be parsed become NaN
for c in [C_PREC, C_REC, C_F1, C_THR, C_AP, C_ROC, C_BAL, C_TN, C_FP, C_FN, C_TP]:
    if c is not None and c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

# ---------------- Escolher melhor por Precision ----------------
# For each target, pick one ensemble row: highest Precision, ties broken
# by F1 and then ROC_AUC (when those columns exist).
best_rows = []
for tgt, grp in df.groupby(C_TARGET):
    min_rec = MIN_RECALL.get(str(tgt), 0.0)
    # Keep rows that reach the recall floor; rows with NaN recall are kept too.
    grp2 = grp[(grp[C_REC] >= min_rec) | grp[C_REC].isna()].copy()
    if grp2.empty:
        # No row satisfies the floor: fall back to the whole group.
        grp2 = grp.copy()
    order = [col for col in [C_PREC, C_F1, C_ROC] if col is not None and col in grp2.columns]
    grp2 = grp2.sort_values(order, ascending=[False]*len(order))
    best_rows.append(grp2.iloc[0])

best_df = pd.DataFrame(best_rows).reset_index(drop=True)

# Show only the metric columns that are actually present (None handles are skipped).
cols_show = [c for c in [C_TARGET,C_MODEL,C_PREC,C_REC,C_F1,C_THR,C_AP,C_ROC,C_BAL,C_TN,C_FP,C_FN,C_TP] if c in best_df.columns]
print("\n=== Escolha do 'melhor por Precision' (por alvo) ===")
print(best_df[cols_show])

# ---------------- Montar artefato e salvar ----------------
def _row_val(row, colname):
    """Return ``row[colname]``, or None when the value is unavailable.

    "Unavailable" means ``colname`` is None, is not a key of ``row``, or
    the stored value is NaN/NA according to ``pd.isna``.
    """
    if colname is None or colname not in row:
        return None
    value = row[colname]
    if pd.isna(value):
        return None
    return value

def _build_artifact(row):
    """Assemble the artifact dict persisted for one selected ensemble.

    Args:
        row: one row of the "best by precision" table, indexable by the
            module-level column handles (``C_TARGET``, ``C_MODEL``, ...).

    Returns:
        A dict with the fixed keys ``kind``, ``model_tag``, ``target``,
        ``strategy``, ``threshold``, ``metrics_valid``, ``bases`` and
        ``created_at``. The keys ``ensemble_object``, ``calibrators`` and
        ``base_models`` are added only when the corresponding environment
        dictionaries hold an entry for this target (and strategy). A
        threshold found in ``ENS_THRESHOLDS`` overrides the one from ``row``.
    """
    target = str(row[C_TARGET])
    name   = str(row[C_MODEL])
    # Explicit None check: a legitimate threshold of 0.0 must be kept, only a
    # missing/NaN threshold falls back to 0.5.
    thr_val = _row_val(row, C_THR)
    thr     = 0.5 if thr_val is None else float(thr_val)

    artifact = {
        "kind": "ensemble",
        "model_tag": MODEL_TAG,
        "target": target,
        "strategy": name,
        "threshold": thr,
        "metrics_valid": {
            "AP": _row_val(row, C_AP),
            "ROC_AUC": _row_val(row, C_ROC),
            "F1": _row_val(row, C_F1),
            "Precision": _row_val(row, C_PREC),
            "Recall": _row_val(row, C_REC),
            "BalancedAcc": _row_val(row, C_BAL),
            "tn": _row_val(row, C_TN),
            "fp": _row_val(row, C_FP),
            "fn": _row_val(row, C_FN),
            "tp": _row_val(row, C_TP),
        },
        "bases": BASE_NAMES,
        # UTC timestamp in the same "YYYY-MM-DDTHH:MM:SS.ffffffZ" format as
        # before, obtained without the deprecated datetime.utcnow().
        "created_at": dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat() + "Z",
    }

    # The optional attachments below are best effort: a malformed environment
    # dictionary only produces a warning, never an aborted save.
    try:
        if ENS_OBJECTS is not None and target in ENS_OBJECTS and name in ENS_OBJECTS[target]:
            artifact["ensemble_object"] = ENS_OBJECTS[target][name]
    except Exception as e:
        print(f"[warn] ensemble_object ausente p/ {target}/{name}: {e}")

    try:
        if ENS_THRESHOLDS is not None and target in ENS_THRESHOLDS and name in ENS_THRESHOLDS[target]:
            artifact["threshold"] = float(ENS_THRESHOLDS[target][name])
    except Exception as e:
        print(f"[warn] threshold ausente p/ {target}/{name}: {e}")

    try:
        if CALIBRATORS is not None and target in CALIBRATORS:
            artifact["calibrators"] = CALIBRATORS[target]
    except Exception as e:
        print(f"[warn] calibrators ausentes p/ {target}: {e}")

    try:
        if BASE_MODELS is not None and target in BASE_MODELS:
            artifact["base_models"] = {b: BASE_MODELS[target].get(b) for b in BASE_NAMES if b in BASE_MODELS[target]}
    except Exception as e:
        print(f"[warn] base_models ausentes p/ {target}: {e}")

    return artifact

# Persist one joblib artifact plus a JSON summary per selected row.
saved_paths = []
for _, row in best_df.iterrows():
    target = str(row[C_TARGET])
    name   = str(row[C_MODEL])
    artifact = _build_artifact(row)
    # "/" is replaced so the target/strategy names cannot introduce path separators.
    fname = f"{target}__{name}__best_precision.joblib".replace("/","_")
    fpath = os.path.join(SAVE_DIR, fname)
    joblib.dump(artifact, fpath, compress=3)
    saved_paths.append(fpath)
    # JSON summary: metadata and metrics only, written next to the joblib file
    summary = {
        "file": fname,
        "target": target,
        "strategy": name,
        "threshold": artifact.get("threshold"),
        "metrics_valid": artifact["metrics_valid"],
        "created_at": artifact["created_at"],
    }
    with open(os.path.join(SAVE_DIR, fname.replace(".joblib",".json")), "w") as f:
        json.dump(summary, f, indent=2)

print("\nArquivos salvos:")
for p in saved_paths:
    print(" -", p)

# ---------------- Loader para inferência ----------------
def load_best_precision(path):
    """Load a saved "best precision" artifact and build prediction callables.

    Args:
        path: location of a ``.joblib`` artifact.

    Returns:
        A dict with:
          * ``"artifact"``: the loaded artifact dict;
          * ``"predict_proba_fn"``: ``base_probas_matrix -> scores``. Uses
            ``ensemble_object.predict_proba_meta`` when available, otherwise
            ``predict_proba`` (second column of a two-column result, else
            the flattened output);
          * ``"predict_fn"``: ``base_probas_matrix -> 0/1 labels`` obtained by
            comparing the scores with the saved threshold (0.5 if absent).

        ``base_probas_matrix`` is expected with shape
        ``[n_samples, n_bases]``, columns in ``BASE_NAMES`` order.

    Raises:
        ValueError: the artifact has no ``ensemble_object``.
    """
    # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
    # only load artifacts from a trusted location.
    art = joblib.load(path)
    ens_obj = art.get("ensemble_object", None)
    thr     = art.get("threshold", 0.5)

    if ens_obj is None:
        raise ValueError(
            "O artefato não contém ensemble_object. "
            "Re-salve quando ENS_OBJECTS/ENS_THRESHOLDS estiverem preenchidos, "
            "ou adapte para reconstruir a estratégia."
        )

    def predict_proba_fn(base_probas_matrix):
        if hasattr(ens_obj, "predict_proba_meta"):
            return ens_obj.predict_proba_meta(base_probas_matrix)
        # Normalise to ndarray first so list/DataFrame outputs support
        # .ndim and positional column indexing.
        prob = np.asarray(ens_obj.predict_proba(base_probas_matrix))
        if prob.ndim == 2 and prob.shape[1] == 2:
            return prob[:, 1]
        return prob.ravel()

    def predict_fn(base_probas_matrix):
        p = predict_proba_fn(base_probas_matrix)
        return (p >= thr).astype(int)

    return {"artifact": art, "predict_proba_fn": predict_proba_fn, "predict_fn": predict_fn}


✅ Tabela de métricas detectada: METRICS_DF (linhas=12)

=== Escolha do 'melhor por Precision' (por alvo) ===
        target               model  Precision    Recall        F1  threshold  \
0   target_dd5    ens_trimmed_mean   0.613025  0.850980  0.712664   0.342590   
1  target_up20  ens_logit_weighted   0.715962  0.657859  0.685682   0.641091   

         AP   ROC_AUC  BalancedAcc     tn    fp    fn    tp  
0  0.677308  0.802217     0.744190  10069  5728  1589  9074  
1  0.732513  0.925146     0.807656  21783   968  1269  2440  

Arquivos salvos:
 - /content/drive/My Drive/Colab Notebooks/stock/models/multi_model_timeaware_v3/ensembles_best_precision/target_dd5__ens_trimmed_mean__best_precision.joblib
 - /content/drive/My Drive/Colab Notebooks/stock/models/multi_model_timeaware_v3/ensembles_best_precision/target_up20__ens_logit_weighted__best_precision.joblib
