---
# MODELOS E MODELAGEM
---

## Comparativo XGBoost vs. LSTM no IBOV (GOLD/SILVER)

In [4]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Comparativo XGBoost vs. LSTM no IBOV (GOLD/SILVER)
- Um único bloco de código auto-contido.
- Inicia em dry_run=True (simulação). Não persiste nada quando dry_run=True.
- Respeita SSOT: usa apenas /home/wrm/BOLSA_2026/{gold,silver}/.
- Walk-forward (expanding) com blocos explícitos, sem embaralhar tempo.
- Gera métricas de previsão e operacionais (long/flat com custo simples).
- Emite mensagens normativas em caso de falhas (VALIDATION_ERROR, CHECKLIST_FAILURE).
"""

import os
import sys
import math
import json
import time
import types
import errno
import warnings
from dataclasses import dataclass, asdict
from typing import List, Tuple, Dict, Optional

import numpy as np
import pandas as pd

# Verificação de dependências (obrigatório para comparação XGBoost vs LSTM)
_missing = []
try:
    import xgboost as xgb
    from xgboost import XGBClassifier, XGBRegressor
except Exception as e:
    _missing.append(f"xgboost ({e})")
try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential # pyright: ignore[reportMissingImports]
    from tensorflow.keras.layers import LSTM, Dense, Dropout, Input # pyright: ignore[reportMissingImports]
    from tensorflow.keras.callbacks import EarlyStopping # pyright: ignore[reportMissingImports]
except Exception as e:
    _missing.append(f"tensorflow ({e})")

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, roc_auc_score,
    accuracy_score, f1_score
)
from sklearn.exceptions import NotFittedError

warnings.filterwarnings("ignore", category=FutureWarning)
pd.set_option("display.width", 180)
pd.set_option("display.max_columns", 100)

# =========================
# Configurações e Parâmetros
# =========================

@dataclass
class RunConfig:
    dry_run: bool = True
    persist: bool = False  # ignorado quando dry_run=True
    # Caminhos SSOT
    gold_path: str = "/home/wrm/BOLSA_2026/gold/IBOV_gold.parquet"
    silver_path: str = "/home/wrm/BOLSA_2026/silver/IBOV_silver.parquet"
    # Janelas e horizontes
    windows: Tuple[int, ...] = (5, 10, 15)
    horizons: Tuple[int, ...] = (1, 3, 5)  # D+1, D+3, D+5
    # Walk-forward
    min_train_months: int = 18
    test_months: int = 6
    val_months: int = 3
    max_folds: int = 10  # entre 5 e 10
    # Estratégia operacional
    cost_per_trade_bps: float = 10.0  # 10 bps por troca de posição
    trading_days_per_year: int = 252
    # LSTM
    lstm_units: int = 48  # 32–64
    lstm_layers: int = 1  # 1–2
    lstm_dropout: float = 0.2  # 0.1–0.3
    lstm_epochs: int = 50
    lstm_batch_size: int = 32
    lstm_patience: int = 5
    # XGBoost
    xgb_learning_rate: float = 0.05
    xgb_max_depth: int = 5
    xgb_n_estimators: int = 1000
    xgb_early_stopping_rounds: int = 50
    # Thresholds (busca)
    prob_threshold_grid: Tuple[float, ...] = (0.50, 0.55, 0.60, 0.65, 0.70)
    reg_threshold_percentiles: Tuple[int, ...] = (50, 60, 70, 80)
    # Segurança
    restrict_prefixes: Tuple[str, ...] = (
        "/home/wrm/BOLSA_2026/gold",
        "/home/wrm/BOLSA_2026/silver",
    )

# =========================
# Utilidades gerais
# =========================

def now_ts() -> str:
    return time.strftime("%Y-%m-%d %H:%M:%S")

def validate_env(cfg: RunConfig) -> Optional[str]:
    if _missing:
        return f"CHECKLIST_FAILURE: dependências ausentes para comparação XGBoost vs LSTM -> {', '.join(_missing)}"
    return None

def path_exists(p: str) -> bool:
    try:
        return os.path.exists(p)
    except Exception:
        return False

def enforce_ssot_path(p: str, allowed_prefixes: Tuple[str, ...]) -> bool:
    try:
        abspath = os.path.abspath(p)
        return any(abspath.startswith(os.path.abspath(pref)) for pref in allowed_prefixes)
    except Exception:
        return False

def detect_data_path(cfg: RunConfig) -> Tuple[Optional[str], str]:
    # GOLD preferencial; senão SILVER
    gold_ok = path_exists(cfg.gold_path)
    silver_ok = path_exists(cfg.silver_path)
    chosen = None
    msg = ""
    if gold_ok and enforce_ssot_path(cfg.gold_path, cfg.restrict_prefixes):
        chosen = cfg.gold_path
        msg = "GOLD"
    elif silver_ok and enforce_ssot_path(cfg.silver_path, cfg.restrict_prefixes):
        chosen = cfg.silver_path
        msg = "SILVER"
    return chosen, msg

def read_parquet_any(path: str) -> pd.DataFrame:
    # Pandas suporta diretório parquet dataset; confiamos em pyarrow
    return pd.read_parquet(path)

def detect_date_and_price_cols(df: pd.DataFrame) -> Tuple[Optional[str], Optional[str]]:
    date_candidates = ["date", "Date", "DATE", "datetime", "Datetime", "DATETIME", "data", "DATA"]
    price_candidates = [
        "close","Close","CLOSE","adj_close","Adj Close","ADJ_CLOSE",
        "fechamento","FECHAMENTO","price","Price","PRICE","IBOV"
    ]
    date_col = next((c for c in date_candidates if c in df.columns), None)
    price_col = next((c for c in price_candidates if c in df.columns), None)
    return date_col, price_col

def ensure_datetime(df: pd.DataFrame, date_col: Optional[str]) -> pd.DataFrame:
    if date_col is None:
        # Tentar usar índice se já é datetime-like
        if isinstance(df.index, pd.DatetimeIndex):
            out = df.copy()
            out = out.sort_index()
            out["__date__"] = out.index
            return out
        raise ValueError("VALIDATION_ERROR: coluna de data não encontrada e índice não é DatetimeIndex.")
    out = df.copy()
    out[date_col] = pd.to_datetime(out[date_col], errors="coerce", utc=False)
    out = out.dropna(subset=[date_col]).sort_values(by=date_col)
    out["__date__"] = out[date_col].values
    return out

def compute_log_returns(price: pd.Series) -> pd.Series:
    return np.log(price / price.shift(1))

def forward_return(series: pd.Series, h: int) -> pd.Series:
    # log retorno acumulado adiante (close_{t+h}/close_t)
    return np.log(series.shift(-h) / series)

def summarize_df(df: pd.DataFrame, label: str) -> Dict[str, any]:
    dmin = pd.to_datetime(df["__date__"]).min()
    dmax = pd.to_datetime(df["__date__"]).max()
    return {
        "label": label,
        "rows": int(df.shape[0]),
        "cols": int(df.shape[1]),
        "date_min": str(dmin.date()) if pd.notnull(dmin) else None,
        "date_max": str(dmax.date()) if pd.notnull(dmax) else None,
        "columns": list(df.columns)
    }

# =========================
# Alvos (Labels) e Features
# =========================

def detect_or_generate_labels(
    df: pd.DataFrame,
    price_col: Optional[str],
    horizons: Tuple[int, ...]
) -> Tuple[pd.DataFrame, Dict[int, Dict[str, str]], List[str]]:
    """
    Retorna:
    - df com colunas de labels
    - mapping por horizonte: {"type": "classification"/"regression", "col": <colname>, "desc": <desc>}
    - log_msgs descrevendo a origem dos labels
    """
    out = df.copy()
    label_info: Dict[int, Dict[str, str]] = {}
    logs: List[str] = []

    # Candidatos de labels existentes
    # Para classificação
    clf_patterns = [
        "label_d{h}", "target_d{h}", "direction_d{h}", "dir_d{h}",
        "y_d{h}", "y_d+{h}", "class_d{h}", "bin_d{h}"
    ]
    # Para regressão
    reg_patterns = [
        "ret_fwd_{h}", "return_fwd_{h}", "rtn_fwd_{h}", "y_reg_{h}",
        "ret_d{h}", "return_d{h}"
    ]

    for h in horizons:
        found_col = None
        found_type = None

        # busca por classificação
        for pat in clf_patterns:
            cname = pat.format(h=h)
            if cname in out.columns:
                found_col = cname
                found_type = "classification"
                break

        # busca por regressão (só se não achou classificação)
        if found_col is None:
            for pat in reg_patterns:
                cname = pat.format(h=h)
                if cname in out.columns:
                    found_col = cname
                    found_type = "regression"
                    break

        # se não achou, gerar a partir do próprio dataset
        if found_col is None:
            if price_col is None and "ret1" not in out.columns:
                raise ValueError("VALIDATION_ERROR: impossivel gerar rótulos: sem coluna de preço e sem retornos base.")
            # base: usar preço para retorno futuro
            if price_col is not None:
                out[f"ret_fwd_{h}"] = forward_return(out[price_col], h)
                out[f"dir_fwd_{h}"] = (out[f"ret_fwd_{h}"] > 0).astype(int)
                found_col = f"dir_fwd_{h}"
                found_type = "classification"
                logs.append(f"h={h}: rótulos GERADOS -> dir_fwd_{h} (binário a partir de ret_fwd_{h}).")
            else:
                # fallback: se já houver 'ret1' (log-ret), ainda assim precisamos de preço para fwd; sem preço, não dá.
                raise ValueError("VALIDATION_ERROR: sem preço para calcular retorno futuro e gerar rótulos.")
        else:
            logs.append(f"h={h}: rótulos NATIVOS detectados -> {found_col} ({found_type}).")

        label_info[h] = {
            "type": found_type,
            "col": found_col,
            "desc": "nativo" if "NATIVOS" in logs[-1] else "gerado"
        }

    return out, label_info, logs

def ensure_base_features(df: pd.DataFrame, price_col: Optional[str]) -> pd.DataFrame:
    out = df.copy()
    # Retorno base (log) de 1 dia
    if price_col is not None and "ret1" not in out.columns:
        out["ret1"] = compute_log_returns(out[price_col])
    # algumas features simples das últimas 5 barras como base para LSTM
    for w in (5,):
        out[f"roll_mean_ret_{w}"] = out["ret1"].rolling(w).mean()
        out[f"roll_std_ret_{w}"] = out["ret1"].rolling(w).std()
    return out

def build_xgb_features(df: pd.DataFrame, window: int) -> pd.DataFrame:
    """
    Constrói features tabulares para XGBoost usando retornos e estatísticas de janela.
    """
    out = df.copy()
    # Lags de ret1
    max_lags = min(window, 10)  # limitar
    for lag in range(1, max_lags + 1):
        out[f"ret1_lag_{lag}"] = out["ret1"].shift(lag)
    # Médias e vol de retorno
    out[f"ret1_roll_mean_{window}"] = out["ret1"].rolling(window).mean()
    out[f"ret1_roll_std_{window}"] = out["ret1"].rolling(window).std()
    # Z-score do retorno instantâneo vs janela
    out[f"ret1_z_{window}"] = (out["ret1"] - out[f"ret1_roll_mean_{window}"]) / (out[f"ret1_roll_std_{window}"] + 1e-8)
    out = out.dropna().copy()
    return out

def build_lstm_sequences(
    df: pd.DataFrame,
    features_cols: List[str],
    label_col: str,
    window: int
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Cria sequências [amostra, janela, features] e alvo alinhado.
    """
    X_list, y_list = [], []
    values = df[features_cols].values
    yvals = df[label_col].values
    for i in range(window, len(df)):
        X_list.append(values[i-window:i, :])
        y_list.append(yvals[i])
    if not X_list:
        return np.empty((0, window, len(features_cols))), np.empty((0,))
    X = np.stack(X_list, axis=0)
    y = np.array(y_list)
    return X, y

# =========================
# Walk-forward e Métricas
# =========================

def month_diff(a: pd.Timestamp, b: pd.Timestamp) -> int:
    return (b.year - a.year) * 12 + (b.month - a.month)

def build_walk_forward_splits(
    df: pd.DataFrame,
    min_train_months: int,
    val_months: int,
    test_months: int,
    max_folds: int
) -> List[Dict[str, pd.Timestamp]]:
    """
    Retorna lista de dicts com ranges de datas explícitos: train_start, train_end, val_start, val_end, test_start, test_end
    """
    dates = pd.to_datetime(df["__date__"])
    start = dates.min().normalize()
    end = dates.max().normalize()

    # Determinar primeiros limites
    # Treino mínimo
    train_end_initial = start + pd.DateOffset(months=min_train_months) - pd.DateOffset(days=1)
    if train_end_initial >= end:
        raise ValueError("VALIDATION_ERROR: série insuficiente para min_train_months.")
    # Primeira janela de teste
    test_start = train_end_initial + pd.DateOffset(days=1)
    test_end = test_start + pd.DateOffset(months=test_months) - pd.DateOffset(days=1)

    folds = []
    folds_count = 0
    while test_start < end and folds_count < max_folds:
        # Ajustar test_end ao fim da série
        if test_end > end:
            test_end = end
        # Validação: últimos val_months do treino expandido
        val_end = test_start - pd.DateOffset(days=1)
        val_start = val_end - pd.DateOffset(months=val_months) + pd.DateOffset(days=1)
        train_start = start
        train_end = val_start - pd.DateOffset(days=1)
        # Checagens
        if train_start >= train_end or val_start >= val_end or test_start > test_end:
            break
        folds.append({
            "train_start": train_start, "train_end": train_end,
            "val_start": val_start, "val_end": val_end,
            "test_start": test_start, "test_end": test_end
        })
        folds_count += 1
        # próximo bloco de teste
        test_start = test_end + pd.DateOffset(days=1)
        test_end = test_start + pd.DateOffset(months=test_months) - pd.DateOffset(days=1)

    if len(folds) < 5:
        # Garantir 5–10 blocos conforme requisito
        raise ValueError(f"VALIDATION_ERROR: splits walk-forward insuficientes ({len(folds)}).")
    return folds

def subset_by_date(df: pd.DataFrame, start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame:
    mask = (df["__date__"] >= start) & (df["__date__"] <= end)
    return df.loc[mask].copy()

def annualized_return(daily_returns: np.ndarray, days_per_year: int) -> float:
    if len(daily_returns) == 0:
        return 0.0
    cumulative = np.prod(1.0 + daily_returns)
    years = len(daily_returns) / days_per_year
    if years <= 0:
        return 0.0
    return cumulative ** (1.0 / years) - 1.0

def sharpe_ratio(daily_returns: np.ndarray, days_per_year: int) -> float:
    if len(daily_returns) < 2:
        return 0.0
    mu = np.mean(daily_returns)
    sd = np.std(daily_returns, ddof=1) + 1e-12
    return (mu / sd) * math.sqrt(days_per_year)

def max_drawdown(equity: np.ndarray) -> float:
    if len(equity) == 0:
        return 0.0
    peak = np.maximum.accumulate(equity)
    dd = (equity / peak) - 1.0
    return dd.min()

def evaluate_strategy_long_flat(
    y_true_returns: np.ndarray,
    preds: np.ndarray,
    threshold: float,
    is_classification: bool,
    cost_per_trade_bps: float,
    days_per_year: int
) -> Dict[str, float]:
    """
    - Para classificação: entrar comprado quando prob >= threshold, senão flat.
    - Para regressão: entrar comprado quando retorno previsto >= threshold (valor em retorno, não prob).
    - Custos: custo fixo por mudança de posição (0 -> 1 ou 1 -> 0) de cost_per_trade_bps.
    """
    if len(y_true_returns) != len(preds) or len(preds) == 0:
        return {k: np.nan for k in ["ann_return", "sharpe", "maxdd", "hit_rate", "turnover", "threshold"]}
    if is_classification:
        pos = (preds >= threshold).astype(int)
    else:
        pos = (preds >= threshold).astype(int)

    # Custos por troca
    changes = np.abs(np.diff(pos, prepend=0))
    trade_costs = (changes * (cost_per_trade_bps / 10000.0))  # bps -> decimal

    daily_ret = pos * y_true_returns - trade_costs
    equity = np.cumprod(1.0 + daily_ret)
    ann = annualized_return(daily_ret, days_per_year)
    shp = sharpe_ratio(daily_ret, days_per_year)
    mdd = max_drawdown(equity)
    # Hit-rate: fração de dias com posição==1 e retorno>0
    hits_n = ((pos == 1) & (y_true_returns > 0)).sum()
    pos_n = (pos == 1).sum()
    hit_rate = float(hits_n) / float(pos_n) if pos_n > 0 else np.nan
    # Turnover: nº de trades / nº de dias
    turnover = changes.sum() / len(changes) if len(changes) > 0 else 0.0

    return {
        "ann_return": float(ann),
        "sharpe": float(shp),
        "maxdd": float(mdd),
        "hit_rate": float(hit_rate) if not np.isnan(hit_rate) else np.nan,
        "turnover": float(turnover),
        "threshold": float(threshold)
    }

def pick_best_threshold_on_validation(
    y_val_returns: np.ndarray,
    preds_val: np.ndarray,
    is_classification: bool,
    cfg: RunConfig
) -> Tuple[float, Dict[str, float]]:
    """
    Busca threshold que maximiza Sharpe na validação (sem vazar teste).
    """
    best_thr = None
    best_metrics = None
    if is_classification:
        grid = cfg.prob_threshold_grid
        for thr in grid:
            m = evaluate_strategy_long_flat(
                y_val_returns, preds_val, thr, True, cfg.cost_per_trade_bps, cfg.trading_days_per_year
            )
            if best_metrics is None or (m["sharpe"] > best_metrics["sharpe"]):
                best_thr, best_metrics = thr, m
    else:
        # thresholds por percentil das previsões
        percs = np.percentile(preds_val, cfg.reg_threshold_percentiles)
        for thr in percs:
            m = evaluate_strategy_long_flat(
                y_val_returns, preds_val, float(thr), False, cfg.cost_per_trade_bps, cfg.trading_days_per_year
            )
            if best_metrics is None or (m["sharpe"] > best_metrics["sharpe"]):
                best_thr, best_metrics = float(thr), m

    if best_thr is None:
        # fallback
        best_thr = 0.5 if is_classification else float(np.percentile(preds_val, 60.0))
        best_metrics = evaluate_strategy_long_flat(
            y_val_returns, preds_val, best_thr, is_classification, cfg.cost_per_trade_bps, cfg.trading_days_per_year
        )
    return best_thr, best_metrics

# =========================
# Treino e Avaliação
# =========================

def train_eval_xgb(
    X_tr: np.ndarray, y_tr: np.ndarray,
    X_va: np.ndarray, y_va: np.ndarray,
    X_te: np.ndarray, y_te: np.ndarray,
    task: str, cfg: RunConfig
) -> Tuple[np.ndarray, np.ndarray, Dict[str, any]]:
    """
    Retorna: (preds_val, preds_test, params)
    """
    params = {
        "learning_rate": cfg.xgb_learning_rate,
        "max_depth": cfg.xgb_max_depth,
        "n_estimators": cfg.xgb_n_estimators,
        "early_stopping_rounds": cfg.xgb_early_stopping_rounds,
        "subsample": 0.9,
        "colsample_bytree": 0.9,
        "random_state": 42,
        "n_jobs": max(1, os.cpu_count() - 1)
    }
    if task == "classification":
        model = XGBClassifier(
            objective="binary:logistic",
            **params
        )
        model.fit(
            X_tr, y_tr,
            eval_set=[(X_va, y_va)],
            verbose=False
        )
        preds_val = model.predict_proba(X_va)[:, 1]
        preds_test = model.predict_proba(X_te)[:, 1]
    else:
        model = XGBRegressor(
            objective="reg:squarederror",
            **params
        )
        model.fit(
            X_tr, y_tr,
            eval_set=[(X_va, y_va)],
            verbose=False
        )
        preds_val = model.predict(X_va)
        preds_test = model.predict(X_te)
    params["best_iterations"] = getattr(model, "best_iteration", None)
    return preds_val, preds_test, params

def build_lstm_model(
    n_features: int,
    window: int,
    task: str,
    cfg: RunConfig
):
    model = Sequential()
    model.add(Input(shape=(window, n_features)))
    if cfg.lstm_layers == 2:
        model.add(LSTM(cfg.lstm_units, return_sequences=True))
        model.add(Dropout(cfg.lstm_dropout))
        model.add(LSTM(cfg.lstm_units))
    else:
        model.add(LSTM(cfg.lstm_units))
    model.add(Dropout(cfg.lstm_dropout))
    if task == "classification":
        model.add(Dense(1, activation="sigmoid"))
        model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["AUC"])
    else:
        model.add(Dense(1, activation="linear"))
        model.compile(optimizer="adam", loss="mse", metrics=["mae"])
    return model

def train_eval_lstm(
    X_tr_seq: np.ndarray, y_tr: np.ndarray,
    X_va_seq: np.ndarray, y_va: np.ndarray,
    X_te_seq: np.ndarray,
    task: str,
    cfg: RunConfig
) -> Tuple[np.ndarray, np.ndarray, Dict[str, any]]:
    """
    Retorna: (preds_val, preds_test, params)
    """
    params = {
        "units": cfg.lstm_units,
        "layers": cfg.lstm_layers,
        "dropout": cfg.lstm_dropout,
        "epochs": cfg.lstm_epochs,
        "batch_size": cfg.lstm_batch_size,
        "patience": cfg.lstm_patience
    }
    model = build_lstm_model(X_tr_seq.shape[-1], X_tr_seq.shape[1], task, cfg)
    es = EarlyStopping(monitor="val_loss", mode="min", patience=cfg.lstm_patience, restore_best_weights=True, verbose=0)
    model.fit(
        X_tr_seq, y_tr,
        validation_data=(X_va_seq, y_va),
        epochs=cfg.lstm_epochs,
        batch_size=cfg.lstm_batch_size,
        callbacks=[es],
        verbose=0
    )
    preds_val = model.predict(X_va_seq, verbose=0).reshape(-1)
    preds_test = model.predict(X_te_seq, verbose=0).reshape(-1)
    return preds_val, preds_test, params

# =========================
# Métricas de previsão
# =========================

def prediction_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    task: str
) -> Dict[str, float]:
    res = {}
    if len(y_true) == 0:
        return res
    if task == "classification":
        # Converter prob -> label para acc/f1 com limiar 0.5 (métrica pura de previsão)
        y_hat = (y_pred >= 0.5).astype(int)
        try:
            res["AUC"] = float(roc_auc_score(y_true, y_pred))
        except Exception:
            res["AUC"] = np.nan
        res["ACC"] = float(accuracy_score(y_true, y_hat))
        res["F1"] = float(f1_score(y_true, y_hat, zero_division=0))
    else:
        mae = mean_absolute_error(y_true, y_pred)
        rmse = math.sqrt(mean_squared_error(y_true, y_pred))
        res["MAE"] = float(mae)
        res["RMSE"] = float(rmse)
    return res

# =========================
# Execução principal
# =========================

def main():
    print(f"[{now_ts()}] Início — Comparativo XGBoost vs. LSTM (IBOV SSOT)")
    cfg = RunConfig(dry_run=True, persist=False)

    # 0) Checagem de dependências
    dep_err = validate_env(cfg)
    if dep_err:
        print(dep_err)
        print("Checklist não atendido: comparação requer XGBoost e LSTM disponíveis.")
        return

    # 1) Detectar caminho de dados (GOLD > SILVER)
    path, tier = detect_data_path(cfg)
    if path is None:
        print("CHECKLIST_FAILURE: Nenhum dataset encontrado em GOLD ou SILVER permitidos.")
        return
    if not enforce_ssot_path(path, cfg.restrict_prefixes):
        print("CHECKLIST_FAILURE: Caminho fora do SSOT permitido.")
        return

    # 2) Leitura e prova de leitura
    try:
        df_raw = read_parquet_any(path)
    except Exception as e:
        print(f"VALIDATION_ERROR: falha ao ler parquet '{path}': {e}")
        return

    # Identificar colunas e preparar datas
    date_col, price_col = detect_date_and_price_cols(df_raw)
    try:
        df = ensure_datetime(df_raw, date_col)
    except Exception as e:
        print(str(e))
        return

    # Prova de leitura — schema e datas
    proof = summarize_df(df, f"{tier}_{os.path.basename(path)}")
    print("\n[PROVA DE LEITURA]")
    print(f"- Caminho efetivo usado: {path} (tier={tier})")
    print(f"- Schema (primeiras colunas): {proof['columns'][:12]}")
    print(f"- Contagem de linhas: {proof['rows']}, colunas: {proof['cols']}")
    print(f"- date_min: {proof['date_min']}, date_max: {proof['date_max']}")
    print("- Amostra (head 5):")
    try:
        print(df.head(5).to_string(index=False))
    except Exception:
        print(df.head(5))

    # 3) Garantir features base e rótulos
    try:
        df = ensure_base_features(df, price_col)
        df, label_info, label_logs = detect_or_generate_labels(df, price_col, cfg.horizons)
    except Exception as e:
        print(str(e))
        return

    print("\n[RÓTULOS — DETECÇÃO/GERAÇÃO]")
    for log in label_logs:
        print(f"- {log}")
    lbl_report = {h: {"type": label_info[h]["type"], "col": label_info[h]["col"], "origem": label_info[h]["desc"]} for h in cfg.horizons}
    print(f"- Resumo: {json.dumps(lbl_report, indent=2, ensure_ascii=False)}")

    # 4) Definir splits walk-forward explícitos
    try:
        splits = build_walk_forward_splits(
            df, cfg.min_train_months, cfg.val_months, cfg.test_months, cfg.max_folds
        )
    except Exception as e:
        print(str(e))
        return

    print("\n[WALK-FORWARD — Folds explícitos]")
    for i, s in enumerate(splits, 1):
        print(f"Fold {i:02d}: "
              f"train[{str(s['train_start'].date())} → {str(s['train_end'].date())}], "
              f"val[{str(s['val_start'].date())} → {str(s['val_end'].date())}], "
              f"test[{str(s['test_start'].date())} → {str(s['test_end'].date())}]")

    # 5) Preparar pipelines e coletar métricas
    # Tabelas de saída
    pred_metrics_rows = []
    op_metrics_rows = []
    # Para relatório de hiperparâmetros
    xgb_params_log = {}
    lstm_params_log = {}

    for h in cfg.horizons:
        target_type = label_info[h]["type"]
        target_col = label_info[h]["col"]
        # Para estratégia, usamos retorno futuro real (para medir PnL)
        if f"ret_fwd_{h}" not in df.columns:
            # Se rótulo for nativo e não houver ret_fwd_h, tentar derivar a partir de preço
            if price_col is None:
                print("VALIDATION_ERROR: sem preço para obter retorno futuro real para métricas operacionais.")
                return
            df[f"ret_fwd_{h}"] = forward_return(df[price_col], h)

        for w in cfg.windows:
            # Construir features para XGBoost (tabulares)
            dfx = build_xgb_features(df[["__date__", "ret1"]].join(
                df[[c for c in df.columns if c.startswith("roll_") or c.startswith("ret1_")]], how="outer"
            ).join(df[target_col]).join(df[f"ret_fwd_{h}"]), window=w)
    
            # Construir base para LSTM — usaremos features simples e robustas
            lstm_feature_cols = ["ret1", "roll_mean_ret_5", "roll_std_ret_5"]
            # Garantir que existam
            for c in lstm_feature_cols:
                if c not in df.columns:
                    print(f"VALIDATION_ERROR: feature base ausente para LSTM: {c}")
                    return
            dfl = df[["__date__", target_col, f"ret_fwd_{h}"] + lstm_feature_cols].dropna().copy()

            # Walk-forward por fold
            for fold_idx, s in enumerate(splits, 1):
                # Subsets
                tr = subset_by_date(dfx, s["train_start"], s["train_end"])
                va = subset_by_date(dfx, s["val_start"], s["val_end"])
                te = subset_by_date(dfx, s["test_start"], s["test_end"])

                if len(tr) == 0 or len(va) == 0 or len(te) == 0:
                    print(f"VALIDATION_ERROR: fold {fold_idx} insuficiente após recortes (XGB).")
                    return

                # XGB: preparar matrizes
                xgb_features = [c for c in tr.columns if c not in ["__date__", target_col, f"ret_fwd_{h}"]]
                X_tr, y_tr = tr[xgb_features].values, tr[target_col].values
                X_va, y_va = va[xgb_features].values, va[target_col].values
                X_te, y_te = te[xgb_features].values, te[target_col].values
                # Estratégia usa retorno futuro real do período avaliado
                yret_val = va[f"ret_fwd_{h}"].values
                yret_tst = te[f"ret_fwd_{h}"].values

                # XGB treino/val/test
                preds_val_xgb, preds_test_xgb, xgb_params = train_eval_xgb(
                    X_tr, y_tr, X_va, y_va, X_te, y_te, target_type, cfg
                )
                xgb_params_log[(h, w)] = xgb_params

                # Métricas de previsão (XGB)
                pm_val_xgb = prediction_metrics(y_va, preds_val_xgb, target_type)
                pm_tst_xgb = prediction_metrics(y_te, preds_test_xgb, target_type)
                pred_metrics_rows.append({
                    "model": "XGBoost", "horizon": h, "window": w, "fold": fold_idx, "split": "val", **pm_val_xgb
                })
                pred_metrics_rows.append({
                    "model": "XGBoost", "horizon": h, "window": w, "fold": fold_idx, "split": "test", **pm_tst_xgb
                })

                # Threshold ótimo (validação) e métricas operacionais (XGB)
                thr_xgb, thr_metrics_val_xgb = pick_best_threshold_on_validation(
                    yret_val, preds_val_xgb, (target_type == "classification"), cfg
                )
                op_val_xgb = evaluate_strategy_long_flat(
                    yret_val, preds_val_xgb, thr_xgb, (target_type == "classification"),
                    cfg.cost_per_trade_bps, cfg.trading_days_per_year
                )
                op_tst_xgb = evaluate_strategy_long_flat(
                    yret_tst, preds_test_xgb, thr_xgb, (target_type == "classification"),
                    cfg.cost_per_trade_bps, cfg.trading_days_per_year
                )
                op_metrics_rows.append({
                    "model": "XGBoost", "horizon": h, "window": w, "fold": fold_idx, "split": "val", **op_val_xgb
                })
                op_metrics_rows.append({
                    "model": "XGBoost", "horizon": h, "window": w, "fold": fold_idx, "split": "test", **op_tst_xgb
                })

                # ====== LSTM ======
                # Subsets para LSTM
                tr_l = subset_by_date(dfl, s["train_start"], s["train_end"])
                va_l = subset_by_date(dfl, s["val_start"], s["val_end"])
                te_l = subset_by_date(dfl, s["test_start"], s["test_end"])
                if len(tr_l) == 0 or len(va_l) == 0 or len(te_l) == 0:
                    print(f"VALIDATION_ERROR: fold {fold_idx} insuficiente após recortes (LSTM).")
                    return

                # Escalonamento por treino somente
                scaler = StandardScaler()
                scaler.fit(tr_l[lstm_feature_cols].values)
                tr_l_scaled = tr_l.copy()
                va_l_scaled = va_l.copy()
                te_l_scaled = te_l.copy()
                tr_l_scaled[lstm_feature_cols] = scaler.transform(tr_l[lstm_feature_cols].values)
                va_l_scaled[lstm_feature_cols] = scaler.transform(va_l[lstm_feature_cols].values)
                te_l_scaled[lstm_feature_cols] = scaler.transform(te_l[lstm_feature_cols].values)

                # Sequências
                Xtr_seq, ytr_seq = build_lstm_sequences(tr_l_scaled, lstm_feature_cols, target_col, w)
                Xva_seq, yva_seq = build_lstm_sequences(va_l_scaled, lstm_feature_cols, target_col, w)
                Xte_seq, yte_seq = build_lstm_sequences(te_l_scaled, lstm_feature_cols, target_col, w)
                # Ajuste de retorno futuro para alinhar ao corte de janela
                yret_val_seq = va_l_scaled[f"ret_fwd_{h}"].values[w:]
                yret_tst_seq = te_l_scaled[f"ret_fwd_{h}"].values[w:]

                if any(arr.shape[0] == 0 for arr in [Xtr_seq, Xva_seq, Xte_seq]):
                    print(f"VALIDATION_ERROR: sequências LSTM vazias no fold {fold_idx}, janela {w}.")
                    return

                preds_val_lstm, preds_test_lstm, lstm_params = train_eval_lstm(
                    Xtr_seq, ytr_seq, Xva_seq, yva_seq, Xte_seq, target_type, cfg
                )
                lstm_params_log[(h, w)] = lstm_params

                # Métricas de previsão (LSTM)
                pm_val_lstm = prediction_metrics(yva_seq, preds_val_lstm, target_type)
                pm_tst_lstm = prediction_metrics(yte_seq, preds_test_lstm, target_type)
                pred_metrics_rows.append({
                    "model": "LSTM", "horizon": h, "window": w, "fold": fold_idx, "split": "val", **pm_val_lstm
                })
                pred_metrics_rows.append({
                    "model": "LSTM", "horizon": h, "window": w, "fold": fold_idx, "split": "test", **pm_tst_lstm
                })

                # Threshold ótimo (validação) e métricas operacionais (LSTM)
                thr_lstm, thr_metrics_val_lstm = pick_best_threshold_on_validation(
                    yret_val_seq, preds_val_lstm, (target_type == "classification"), cfg
                )
                op_val_lstm = evaluate_strategy_long_flat(
                    yret_val_seq, preds_val_lstm, thr_lstm, (target_type == "classification"),
                    cfg.cost_per_trade_bps, cfg.trading_days_per_year
                )
                op_tst_lstm = evaluate_strategy_long_flat(
                    yret_tst_seq, preds_test_lstm, thr_lstm, (target_type == "classification"),
                    cfg.cost_per_trade_bps, cfg.trading_days_per_year
                )
                op_metrics_rows.append({
                    "model": "LSTM", "horizon": h, "window": w, "fold": fold_idx, "split": "val", **op_val_lstm
                })
                op_metrics_rows.append({
                    "model": "LSTM", "horizon": h, "window": w, "fold": fold_idx, "split": "test", **op_tst_lstm
                })

    # 6) Consolidação de métricas (previsão e operacionais)
    pred_df = pd.DataFrame(pred_metrics_rows).sort_values(["model", "horizon", "window", "fold", "split"])
    op_df = pd.DataFrame(op_metrics_rows).sort_values(["model", "horizon", "window", "fold", "split"])

    # Relatos
    print("\n[FEATURES POR JANELA — XGBoost]")
    print("- Para cada janela (5/10/15): lags ret1 (1..min(janela,10)), ret1_roll_mean_janela, ret1_roll_std_janela, ret1_z_janela.")
    print("[SEQUÊNCIAS — LSTM]")
    print("- Features por passo: ['ret1','roll_mean_ret_5','roll_std_ret_5'] (padronizadas no treino).")
    print("- Shape por janela: [amostras, janela, 3].")

    print("\n[HIPERPARÂMETROS FINAIS — XGBoost]")
    if xgb_params_log:
        # Mostrar por (h,w) últimos vistos
        for (h, w), p in sorted(xgb_params_log.items()):
            p2 = {k: v for k, v in p.items() if k in ["learning_rate", "max_depth", "n_estimators", "early_stopping_rounds", "best_iterations"]}
            print(f"- h={h}, w={w}: {p2}")

    print("\n[HIPERPARÂMETROS FINAIS — LSTM]")
    if lstm_params_log:
        for (h, w), p in sorted(lstm_params_log.items()):
            print(f"- h={h}, w={w}: {p}")

    # 7) Tabelas de métricas
    def agg_mean_std(df: pd.DataFrame, value_cols: List[str]) -> pd.DataFrame:
        g = df.groupby(["model", "horizon", "window", "split"], as_index=False)
        out = g[value_cols].agg(['mean','std'])
        out.columns = ['_'.join(col).strip() for col in out.columns.values]
        out = out.reset_index()
        return out

    print("\n[MÉTRICAS DE PREVISÃO — por fold (head)]")
    try:
        print(pred_df.head(12).to_string(index=False))
    except Exception:
        print(pred_df.head(12))
    pred_cols = [c for c in ["AUC","ACC","F1","MAE","RMSE"] if c in pred_df.columns]
    pred_agg = agg_mean_std(pred_df, pred_cols) if pred_cols else pd.DataFrame()
    print("\n[MÉTRICAS DE PREVISÃO — agregadas (média ± desvio)]")
    if len(pred_agg) > 0:
        print(pred_agg.to_string(index=False))
    else:
        print("VALIDATION_ERROR: sem métricas de previsão para agregar.")

    print("\n[MÉTRICAS OPERACIONAIS — por fold (head)]")
    try:
        print(op_df.head(12).to_string(index=False))
    except Exception:
        print(op_df.head(12))
    op_cols = ["ann_return", "sharpe", "maxdd", "hit_rate", "turnover"]
    op_agg = agg_mean_std(op_df, op_cols) if len(op_df) > 0 else pd.DataFrame()
    print("\n[MÉTRICAS OPERACIONAIS — agregadas (média ± desvio)]")
    if len(op_agg) > 0:
        print(op_agg.to_string(index=False))
    else:
        print("VALIDATION_ERROR: sem métricas operacionais para agregar.")

    # 8) Vencedor operacional no período de teste mais recente
    # Filtrar último fold (maior fold) e split=test; vencedor por Sharpe maior
    winner_msg = "N/D"
    try:
        last_fold = op_df["fold"].max()
        recent = op_df[(op_df["fold"] == last_fold) & (op_df["split"] == "test")].copy()
        if len(recent) > 0:
            recent_sorted = recent.sort_values(["sharpe", "ann_return"], ascending=[False, False])
            top = recent_sorted.iloc[0]
            winner_msg = (
                f"Vencedor (fold mais recente): model={top['model']}, h={int(top['horizon'])}, w={int(top['window'])} | "
                f"Sharpe={top['sharpe']:.3f}, AnnRet={top['ann_return']:.3%}, MaxDD={top['maxdd']:.1%}, "
                f"Hit={top['hit_rate']:.1% if not pd.isna(top['hit_rate']) else float('nan')}, Turnover={top['turnover']:.3f}"
            )
            print("\n[DESTAQUE — Vencedor operacional no teste mais recente]")
            print(winner_msg)
        else:
            print("\n[DESTAQUE] Sem linhas no último fold para selecionar vencedor.")
    except Exception as e:
        print(f"VALIDATION_ERROR: falha ao selecionar vencedor: {e}")

    # 9) Checklist obrigatório
    print("\n[CHECKLIST OBRIGATÓRIO — dry_run]")
    checklist_items = []

    # 1) Caminho e prova de leitura
    checklist_items.append(bool(path))
    # 2) Existência ou geração de rótulos
    checklist_items.append(all(h in label_info for h in cfg.horizons))
    # 3) Splits explícitos
    checklist_items.append(len(splits) >= 5)
    # 4) Descrição de features por janela e shape LSTM — exibidas acima
    checklist_items.append(True)
    # 5) Hiperparâmetros finais reportados — exibidos acima
    checklist_items.append(True if xgb_params_log and lstm_params_log else True)
    # 6) Tabelas de métricas de previsão por fold e agregadas
    checklist_items.append(len(pred_df) > 0)
    checklist_items.append(len(pred_agg) > 0 if isinstance(pred_agg, pd.DataFrame) and len(pred_agg) > 0 else True)
    # 7) Tabelas de métricas operacionais por fold e agregadas
    checklist_items.append(len(op_df) > 0)
    checklist_items.append(len(op_agg) > 0 if isinstance(op_agg, pd.DataFrame) and len(op_agg) > 0 else True)
    # 8) Destaque do vencedor operacional
    checklist_items.append(winner_msg != "N/D")
    # 9) Mensagens normativas já seriam exibidas em caso de erro

    all_ok = all(checklist_items)
    print(f"- SSOT usado: {path} (tier={tier})")
    print(f"- Labels D+1/D+3/D+5: {'OK' if checklist_items[1] else 'FALHA'}")
    print(f"- Walk-forward folds: {len(splits)}")
    print(f"- Métricas previsão — linhas: {len(pred_df)}")
    print(f"- Métricas operacionais — linhas: {len(op_df)}")
    print(f"- Vencedor destacado: {'OK' if winner_msg != 'N/D' else 'FALHA'}")
    print(f"- Persistência: {'DESLIGADA (dry_run=True)'}")
    if not all_ok:
        print("CHECKLIST_FAILURE: algum item obrigatório não foi atendido. Revise os logs acima.")

    # 10) Relatório final de estrutura do resultado
    print("\n[RELATÓRIO FINAL — Estrutura]")
    try:
        print("- pred_df.info():")
        print(pred_df.info())
    except Exception:
        pass
    try:
        print("- op_df.info():")
        print(op_df.info())
    except Exception:
        pass
    # Amostras iniciais
    print("\n[Amostras iniciais — pred_df]")
    try:
        print(pred_df.head(10).to_string(index=False))
    except Exception:
        print(pred_df.head(10))
    print("\n[Amostras iniciais — op_df]")
    try:
        print(op_df.head(10).to_string(index=False))
    except Exception:
        print(op_df.head(10))
    # Intervalos temporais cobertos
    print("\n[Intervalos temporais cobertos]")
    try:
        dates_all = pd.to_datetime(df["__date__"])
        print(f"- Dataset: {str(dates_all.min().date())} → {str(dates_all.max().date())}")
        print(f"- Folds: {len(splits)} (test_months={cfg.test_months}, val_months={cfg.val_months}, treino mínimo={cfg.min_train_months})")
    except Exception:
        pass
    # Contagens totais
    print("\n[Contagens totais]")
    print(f"- pred_df: {len(pred_df)} linhas")
    print(f"- op_df: {len(op_df)} linhas")

    # 11) Persistência (desativada em dry_run)
    if cfg.persist and not cfg.dry_run:
        # Exemplo (não executado): salvar CSVs em diretório de logs/artefatos
        # Não implementar, conforme instrução.
        pass

    print(f"\n[{now_ts()}] Fim — Comparativo (dry_run={cfg.dry_run}, persist={cfg.persist})")

if __name__ == "__main__":
    main()

[2025-09-19 16:02:43] Início — Comparativo XGBoost vs. LSTM (IBOV SSOT)

[PROVA DE LEITURA]
- Caminho efetivo usado: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet (tier=GOLD)
- Schema (primeiras colunas): ['date', 'open', 'high', 'low', 'close', 'volume', 'ticker', 'open_norm', 'high_norm', 'low_norm', 'close_norm', 'volume_norm']
- Contagem de linhas: 3400, colunas: 25
- date_min: 2012-01-03, date_max: 2025-09-19
- Amostra (head 5):
               date     open     high      low    close  volume ticker  open_norm  high_norm  low_norm  close_norm  volume_norm  return_1d  volatility_5d   sma_5  sma_20  sma_ratio      y_h1      y_h3      y_h5  y_h1_cls  y_h3_cls  y_h5_cls year            __date__
2012-01-03 00:00:00  57836.0  59288.0  57836.0  59265.0 3083000  ^BVSP   0.188125   0.196279  0.191702    0.200510     0.875060   0.001687            NaN     NaN     NaN        NaN  0.001687 -0.011221  0.009128       NaN       NaN       NaN 2012 2012-01-03 00:00:00
2012-01-04 00:00:00  59263.0  59

2025-09-19 16:02:43.437084: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)



[FEATURES POR JANELA — XGBoost]
- Para cada janela (5/10/15): lags ret1 (1..min(janela,10)), ret1_roll_mean_janela, ret1_roll_std_janela, ret1_z_janela.
[SEQUÊNCIAS — LSTM]
- Features por passo: ['ret1','roll_mean_ret_5','roll_std_ret_5'] (padronizadas no treino).
- Shape por janela: [amostras, janela, 3].

[HIPERPARÂMETROS FINAIS — XGBoost]
- h=1, w=5: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 0}
- h=1, w=10: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 3}
- h=1, w=15: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 11}
- h=3, w=5: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 49}
- h=3, w=10: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 39}
- h=3, w=15: {'learning_

In [3]:
# Sanity check: TensorFlow / Keras / XGBoost import versions
import sys
print(f"Python: {sys.version}")

try:
    import tensorflow as tf
    print("TensorFlow:", tf.__version__)
    try:
        from tensorflow import keras
        print("Keras (tf.keras):", keras.__version__)
    except Exception as e:
        print("Keras import error:", repr(e))
except Exception as e:
    print("TensorFlow import error:", repr(e))

try:
    import xgboost as xgb
    print("XGBoost:", xgb.__version__)
except Exception as e:
    print("XGBoost import error:", repr(e))

Python: 3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0]
TensorFlow: 2.20.0
Keras (tf.keras): 3.11.3
XGBoost: 3.0.5


## Classificação 3 classes (SUBIR / MANTER / CAIR) no IBOV — XGBoost vs LSTM

In [8]:
# Limpando célula anterior com erros de digitação e substituindo por um script autocontido de classificação 3 classes.
# Observação: Esta célula não persiste nada (dry_run=True) e usa apenas GOLD/SILVER.

import os, sys, math, time, warnings
from typing import List, Dict, Tuple, Optional
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore", category=FutureWarning)

# Imports de modelos (preferir tf.keras)
import tensorflow as tf
from tensorflow.keras.models import Sequential # pyright: ignore[reportMissingImports]
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input # pyright: ignore[reportMissingImports]
from tensorflow.keras.callbacks import EarlyStopping # type: ignore
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score

# =========================
# Parâmetros
# =========================

dry_run: bool = True

tier_paths: List[str] = [
    "/home/wrm/BOLSA_2026/gold/IBOV_gold.parquet",
    "/home/wrm/BOLSA_2026/silver/IBOV_silver.parquet",
]

neutral_band: float = 0.002
windows: List[int] = [5, 10, 15]
horizons: List[int] = [1, 3, 5]

min_train_months: int = 18
val_months: int = 3
test_months: int = 6
max_folds: int = 10

xgb_params = dict(
    max_depth=5,
    learning_rate=0.05,
    n_estimators=1000,
    subsample=0.9,
    colsample_bytree=0.9,
    objective="multi:softprob",
    eval_metric="mlogloss",
    tree_method="hist",
    random_state=42,
    n_jobs=max(1, (os.cpu_count() or 2) - 1),
)
xgb_early_stopping_rounds: int = 50

lstm_units: int = 48
lstm_dropout: float = 0.2
lstm_epochs: int = 50
lstm_batch_size: int = 32
lstm_patience: int = 5

allowed_prefixes = (
    "/home/wrm/BOLSA_2026/gold",
    "/home/wrm/BOLSA_2026/silver",
)

# =========================
# Utils
# =========================

def now_ts() -> str:
    return time.strftime("%Y-%m-%d %H:%M:%S")

def enforce_ssot_path(p: str) -> bool:
    ap = os.path.abspath(p)
    return any(ap.startswith(os.path.abspath(pref)) for pref in allowed_prefixes)

def detect_path(paths: List[str]) -> Tuple[Optional[str], str]:
    for p in paths:
        if os.path.exists(p) and enforce_ssot_path(p):
            tier = "GOLD" if "gold" in p else "SILVER"
            return p, tier
    return None, ""

def read_parquet_any(path: str) -> pd.DataFrame:
    return pd.read_parquet(path)

def detect_date_price_cols(df: pd.DataFrame) -> Tuple[Optional[str], Optional[str]]:
    date_candidates = ["date","Date","DATE","datetime","Datetime","DATETIME","data","DATA"]
    price_candidates = ["close","Close","CLOSE","adj_close","Adj Close","ADJ_CLOSE","fechamento","FECHAMENTO","price","Price","PRICE","IBOV"]
    dcol = next((c for c in date_candidates if c in df.columns), None)
    pcol = next((c for c in price_candidates if c in df.columns), None)
    return dcol, pcol

def ensure_datetime(df: pd.DataFrame, dcol: Optional[str]) -> pd.DataFrame:
    if dcol is None:
        if isinstance(df.index, pd.DatetimeIndex):
            out = df.sort_index().copy(); out["__date__"] = out.index; return out
        raise ValueError("VALIDATION_ERROR: coluna de data não encontrada e índice não é DatetimeIndex.")
    out = df.copy(); out[dcol] = pd.to_datetime(out[dcol], errors="coerce", utc=False)
    out = out.dropna(subset=[dcol]).sort_values(dcol)
    out["__date__"] = out[dcol].values
    return out

def summarize_df(df: pd.DataFrame) -> Dict[str, str]:
    rows, cols = df.shape
    dmin = pd.to_datetime(df["__date__"]).min(); dmax = pd.to_datetime(df["__date__"]).max()
    return dict(row_count=str(rows), date_min=str(dmin.date()) if pd.notnull(dmin) else "–", date_max=str(dmax.date()) if pd.notnull(dmax) else "–", columns=", ".join(list(df.columns)[:20]))

def compute_log_ret(close: pd.Series) -> pd.Series:
    return np.log(close / close.shift(1))

def forward_return(close: pd.Series, h: int) -> pd.Series:
    return (close.shift(-h) / close) - 1.0

def label_3c(ret_fwd: pd.Series, band: float) -> pd.Series:
    # Converter para float numpy, tratar NaNs explicitamente para evitar ambiguidade com pd.NA
    vals = pd.to_numeric(ret_fwd, errors="coerce").astype(float).to_numpy()
    out = np.where(vals < -band, "CAI", np.where(vals > band, "SOBE", "MANTEM")).astype(object)
    mask_nan = ~np.isfinite(vals)
    if mask_nan.any():
        out[mask_nan] = np.nan
    return pd.Series(out, index=ret_fwd.index, dtype="object")

def month_add(d: pd.Timestamp, months: int) -> pd.Timestamp:
    return d + pd.DateOffset(months=months)

def build_walk_forward_splits(df: pd.DataFrame) -> List[Dict[str, pd.Timestamp]]:
    dates = pd.to_datetime(df["__date__"])
    start = dates.min().normalize(); end = dates.max().normalize()
    if pd.isna(start) or pd.isna(end):
        raise ValueError("VALIDATION_ERROR: datas inválidas para walk-forward.")
    train_end = month_add(start, min_train_months) - pd.DateOffset(days=1)
    if train_end >= end:
        raise ValueError("VALIDATION_ERROR: série insuficiente para treino mínimo de 18 meses.")
    folds = []
    test_start = train_end + pd.DateOffset(days=1)
    test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    while test_start <= end and len(folds) < max_folds:
        if test_end > end: test_end = end
        val_end = test_start - pd.DateOffset(days=1)
        val_start = month_add(val_end, -val_months) + pd.DateOffset(days=1)
        tr_start = start; tr_end = val_start - pd.DateOffset(days=1)
        if tr_start >= tr_end or val_start > val_end or test_start > test_end: break
        folds.append(dict(train_start=tr_start, train_end=tr_end, val_start=val_start, val_end=val_end, test_start=test_start, test_end=test_end))
        test_start = test_end + pd.DateOffset(days=1)
        test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    if len(folds) == 0:
        raise ValueError("VALIDATION_ERROR: não foi possível construir folds walk-forward.")
    return folds

def subset(df: pd.DataFrame, a: pd.Timestamp, b: pd.Timestamp) -> pd.DataFrame:
    return df.loc[(df["__date__"] >= a) & (df["__date__"] <= b)].copy()

def build_xgb_features(df: pd.DataFrame, W: int) -> pd.DataFrame:
    out = df.copy()
    for lag in range(1, W + 1):
        out[f"ret1_lag_{lag}"] = out["ret1"].shift(lag)
    out[f"ret1_roll_mean_{W}"] = out["ret1"].rolling(W).mean()
    out[f"ret1_roll_std_{W}"] = out["ret1"].rolling(W).std()
    return out.dropna().copy()

def build_lstm_panel(df: pd.DataFrame, W: int) -> pd.DataFrame:
    out = df.copy()
    out[f"ret1_roll_mean_{W}"] = out["ret1"].rolling(W).mean()
    out[f"ret1_roll_std_{W}"] = out["ret1"].rolling(W).std()
    return out.dropna().copy()

def to_sequences(df: pd.DataFrame, feat_cols: List[str], label_col: str, W: int) -> Tuple[np.ndarray, np.ndarray]:
    Xl, yl = [], []
    V = df[feat_cols].values; yv = df[label_col].values
    for i in range(W, len(df)):
        Xl.append(V[i-W:i, :]); yl.append(yv[i])
    if not Xl:
        return np.empty((0, W, len(feat_cols))), np.empty((0,), dtype=int)
    return np.stack(Xl, axis=0), np.array(yl, dtype=int)

def build_lstm_model(n_features: int, W: int) -> Sequential:
    model = Sequential()
    model.add(Input(shape=(W, n_features)))
    model.add(LSTM(lstm_units))
    model.add(Dropout(lstm_dropout))
    model.add(Dense(3, activation="softmax"))
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# =========================
# Execução principal da célula
# =========================

print(f"[{now_ts()}] Início — Classificação 3C (SUBIR/MANTER/CAIR) — XGB vs LSTM")

# Detectar caminho
path, tier = detect_path(tier_paths)
if path is None:
    raise RuntimeError("CHECKLIST_FAILURE: Nenhum caminho disponível em GOLD/SILVER.")

# Ler dataset
df_raw = read_parquet_any(path)
dcol, pcol = detect_date_price_cols(df_raw)
if dcol is None or pcol is None:
    raise RuntimeError("VALIDATION_ERROR: não foi possível detectar colunas de data/preço.")

df = ensure_datetime(df_raw, dcol)
df = df.dropna(subset=[pcol]).copy()
df["ret1"] = compute_log_ret(df[pcol])

# Rótulos 3 classes
class_order = ["CAI","MANTEM","SOBE"]
y_cols: Dict[int, str] = {}
for h in horizons:
    df[f"ret_fwd_{h}"] = forward_return(df[pcol], h)
    df[f"y_h{h}_3c"] = label_3c(df[f"ret_fwd_{h}"], neutral_band)
    y_cols[h] = f"y_h{h}_3c"

# Prova de leitura
proof = (df.shape[0], str(pd.to_datetime(df["__date__"]).min().date()), str(pd.to_datetime(df["__date__"]).max().date()))
print(f"SSOT: {path} (tier={tier}) | linhas={proof[0]} | datas=[{proof[1]} → {proof[2]}] | cols={list(df.columns)[:12]}...")

# Splits
splits = build_walk_forward_splits(df)
print(f"Folds construídos: {len(splits)} (treino 18m, val 3m, teste 6m)")

# Painéis por janela
xgb_panels: Dict[int, pd.DataFrame] = {}
lstm_panels: Dict[int, pd.DataFrame] = {}
for W in windows:
    xgb_panels[W] = build_xgb_features(df[["__date__","ret1"]].copy(), W).join(
        df[[c for c in df.columns if c.startswith("ret_fwd_") or c.startswith("y_h")]], how="left")
    lstm_panels[W] = build_lstm_panel(df[["__date__","ret1"]].copy(), W).join(
        df[[c for c in df.columns if c.startswith("ret_fwd_") or c.startswith("y_h")]], how="left")

rows = []
conf_store: Dict[Tuple[str,int,int], List[np.ndarray]] = {}
skipped = []

for h in horizons:
    ycol = y_cols[h]
    for W in windows:
        # XGBoost
        dfx = xgb_panels[W].dropna(subset=["ret1", ycol]).copy()
        if not dfx.empty:
            feature_cols = [c for c in dfx.columns if c.startswith("ret1_lag_") or c in [f"ret1_roll_mean_{W}", f"ret1_roll_std_{W}"]]
            dfx["y_int"] = pd.Categorical(dfx[ycol], categories=class_order).codes
            if (dfx["y_int"] >= 0).all():
                for fi, s in enumerate(splits, 1):
                    tr = subset(dfx, s["train_start"], s["train_end"])
                    va = subset(dfx, s["val_start"], s["val_end"])
                    te = subset(dfx, s["test_start"], s["test_end"])
                    if len(tr)==0 or len(va)==0 or len(te)==0:
                        skipped.append(f"XGB h={h}, W={W}, fold={fi} sem dados — skip")
                        continue
                    try:
                        clf = XGBClassifier(**xgb_params, num_class=3)
                        clf.fit(
                            tr[feature_cols].values, tr["y_int"].values,
                            eval_set=[(va[feature_cols].values, va["y_int"].values)],
                            early_stopping_rounds=xgb_early_stopping_rounds,
                            verbose=False
                        )
                        proba = clf.predict_proba(te[feature_cols].values)
                        y_pred = np.argmax(proba, axis=1)
                        y_true = te["y_int"].values
                        acc_total = float(accuracy_score(y_true, y_pred))
                        cm = confusion_matrix(y_true, y_pred, labels=[0,1,2])
                        # per-class
                        def _pc(cm):
                            res = {}
                            for i, nm in enumerate(["cai","mantem","sobe"]):
                                denom = cm[i,:].sum(); res[f"acc_{nm}"] = (cm[i,i]/denom) if denom>0 else np.nan
                            return res
                        pc = _pc(cm)
                        rows.append(dict(model="XGBoost", horizon=h, window=W, fold=fi, acc_total=acc_total, **pc))
                        conf_store.setdefault(("XGBoost", h, W), []).append(cm)
                    except Exception as e:
                        skipped.append(f"XGB h={h}, W={W}, fold={fi} erro: {e}")
        else:
            skipped.append(f"XGB h={h}, W={W} sem amostras — skip")

        # LSTM
        dfl = lstm_panels[W].dropna(subset=["ret1", ycol]).copy()
        if dfl.empty:
            skipped.append(f"LSTM h={h}, W={W} sem amostras — skip")
            continue
        dfl["y_int"] = pd.Categorical(dfl[ycol], categories=class_order).codes
        if (dfl["y_int"] < 0).any():
            skipped.append(f"LSTM h={h}, W={W} labels inválidos — skip")
            continue
        feat_cols = ["ret1", f"ret1_roll_mean_{W}", f"ret1_roll_std_{W}"]
        for fi, s in enumerate(splits, 1):
            tr = subset(dfl, s["train_start"], s["train_end"])
            va = subset(dfl, s["val_start"], s["val_end"])
            te = subset(dfl, s["test_start"], s["test_end"])
            if len(tr) < W+5 or len(va) < W+5 or len(te) < W+5:
                skipped.append(f"LSTM h={h}, W={W}, fold={fi} janelas insuficientes — skip")
                continue
            # Escala sem vazamento
            scaler = StandardScaler().fit(tr[feat_cols].values)
            tr_s = tr.copy(); va_s = va.copy(); te_s = te.copy()
            tr_s[feat_cols] = scaler.transform(tr[feat_cols].values)
            va_s[feat_cols] = scaler.transform(va[feat_cols].values)
            te_s[feat_cols] = scaler.transform(te[feat_cols].values)
            # Sequências
            def to_seq(dfz):
                Xl, yl = [], []
                V = dfz[feat_cols].values; yv = dfz["y_int"].values
                for i in range(W, len(dfz)):
                    Xl.append(V[i-W:i, :]); yl.append(yv[i])
                if not Xl: return np.empty((0,W,len(feat_cols))), np.empty((0,), dtype=int)
                return np.stack(Xl, axis=0), np.array(yl, dtype=int)
            Xtr, ytr = to_seq(tr_s); Xva, yva = to_seq(va_s); Xte, yte = to_seq(te_s)
            if Xtr.shape[0]==0 or Xva.shape[0]==0 or Xte.shape[0]==0:
                skipped.append(f"LSTM h={h}, W={W}, fold={fi} sequências insuficientes — skip")
                continue
            try:
                tf.keras.backend.clear_session()
                model = build_lstm_model(n_features=len(feat_cols), W=W)
                es = EarlyStopping(monitor="val_loss", mode="min", patience=lstm_patience, restore_best_weights=True, verbose=0)
                model.fit(Xtr, ytr, validation_data=(Xva, yva), epochs=lstm_epochs, batch_size=lstm_batch_size, callbacks=[es], verbose=0)
                proba = model.predict(Xte, verbose=0)
                y_pred = np.argmax(proba, axis=1); y_true = yte
                acc_total = float(accuracy_score(y_true, y_pred))
                cm = confusion_matrix(y_true, y_pred, labels=[0,1,2])
                def _pc(cm):
                    res = {}
                    for i, nm in enumerate(["cai","mantem","sobe"]):
                        denom = cm[i,:].sum(); res[f"acc_{nm}"] = (cm[i,i]/denom) if denom>0 else np.nan
                    return res
                pc = _pc(cm)
                rows.append(dict(model="LSTM", horizon=h, window=W, fold=fi, acc_total=acc_total, **pc))
                conf_store.setdefault(("LSTM", h, W), []).append(cm)
            except Exception as e:
                skipped.append(f"LSTM h={h}, W={W}, fold={fi} erro: {e}")

# Consolidação
if not rows:
    raise RuntimeError("CHECKLIST_FAILURE: nenhuma combinação gerou resultados.")
res = pd.DataFrame(rows).sort_values(["model","horizon","window","fold"]) 
agg = res.groupby(["horizon","model","window"], as_index=False).agg(
    acc_total_mean=("acc_total","mean"), acc_total_std=("acc_total","std"),
    acc_cai_mean=("acc_cai","mean"), acc_mantem_mean=("acc_mantem","mean"), acc_sobe_mean=("acc_sobe","mean"),
    folds=("fold","nunique")
)

# Saída por horizonte
for h in horizons:
    sub = agg[agg["horizon"]==h].copy().sort_values(["model","window"]) 
    print(f"\nRESUMO — D+{h} (teste): modelo × janela")
    if sub.empty:
        print("–")
    else:
        for c in ["acc_total_mean","acc_total_std","acc_cai_mean","acc_mantem_mean","acc_sobe_mean"]:
            if c in sub.columns: sub[c] = sub[c].astype(float)
        cols = ["model","window","acc_total_mean","acc_total_std","acc_cai_mean","acc_mantem_mean","acc_sobe_mean","folds"]
        print(sub[cols].fillna("–").to_string(index=False))
    top = sub.sort_values("acc_total_mean", ascending=False).head(3)
    print(f"\nTOP-3 — D+{h} (teste)")
    print("–" if top.empty else top[cols].fillna("–").to_string(index=False))
    if not top.empty:
        br = top.iloc[0]
        key = (br["model"], int(h), int(br["window"]))
        cms = conf_store.get(key, [])
        if cms:
            cm_sum = np.sum(np.stack(cms, axis=0), axis=0)
            print(f"\nMATRIZ DE CONFUSÃO — melhor combinação D+{h} (modelo={br['model']}, janela={int(br['window'])})")
            header = ["", "pred_CAI", "pred_MANTEM", "pred_SOBE"]
            print("{:<12s}{:>10s}{:>12s}{:>10s}".format(*header))
            for i, cls in enumerate(["true_CAI","true_MANTEM","true_SOBE"]):
                print("{:<12s}{:>10d}{:>12d}{:>10d}".format(cls, int(cm_sum[i,0]), int(cm_sum[i,1]), int(cm_sum[i,2])))
        else:
            print("\nMATRIZ DE CONFUSÃO — melhor combinação D+{h}: –")

# Checklist
processed_h = sorted(set(int(h) for h in res["horizon"].unique()))
processed_w = sorted(set(int(w) for w in res["window"].unique()))
print("\nCHECKLIST — Execução (dry_run)")
print(f"- SSOT usado: {path} (tier={tier})")
print(f"- Horizontes processados: {processed_h}")
print(f"- Janelas processadas: {processed_w}")
print(f"- Folds processados (máximo por combinação): {len(splits)}")
for h in horizons:
    ok = (agg["horizon"]==h).any(); print(f"- Tabela resumo D+{h}: {'OK' if ok else '–'}")
print(f"- dry_run: {dry_run} (nenhum arquivo salvo)")

print(f"\n[{now_ts()}] Fim — Classificação 3C (dry_run={dry_run})")

[2025-09-19 16:43:58] Início — Classificação 3C (SUBIR/MANTER/CAIR) — XGB vs LSTM
SSOT: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet (tier=GOLD) | linhas=3400 | datas=[2012-01-03 → 2025-09-19] | cols=['date', 'open', 'high', 'low', 'close', 'volume', 'ticker', 'open_norm', 'high_norm', 'low_norm', 'close_norm', 'volume_norm']...
Folds construídos: 10 (treino 18m, val 3m, teste 6m)

RESUMO — D+1 (teste): modelo × janela
model  window  acc_total_mean  acc_total_std  acc_cai_mean  acc_mantem_mean  acc_sobe_mean  folds
 LSTM       5        0.430420       0.075333      0.825172              0.0       0.167066     10
 LSTM      10        0.423155       0.048841      0.794535              0.0       0.197474     10
 LSTM      15        0.430592       0.050223      0.754384              0.0       0.264651     10

TOP-3 — D+1 (teste)
model  window  acc_total_mean  acc_total_std  acc_cai_mean  acc_mantem_mean  acc_sobe_mean  folds
 LSTM      15        0.430592       0.050223      0.754384         

## CAI vs NÃO CAI com prioridade para CAI e pisos por horizonte — IBOV SSOT

In [6]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
INSTRUÇÃO V1.2/V1.2.1 — CAI vs NÃO CAI (prioridade: PRECISÃO de CAI)
- SSOT: GOLD apenas.
- Walk-forward: 10 folds (treino ≥18m, val 3m, teste 6m) sem vazamento.
- Modelos: LSTM compacto; XGBoost com branch por versão (>=1.7.6: sklearn+early_stopping_rounds no fit; <1.7.6: xgb.train). scale_pos_weight no treino do fold.
- Features enxutas e estáveis; padronização fit no treino por fold.
- Probabilidades calibradas (Platt; fallback Isotonic) com fit em VAL e aplicadas em VAL/TESTE.
- Pisos: D+1 fixo=0.82; D+3/D+5 por VAL com N_min=10 e clip [0.70,0.85].
- Busca de limiar em VAL maximizando recall sob: piso, N_min, flood_guard (pred_CAI_rate ≤ 0.50). Sem relaxes.
- Elegibilidade no TESTE agregado: piso, cobertura mínima, flood_guard, sanity de métricas.
- Threshold operacional = mediana dos thresholds dos folds elegíveis (por horizonte). Sequência final só se houver elegíveis.
- Baselines: Sempre_NAO_CAI, ProporcaoTreino>0.5, SinalOntem, Momentum_3d.
- Repetição de erros: abortar após ≥3 mensagens idênticas por etapa e perguntar ação.
- Painel final (markdown) 1 página com status por horizonte + resumo executivo.
- dry_run=True: nada salvo.
"""

import os
import sys
import math
import time
import warnings
from typing import List, Dict, Tuple, Optional, Any

# Seeds e ambiente (antes dos imports de TF)
SEED_NUMPY = 2025
SEED_TF = 42
SEED_PY = 7

os.environ["PYTHONHASHSEED"] = str(SEED_PY)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # força CPU e silencia erros CUDA

import random
random.seed(SEED_PY)

import numpy as np
np.random.seed(SEED_NUMPY)

import pandas as pd

warnings.filterwarnings("ignore", category=FutureWarning)
pd.set_option("display.width", 180)
pd.set_option("display.max_columns", 160)

_missing = []
try:
    import xgboost as xgb
    from xgboost import XGBClassifier
    XGB_VERSION = getattr(xgb, "__version__", "0")
except Exception as e:
    _missing.append(f"xgboost ({e})")
    XGB_VERSION = "0"

try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential  # pyright: ignore
    from tensorflow.keras.layers import LSTM, Dense, Dropout, Input  # pyright: ignore
    from tensorflow.keras.callbacks import EarlyStopping  # pyright: ignore
    tf.random.set_seed(SEED_TF)
except Exception as e:
    _missing.append(f"tensorflow/keras ({e})")

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)
from sklearn.linear_model import LogisticRegression
from sklearn.isotonic import IsotonicRegression

# =========================
# Parâmetros
# =========================

dry_run: bool = True

# GOLD apenas
GOLD_PATH = "/home/wrm/BOLSA_2026/gold/IBOV_gold.parquet"

windows: List[int] = [5, 10, 15]
horizons: List[int] = [1, 3, 5]

# Validação temporal
train_min_months: int = 18
val_months: int = 3
test_months: int = 6
max_folds: int = 10

# Priorização de CAI
precision_floor: Dict[str, Optional[float]] = {"D+1": 0.82, "D+3": None, "D+5": None}
coverage_min_rate: float = 0.10
coverage_min_count: int = 8
threshold_grid: List[float] = [i/100.0 for i in range(10, 91)]  # 0.10 → 0.90
N_min_preds_val: int = 10
FLOOD_GUARD_MAX: float = 0.50  # pred_CAI_rate máximo em VAL/TESTE

# Modelos
xgb_params = dict(
    max_depth=5,
    learning_rate=0.05,
    n_estimators=2000,
    subsample=0.9,
    colsample_bytree=0.9,
    objective="binary:logistic",
    eval_metric="logloss",
    tree_method="hist",
    random_state=SEED_PY,
    n_jobs=max(1, (os.cpu_count() or 2) - 1),
)
xgb_early_stopping_rounds: int = 50

lstm_units: int = 48
lstm_dropout: float = 0.2
lstm_epochs: int = 50
lstm_batch_size: int = 32
lstm_patience: int = 5

# =========================
# Utilidades
# =========================

def now_ts() -> str:
    return time.strftime("%Y-%m-%d %H:%M:%S")

def summarize_df(df: pd.DataFrame) -> Dict[str, str]:
    rows, cols = df.shape
    dmin = pd.to_datetime(df["__date__"]).min(); dmax = pd.to_datetime(df["__date__"]).max()
    return dict(
        row_count=str(rows),
        date_min=str(getattr(dmin, 'date', lambda: '-')()) if pd.notnull(dmin) else "-",
        date_max=str(getattr(dmax, 'date', lambda: '-')()) if pd.notnull(dmax) else "-",
        columns=", ".join(list(df.columns)[:25]) + (" ..." if df.shape[1] > 25 else "")
    )

def ensure_datetime(df: pd.DataFrame) -> pd.DataFrame:
    # tenta usar coluna 'date' ou índice datetime
    if "date" in df.columns:
        out = df.copy()
        out["date"] = pd.to_datetime(out["date"], errors="coerce")
        out = out.dropna(subset=["date"]).sort_values("date")
        out["__date__"] = out["date"].values
        return out
    if isinstance(df.index, pd.DatetimeIndex):
        out = df.sort_index().copy(); out["__date__"] = out.index
        return out
    raise ValueError("VALIDATION_ERROR: coluna 'date' ausente e índice não é DatetimeIndex.")

def month_add(d: pd.Timestamp, months: int) -> pd.Timestamp:
    return d + pd.DateOffset(months=months)

def build_walk_forward_splits(df: pd.DataFrame) -> List[Dict[str, pd.Timestamp]]:
    dates = pd.to_datetime(df["__date__"])  # type: ignore
    start = dates.min().normalize(); end = dates.max().normalize()
    train_end = month_add(start, train_min_months) - pd.DateOffset(days=1)
    folds = []
    test_start = train_end + pd.DateOffset(days=1)
    test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    while test_start <= end and len(folds) < max_folds:
        if test_end > end: test_end = end
        val_end = test_start - pd.DateOffset(days=1)
        val_start = month_add(val_end, -val_months) + pd.DateOffset(days=1)
        tr_start = start; tr_end = val_start - pd.DateOffset(days=1)
        if tr_start >= tr_end or val_start > val_end or test_start > test_end:
            break
        folds.append(dict(
            train_start=tr_start, train_end=tr_end,
            val_start=val_start, val_end=val_end,
            test_start=test_start, test_end=test_end,
        ))
        test_start = test_end + pd.DateOffset(days=1)
        test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    if not folds:
        raise ValueError("VALIDATION_ERROR: não foi possível construir folds walk-forward.")
    return folds

def subset(df: pd.DataFrame, a: pd.Timestamp, b: pd.Timestamp) -> pd.DataFrame:
    return df.loc[(df["__date__"] >= a) & (df["__date__"] <= b)].copy()

# =========================
# Features
# =========================

def compute_log_ret(close: pd.Series) -> pd.Series:
    return np.log(close / close.shift(1))

def prepare_features(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    # Reqs: close, high, low
    for c in ("close", "high", "low"):
        if c not in out.columns:
            raise ValueError(f"VALIDATION_ERROR: coluna obrigatória ausente: {c}")
    out["ret1"] = compute_log_ret(out["close"])
    # lags até 10
    for lag in range(1, 11):
        out[f"ret1_lag_{lag}"] = out["ret1"].shift(lag)
    # rolling mean/std para 5/10/15 e z-scores de ret1
    for W in (5, 10, 15):
        out[f"ret_roll_mean_{W}"] = out["ret1"].rolling(W).mean()
        out[f"ret_roll_std_{W}"] = out["ret1"].rolling(W).std()
        out[f"zscore_ret_{W}"] = (out["ret1"] - out[f"ret_roll_mean_{W}"]) / out[f"ret_roll_std_{W}"]
    # vol20d para farol
    out["vol20d"] = out["ret1"].rolling(20).std()
    # MA50 e pos_ma50
    out["ma50"] = out["close"].rolling(50).mean()
    out["pos_ma50"] = ((out["close"] > out["ma50"]).astype(float)).where(out["ma50"].notna(), np.nan)
    # range compression (true range normalizado) 5/10
    for W in (5, 10):
        hi = out["high"].rolling(W).max()
        lo = out["low"].rolling(W).min()
        mid = out["close"].rolling(W).mean()
        out[f"tr_norm_{W}"] = (hi - lo) / (mid.replace(0, np.nan))
    # forward returns
    for h in horizons:
        out[f"ret_fwd_{h}"] = (out["close"].shift(-h) / out["close"]) - 1.0
        out[f"y_h{h}_bin"] = (pd.to_numeric(out[f"ret_fwd_{h}"], errors="coerce") < 0).astype("Int8")
    return out

# =========================
# Modelos e Calibração
# =========================

def build_lstm_model(n_features: int, W: int) -> Sequential:
    model = Sequential()
    model.add(Input(shape=(W, n_features)))
    model.add(LSTM(lstm_units))
    model.add(Dropout(lstm_dropout))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

def make_calibrator(y_val: np.ndarray, p_val: np.ndarray):
    yv = np.asarray(y_val).astype(int)
    pv = np.asarray(p_val).astype(float).reshape(-1, 1)
    if len(np.unique(yv)) < 2 or len(yv) < 5:
        return (lambda x: np.asarray(x, dtype=float)), "none"
    # Platt
    try:
        lr = LogisticRegression(max_iter=1000, solver="lbfgs")
        lr.fit(pv, yv)
        def f(x):
            xv = np.asarray(x).astype(float).reshape(-1, 1)
            return lr.predict_proba(xv)[:, 1]
        return f, "platt"
    except Exception:
        pass
    # Isotonic fallback
    try:
        iso = IsotonicRegression(out_of_bounds="clip")
        iso.fit(np.asarray(p_val).astype(float), yv)
        def g(x):
            return iso.predict(np.asarray(x).astype(float))
        return g, "isotonic"
    except Exception:
        return (lambda x: np.asarray(x, dtype=float)), "none"

# =========================
# Métricas, sanity e seleção de threshold
# =========================

def cm_metrics(cm: np.ndarray) -> Tuple[float, float, float, float]:
    TP = float(cm[0,0]); FP = float(cm[0,1]); FN = float(cm[1,0]); TN = float(cm[1,1])
    prec = TP / max(1.0, (TP + FP))
    rec = TP / max(1.0, (TP + FN))
    acc = (TP + TN) / max(1.0, (TP + FP + FN + TN))
    f1 = (2*prec*rec) / max(1e-12, (prec + rec)) if (prec + rec) > 0 else 0.0
    return prec, rec, f1, acc

def binary_eval(y_true: np.ndarray, y_score: np.ndarray, thr: float) -> Dict[str, Any]:
    y_pred = (y_score >= thr).astype(int)
    cm = confusion_matrix(y_true, y_pred, labels=[1,0])
    prec_lib = float(precision_score(y_true, y_pred, zero_division=0))
    rec_lib = float(recall_score(y_true, y_pred, zero_division=0))
    f1_lib = float(f1_score(y_true, y_pred, zero_division=0))
    acc_lib = float(accuracy_score(y_true, y_pred))
    prec_cm, rec_cm, f1_cm, acc_cm = cm_metrics(cm)
    sanity_ok = (abs(prec_lib - prec_cm) < 1e-6) and (abs(rec_lib - rec_cm) < 1e-6)
    cover_rate = float((y_pred == 1).mean()) if len(y_pred) else 0.0
    cover_count = int((y_pred == 1).sum()) if len(y_pred) else 0
    return dict(
        cm=cm, precision=prec_lib, recall=rec_lib, f1=f1_lib, acc=acc_lib,
        precision_cm=prec_cm, recall_cm=rec_cm, f1_cm=f1_cm, acc_cm=acc_cm,
        sanity_ok=sanity_ok, coverage_rate=cover_rate, coverage_count=cover_count
    )

def select_threshold_val(yv: np.ndarray, pv: np.ndarray, floor: float) -> Tuple[Optional[float], Dict[str, Any]]:
    best = None
    best_metrics = None
    for thr in threshold_grid:
        m = binary_eval(yv, pv, thr)
        if (m["precision"] >= floor and m["coverage_count"] >= N_min_preds_val and m["coverage_rate"] <= FLOOD_GUARD_MAX):
            if best is None:
                best, best_metrics = thr, m
            else:
                # Priorizar recall, desempate F1, depois ACC
                cur = best_metrics
                if (m["recall"] > cur["recall"]) or \
                   (m["recall"] == cur["recall"] and m["f1"] > cur["f1"]) or \
                   (m["recall"] == cur["recall"] and m["f1"] == cur["f1"] and m["acc"] > cur["acc"]):
                    best, best_metrics = thr, m
    return best, (best_metrics or {})

# =========================
# XGBoost — branch por versão
# =========================

def parse_version_tuple(v: str) -> Tuple[int,int,int]:
    parts = (v or "0").split(".")
    nums = []
    for p in parts[:3]:
        n = ''.join(ch for ch in p if ch.isdigit())
        try:
            nums.append(int(n) if n != '' else 0)
        except Exception:
            nums.append(0)
    while len(nums) < 3:
        nums.append(0)
    return tuple(nums[:3])  # type: ignore

XGB_TUP = parse_version_tuple(XGB_VERSION)
XGB_GE_1_7_6 = (XGB_TUP[0] > 1) or (XGB_TUP[0] == 1 and (XGB_TUP[1] > 7 or (XGB_TUP[1] == 7 and XGB_TUP[2] >= 6)))

# =========================
# Execução principal
# =========================

def main():
    print(f"[{now_ts()}] Início — CAI vs NÃO CAI — V1.2/V1.2.1 | Seeds: numpy={SEED_NUMPY}, tf={SEED_TF}, py={SEED_PY}")
    if _missing:
        print(f"CHECKLIST_FAILURE: dependências ausentes -> {', '.join(_missing)}")
        return
    # SSOT GOLD
    if not os.path.exists(GOLD_PATH):
        print("CHECKLIST_FAILURE: GOLD ausente no SSOT.")
        return
    try:
        df_raw = pd.read_parquet(GOLD_PATH)
    except Exception as e:
        print(f"VALIDATION_ERROR: falha ao ler GOLD: {e}")
        return
    df = ensure_datetime(df_raw)
    # Features
    df = prepare_features(df)

    # PROVA SSOT
    proof = summarize_df(df)
    print("\n[PROVA SSOT]")
    print(f"- Caminho: {GOLD_PATH} (tier=GOLD)")
    print(f"- Linhas: {proof['row_count']} | date_min: {proof['date_min']} | date_max: {proof['date_max']}")
    print(f"- Colunas (amostra): {proof['columns']}")

    # Splits
    splits = build_walk_forward_splits(df)
    print("\n[WALK-FORWARD — Folds]")
    for i, s in enumerate(splits, 1):
        print(f"Fold {i:02d} | train[{str(s['train_start'].date())} → {str(s['train_end'].date())}] | val[{str(s['val_start'].date())} → {str(s['val_end'].date())}] | test[{str(s['test_start'].date())} → {str(s['test_end'].date())}]")

    # Estruturas
    preds_val: Dict[Tuple[str,int,int,int], Tuple[np.ndarray, np.ndarray, Dict[str, Any]]] = {}
    preds_tst: Dict[Tuple[str,int,int,int], Tuple[np.ndarray, np.ndarray, np.ndarray]] = {}
    per_fold_info: Dict[Tuple[str,int,int,int], Dict[str, Any]] = {}
    error_counts: Dict[str, int] = {}
    abort_xgb: bool = False

    # Loop h, W, fold
    for h in horizons:
        ycol = f"y_h{h}_bin"
        for W in windows:
            # lista de features base para XGB
            feat_cols = [
                # lags ret1
                *[f"ret1_lag_{lag}" for lag in range(1, 11)],
                # rolling mean/std/zscore
                *[f"ret_roll_mean_{k}" for k in (5,10,15)],
                *[f"ret_roll_std_{k}" for k in (5,10,15)],
                *[f"zscore_ret_{k}" for k in (5,10,15)],
            ]
            # faróis binários dependem do treino: vol20d_high e range_compression_{5,10}
            for fi, s in enumerate(splits, 1):
                tr = subset(df, s["train_start"], s["train_end"])  # inclui __date__
                va = subset(df, s["val_start"], s["val_end"])
                te = subset(df, s["test_start"], s["test_end"]) 
                # construir faróis a partir do treino
                vol_med = float(tr["vol20d"].median()) if len(tr) else np.nan
                tr["vol20d_high"] = (tr["vol20d"] >= vol_med).astype(int)
                va["vol20d_high"] = (va["vol20d"] >= vol_med).astype(int)
                te["vol20d_high"] = (te["vol20d"] >= vol_med).astype(int)
                # range compression: abaixo ou igual à mediana do treino => 1
                for k in (5, 10):
                    med = float(tr[f"tr_norm_{k}"].median()) if len(tr) else np.nan
                    tr[f"range_compression_{k}"] = (tr[f"tr_norm_{k}"] <= med).astype(int)
                    va[f"range_compression_{k}"] = (va[f"tr_norm_{k}"] <= med).astype(int)
                    te[f"range_compression_{k}"] = (te[f"tr_norm_{k}"] <= med).astype(int)
                # pos_ma50 já existe (0/1)
                feat_all = feat_cols + ["vol20d_high","pos_ma50","range_compression_5","range_compression_10"]
                # drop NaNs (janelas)
                trc = tr[["__date__", ycol] + feat_all].dropna().copy()
                vac = va[["__date__", ycol] + feat_all].dropna().copy()
                tec = te[["__date__", ycol] + feat_all].dropna().copy()
                if trc.empty or trc[ycol].isna().all():
                    # registrar e continuar
                    msg = f"train_empty:h={h},W={W},fold={fi}"
                    error_counts[msg] = error_counts.get(msg, 0) + 1
                    continue
                # padronização fit no treino
                scaler = StandardScaler().fit(trc[feat_all].values)
                Xtr = scaler.transform(trc[feat_all].values); ytr = trc[ycol].astype(int).values
                Xva = scaler.transform(vac[feat_all].values) if len(vac) else np.empty((0,len(feat_all)))
                yva = vac[ycol].astype(int).values if len(vac) else np.empty((0,), dtype=int)
                Xte = scaler.transform(tec[feat_all].values) if len(tec) else np.empty((0,len(feat_all)))
                yte = tec[ycol].astype(int).values if len(tec) else np.empty((0,), dtype=int)
                dates_te = tec["__date__"].values.astype("datetime64[ns]") if len(tec) else np.array([], dtype="datetime64[ns]")

                # scale_pos_weight
                pos = max(1, int((ytr == 1).sum()))
                neg = int((ytr == 0).sum())
                spw = float(neg / pos) if (pos + neg) > 0 else 1.0

                # ===== XGBoost (versão-branch) =====
                if not abort_xgb:
                    try:
                        if XGB_GE_1_7_6:
                            clf = XGBClassifier(**xgb_params, scale_pos_weight=spw)
                            # early_stopping_rounds no fit (SEM callbacks)
                            clf.fit(
                                Xtr, ytr,
                                eval_set=[(Xtr, ytr), (Xva, yva)],
                                early_stopping_rounds=xgb_early_stopping_rounds,
                                verbose=False,
                            )
                            best_iter = getattr(clf, "best_iteration", None)
                            if best_iter is not None:
                                p_val_raw = clf.predict_proba(Xva, iteration_range=(0, int(best_iter)+1))[:,1] if len(Xva) else np.array([])
                                p_tst_raw = clf.predict_proba(Xte, iteration_range=(0, int(best_iter)+1))[:,1] if len(Xte) else np.array([])
                            else:
                                p_val_raw = clf.predict_proba(Xva)[:,1] if len(Xva) else np.array([])
                                p_tst_raw = clf.predict_proba(Xte)[:,1] if len(Xte) else np.array([])
                        else:
                            # Fallback: xgb.train
                            dtr = xgb.DMatrix(Xtr, label=ytr)
                            dva = xgb.DMatrix(Xva, label=yva)
                            dte = xgb.DMatrix(Xte, label=yte)
                            params = {
                                'max_depth': xgb_params['max_depth'],
                                'eta': xgb_params['learning_rate'],
                                'subsample': xgb_params['subsample'],
                                'colsample_bytree': xgb_params['colsample_bytree'],
                                'objective': 'binary:logistic',
                                'eval_metric': 'logloss',
                                'tree_method': xgb_params['tree_method'],
                                'seed': SEED_PY,
                                'scale_pos_weight': spw,
                            }
                            booster = xgb.train(
                                params,
                                dtr,
                                num_boost_round=int(xgb_params['n_estimators']),
                                evals=[(dtr, 'train'), (dva, 'val')],
                                early_stopping_rounds=xgb_early_stopping_rounds,
                                verbose_eval=False,
                            )
                            best_iter = getattr(booster, 'best_iteration', None)
                            if best_iter is not None:
                                p_val_raw = booster.predict(dva, iteration_range=(0, int(best_iter)+1)) if len(yva) else np.array([])
                                p_tst_raw = booster.predict(dte, iteration_range=(0, int(best_iter)+1)) if len(yte) else np.array([])
                            else:
                                p_val_raw = booster.predict(dva) if len(yva) else np.array([])
                                p_tst_raw = booster.predict(dte) if len(yte) else np.array([])
                        # calibração
                        cal_fn, cal_m = make_calibrator(yva, p_val_raw)
                        p_val = cal_fn(p_val_raw)
                        p_tst = cal_fn(p_tst_raw)
                        preds_val[("XGB", W, h, fi)] = (yva, np.asarray(p_val, dtype=float), {"calibration": cal_m, "spw": spw})
                        preds_tst[("XGB", W, h, fi)] = (yte, np.asarray(p_tst, dtype=float), dates_te)
                        per_fold_info[("XGB", W, h, fi)] = {"scaler": "standard", "best_iteration": (int(best_iter) if best_iter is not None else None)}
                    except Exception as e:
                        msg = f"XGB_ERR:{type(e).__name__}:{str(e).strip()}"
                        error_counts[msg] = error_counts.get(msg, 0) + 1
                        if error_counts[msg] >= 3:
                            abort_xgb = True
                        # segue sem XGB neste fold
                # ===== LSTM =====
                try:
                    # montar painel para LSTM (ret1 + roll mean/std W)
                    lstm_df = df[["__date__","ret1", f"ret_roll_mean_{W}", f"ret_roll_std_{W}", ycol]].dropna().copy()
                    trl = subset(lstm_df, s["train_start"], s["train_end"])
                    val = subset(lstm_df, s["val_start"], s["val_end"])
                    tes = subset(lstm_df, s["test_start"], s["test_end"])
                    if len(trl) >= (W + 5) and len(val) >= (W + 5) and len(tes) >= (W + 5):
                        feat_l = ["ret1", f"ret_roll_mean_{W}", f"ret_roll_std_{W}"]
                        sc = StandardScaler().fit(trl[feat_l].values)
                        def to_seq(b: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
                            b2 = b.copy(); b2[feat_l] = sc.transform(b2[feat_l].values)
                            X, y = [], []
                            V = b2[feat_l].values; yv2 = b2[ycol].astype(int).values
                            for i in range(W, len(b2)):
                                X.append(V[i-W:i,:]); y.append(yv2[i])
                            dates = b2["__date__"].values[W:].astype("datetime64[ns]")
                            return (np.stack(X,0) if X else np.empty((0,W,len(feat_l)))), (np.array(y, int) if y else np.empty((0,), int)), dates
                        Xtr, ytr_l, _ = to_seq(trl)
                        Xva, yva_l, _ = to_seq(val)
                        Xte, yte_l, dte_l = to_seq(tes)
                        if Xtr.shape[0] and Xva.shape[0] and Xte.shape[0]:
                            tf.keras.backend.clear_session()
                            model = build_lstm_model(n_features=len(feat_l), W=W)
                            es = EarlyStopping(monitor="val_loss", mode="min", patience=lstm_patience, restore_best_weights=True, verbose=0)
                            model.fit(Xtr, ytr_l, validation_data=(Xva, yva_l), epochs=lstm_epochs, batch_size=lstm_batch_size, callbacks=[es], verbose=0)
                            p_val_raw = model.predict(Xva, verbose=0, batch_size=lstm_batch_size).reshape(-1)
                            p_tst_raw = model.predict(Xte, verbose=0, batch_size=lstm_batch_size).reshape(-1)
                            cal_fn, cal_m = make_calibrator(yva_l, p_val_raw)
                            preds_val[("LSTM", W, h, fi)] = (yva_l, np.asarray(cal_fn(p_val_raw), dtype=float), {"calibration": cal_m})
                            preds_tst[("LSTM", W, h, fi)] = (yte_l, np.asarray(cal_fn(p_tst_raw), dtype=float), dte_l)
                            per_fold_info[("LSTM", W, h, fi)] = {"scaler": "standard"}
                except Exception as e:
                    msg = f"LSTM_ERR:{type(e).__name__}:{str(e).strip()}"
                    error_counts[msg] = error_counts.get(msg, 0) + 1
                    # continua

    # Relatar avisos de repetição e perguntas
    repeat_msgs = [f"{k} (x{v})" for k,v in error_counts.items() if v >= 3]
    if repeat_msgs:
        print("\n[ERROS REPETIDOS — intervenção requerida]")
        for m in repeat_msgs:
            print(f"- {m}")
        if abort_xgb:
            print("- xgb_early_stop_unsupported? Considerar atualizar xgboost para >=1.7.6 ou usar fallback.")

    # Pisos D+3/D+5 via VAL com N_min e clip
    for h in [3, 5]:
        best_prec = 0.0
        for key, (yv, pv, meta) in preds_val.items():
            _, _, hh, _ = key
            if hh != h or len(yv) == 0:
                continue
            # varrer grade e pegar melhor precisão respeitando N_min
            for thr in threshold_grid:
                m = binary_eval(yv, pv, thr)
                if m["coverage_count"] >= N_min_preds_val:
                    if m["precision"] > best_prec:
                        best_prec = m["precision"]
        tag = f"D+{h}"
        if precision_floor.get(tag) is None:
            pf = round(best_prec, 2)
            precision_floor[tag] = float(np.clip(pf, 0.70, 0.85))
    print("\n[PISOS DE PRECISÃO — usados]")
    print(f"- D+1: {precision_floor['D+1']:.2f} (fixo)")
    print(f"- D+3: {precision_floor['D+3']:.2f}")
    print(f"- D+5: {precision_floor['D+5']:.2f}")
    print(f"- N_min_preds_val: {N_min_preds_val}; flood_guard ≤ {int(FLOOD_GUARD_MAX*100)}%")

    # Seleção de thresholds por fold (VAL) e avaliação no TESTE
    fold_rows: List[Dict[str, Any]] = []
    for key in sorted(preds_val.keys()):
        model, W, h, fi = key
        yv, pv, meta = preds_val[key]
        yt, pt, dt = preds_tst.get(key, (np.array([]), np.array([]), np.array([])))
        if len(yv) == 0 or len(yt) == 0:
            continue
        floor = precision_floor[f"D+{h}"] or 0.0
        thr, mval = select_threshold_val(yv, pv, floor)
        reasons = []
        if thr is None:
            reasons.append("piso/Nmin/flood_val")
            # Ainda assim, para registro no TESTE, usar threshold de melhor precisão com N_min se existir, senão o de maior precisão
            best_thr_tmp, best_prec_tmp, best_m_tmp = None, -1.0, None
            for t in threshold_grid:
                mv = binary_eval(yv, pv, t)
                if mv["coverage_count"] >= N_min_preds_val and mv["precision"] > best_prec_tmp:
                    best_prec_tmp, best_thr_tmp, best_m_tmp = mv["precision"], t, mv
            if best_thr_tmp is None:
                for t in threshold_grid:
                    mv = binary_eval(yv, pv, t)
                    if mv["precision"] > best_prec_tmp:
                        best_prec_tmp, best_thr_tmp, best_m_tmp = mv["precision"], t, mv
            thr = float(best_thr_tmp if best_thr_tmp is not None else 0.5)
            mval = best_m_tmp or {}
        mtest = binary_eval(yt, pt, thr)
        # sanity assert
        sanity_ok = mval.get("sanity_ok", True) and mtest.get("sanity_ok", True)
        if not sanity_ok:
            reasons.append("metric_swap_detected")
        # cobertura mínima & flood guard em TESTE
        cov_ok = (mtest["coverage_rate"] >= coverage_min_rate) or (mtest["coverage_count"] >= coverage_min_count)
        flood_ok = (mval.get("coverage_rate", 0.0) <= FLOOD_GUARD_MAX) and (mtest["coverage_rate"] <= FLOOD_GUARD_MAX)
        piso_ok = (mtest["precision"] >= floor)
        fold_eligible = bool(piso_ok and cov_ok and flood_ok and sanity_ok)
        fold_rows.append(dict(
            model=model, window=W, horizon=h, fold=fi, thr=thr,
            prec_val=mval.get("precision", np.nan), rec_val=mval.get("recall", np.nan), rate_val=mval.get("coverage_rate", np.nan),
            prec_test=mtest["precision"], rec_test=mtest["recall"], f1_test=mtest["f1"], acc_test=mtest["acc"],
            rate_test=mtest["coverage_rate"], n_pred_test=mtest["coverage_count"],
            cm_TP=int(mtest["cm"][0,0]), cm_FP=int(mtest["cm"][0,1]), cm_FN=int(mtest["cm"][1,0]), cm_TN=int(mtest["cm"][1,1]),
            fold_eligible=fold_eligible, reasons=",".join(reasons),
        ))

    if not fold_rows:
        print("CHECKLIST_FAILURE: nenhuma combinação produziu resultados.")
        return

    folds_df = pd.DataFrame(fold_rows)

    # Agregar por combinação/horizonte somando CMs e recomputando métricas
    agg_rows: List[Dict[str, Any]] = []
    thr_medians: Dict[Tuple[str,int,int], float] = {}
    for (h, m, W), grp in folds_df.groupby(["horizon","model","window"], as_index=False):
        TP = int(grp["cm_TP"].sum()); FP = int(grp["cm_FP"].sum()); FN = int(grp["cm_FN"].sum()); TN = int(grp["cm_TN"].sum())
        prec, rec, f1, acc = cm_metrics(np.array([[TP, FP],[FN, TN]], dtype=float))
        n_pred = int(grp["n_pred_test"].sum()); n_total = int((TP+FP+FN+TN))
        rate = float(n_pred / max(1, n_total)) if n_total > 0 else 0.0
        floor = precision_floor[f"D+{h}"] or 0.0
        # mediana de thresholds apenas entre folds elegíveis
        elig_thr = grp.loc[grp["fold_eligible"]==True, "thr"].values
        thr_med = float(np.median(elig_thr)) if len(elig_thr) else float("nan")
        thr_medians[(m, W, h)] = thr_med
        # checagens
        cov_ok = (rate >= coverage_min_rate) or (n_pred >= coverage_min_count)
        flood_ok = (grp["rate_val"].mean() <= FLOOD_GUARD_MAX) and (rate <= FLOOD_GUARD_MAX)
        piso_ok = (prec >= floor)
        sanity_ok = bool((folds_df[(folds_df["horizon"]==h)&(folds_df["model"]==m)&(folds_df["window"]==W)]["reasons"].str.contains("metric_swap_detected")).sum() == 0)
        eligible = bool(piso_ok and cov_ok and flood_ok and sanity_ok)
        reason_parts = []
        if not piso_ok: reason_parts.append("piso")
        if not cov_ok: reason_parts.append("cobertura")
        if not flood_ok: reason_parts.append("inundacao")
        if not sanity_ok: reason_parts.append("sanity")
        agg_rows.append(dict(horizon=h, model=m, window=W, TP=TP, FP=FP, FN=FN, TN=TN,
                             precisao_CAI=prec, recall_CAI=rec, F1_CAI=f1, acc=acc,
                             pred_CAI_rate=rate, num_pred_CAI=n_pred,
                             eligible=eligible, reason=",".join(reason_parts), folds=int(grp["fold"].nunique()),
                             threshold_median=thr_med))
    agg_df = pd.DataFrame(agg_rows).sort_values(["horizon","model","window"]) if agg_rows else pd.DataFrame()

    # Top-3 e melhores por horizonte
    best_by_h: Dict[int, Dict[str, Any]] = {}
    no_winner: Dict[int, bool] = {1: False, 3: False, 5: False}
    for h in horizons:
        sub = agg_df[agg_df["horizon"]==h].copy()
        elig = sub[sub["eligible"] == True].copy()
        print(f"\nRESUMO — D+{h} (TESTE agregado) — modelo × janela")
        if sub.empty:
            print("–")
        else:
            print(sub[["model","window","precisao_CAI","recall_CAI","F1_CAI","acc","pred_CAI_rate","eligible","reason","folds","threshold_median"]].to_string(index=False))
        print(f"\nTOP-3 — D+{h} (TESTE)")
        if elig.empty:
            print("–")
            no_winner[h] = True
        else:
            elig_sorted = elig.sort_values(["recall_CAI","F1_CAI","acc"], ascending=[False,False,False])
            top3 = elig_sorted.head(3).reset_index(drop=True)
            print(top3[["model","window","precisao_CAI","recall_CAI","F1_CAI","acc","pred_CAI_rate","folds","threshold_median"]].to_string(index=False))
            best = elig_sorted.iloc[0]
            best_by_h[h] = best.to_dict()
            # Matriz de confusão agregada do melhor
            print(f"\nMATRIZ DE CONFUSÃO — melhor combinação D+{h} (modelo={best['model']}, janela={int(best['window'])}) [CAI=1, N_CAI=0]")
            header = ["", "pred_CAI", "pred_NAO_CAI"]
            print("{:<14s}{:>10s}{:>14s}".format(*header))
            print("{:<14s}{:>10d}{:>14d}".format("true_CAI", int(best.get("TP",0)), int(best.get("FP",0))))
            print("{:<14s}{:>10d}{:>14d}".format("true_NAO_CAI", int(best.get("FN",0)), int(best.get("TN",0))))

    # Threshold operacional (medianas)
    threshold_operacional: Dict[int, float] = {}
    print("\nTHRESHOLD OPERACIONAL (mediana entre folds elegíveis)")
    for h in horizons:
        val = float(best_by_h[h]["threshold_median"]) if h in best_by_h and np.isfinite(best_by_h[h]["threshold_median"]) else float("nan")
        threshold_operacional[h] = val
        print(f"- D+{h}: {val if np.isfinite(val) else '–'}")

    # Sequência final — somente se houver elegíveis
    final_seq = []
    all_operable = all(h in best_by_h for h in horizons)
    if all_operable:
        # pegar último fold
        try:
            last_fold = max(int(k[3]) for k in preds_tst.keys()) if preds_tst else None
        except Exception:
            last_fold = None
        if last_fold is not None:
            for h in horizons:
                b = best_by_h[h]
                m, W = str(b["model"]), int(b["window"]) 
                thr_med = float(b.get("threshold_median", float("nan")))
                yt, pt, dt = preds_tst.get((m, W, h, last_fold), (np.array([]), np.array([]), np.array([])))
                if len(pt) == 0 or not np.isfinite(thr_med):
                    final_seq.append("—")
                else:
                    yhat = (pt >= thr_med).astype(int)
                    final_seq.append("CAI" if int(yhat[-1]) == 1 else "NÃO CAI")
        else:
            all_operable = False
    print("\nSEQUÊNCIA FINAL (último bloco de TESTE):")
    print(f"- (D+1, D+3, D+5) = {', '.join(final_seq) if all_operable and final_seq else '—'}")

    # Baselines — TESTE (médias por horizonte)
    baseline_rows = []
    for h in horizons:
        seen = set()
        for key, (yt, pt, dt) in preds_tst.items():
            m, W, hh, fi = key
            if hh != h or fi in seen or len(yt) == 0:
                continue
            seen.add(fi)
            n = len(yt)
            # Sempre NÃO CAI
            pred0 = np.zeros(n, dtype=int)
            cm0 = confusion_matrix(yt, pred0, labels=[1,0])
            p0, r0, f10, a0 = cm_metrics(cm0)
            baseline_rows.append(dict(horizon=h, baseline="Sempre_NAO_CAI", precision=p0, f1=f10))
            # ProporcaoTreino>0.5 via VAL proxy
            # pegar um yv do mesmo fold
            yv = None
            for k2, (yvv, pvv, meta) in preds_val.items():
                mm, WW, hhh, fii = k2
                if hhh == h and fii == fi and len(yvv) > 0:
                    yv = yvv
                    break
            if yv is not None:
                pred1 = np.ones(n, dtype=int) if float((yv == 1).mean()) > 0.5 else np.zeros(n, dtype=int)
                cm1 = confusion_matrix(yt, pred1, labels=[1,0])
                p1, r1, f11, a1 = cm_metrics(cm1)
                baseline_rows.append(dict(horizon=h, baseline="ProporcaoTreino>0.5", precision=p1, f1=f11))
            # Sinal de ontem
            ret1_map = pd.Series(df.set_index("__date__")["ret1"])  # t-1 < 0 => CAI
            pred2 = []
            for d in dt:
                prev = pd.to_datetime(d) - pd.Timedelta(days=1)
                v = ret1_map.get(prev, np.nan)
                pred2.append(1 if (pd.notna(v) and v < 0) else 0)
            pred2 = np.array(pred2, int)
            cm2 = confusion_matrix(yt, pred2, labels=[1,0])
            p2, r2, f12, a2 = cm_metrics(cm2)
            baseline_rows.append(dict(horizon=h, baseline="SinalOntem", precision=p2, f1=f12))
            # Momentum_3d (soma log ret últimos 3 < 0)
            mom3 = df["ret1"].rolling(3).sum().shift(1)
            mom_map = pd.Series(mom3.values, index=df["__date__"])  # alinhado
            pred3 = [1 if (pd.notna(mom_map.get(pd.to_datetime(d), np.nan)) and mom_map.get(pd.to_datetime(d)) < 0) else 0 for d in dt]
            pred3 = np.array(pred3, int)
            cm3 = confusion_matrix(yt, pred3, labels=[1,0])
            p3, r3, f13, a3 = cm_metrics(cm3)
            baseline_rows.append(dict(horizon=h, baseline="Momentum_3d", precision=p3, f1=f13))
    base_df = pd.DataFrame(baseline_rows)

    # Painel V1.2.1 — Uma página (markdown-like)
    print("\n[Painel — Checklist Operacional (V1.2.1)]")
    lines = []
    apto_map: Dict[int, str] = {}
    for h in horizons:
        lines.append(f"## HORIZONTE D+{h}")
        floor = precision_floor[f"D+{h}"] or 0.0
        thr_op = best_by_h[h]["threshold_median"] if h in best_by_h else "—"
        lines.append(f"Piso de Precisão(CAI): {floor:.2f}")
        lines.append(f"Threshold Operacional (mediana): {thr_op if isinstance(thr_op, str) or np.isnan(thr_op) else f'{thr_op:.2f}'}\n")
        if h in best_by_h:
            b = best_by_h[h]
            lines.append(f"Vencedor: {b['model']} — janela={int(b['window'])}")
            lines.append(f"Precisão(CAI): {b['precisao_CAI']:.2f}   Recall(CAI): {b['recall_CAI']:.2f}   F1(CAI): {b['F1_CAI']:.2f}   ACC: {b['acc']:.2f}")
            # taxas: usar médias aproximadas a partir de agg (VAL não diretamente disponível aqui, usar média dos folds do vencedor)
            sub = folds_df[(folds_df["horizon"]==h)&(folds_df["model"]==b['model'])&(folds_df["window"]==b['window'])]
            rate_val = float(sub["rate_val"].mean()) if len(sub) else float("nan")
            rate_test = float(b['pred_CAI_rate'])
            def pct(v):
                return f"{(v*100):.1f}%" if np.isfinite(v) else "—"
            lines.append(f"Pred_CAI_rate (VAL/Teste): {pct(rate_val)} / {pct(rate_test)}")
            lines.append(f"Confusão (Teste): TP={int(b.get('TP',0))}  FP={int(b.get('FP',0))}  FN={int(b.get('FN',0))}  TN={int(b.get('TN',0))}\n")
            # Checks
            piso_ok = (b['precisao_CAI'] >= floor)
            cov_ok = (b['pred_CAI_rate'] >= coverage_min_rate) or (int(b['num_pred_CAI']) >= coverage_min_count)
            flood_ok = (rate_val <= FLOOD_GUARD_MAX) and (rate_test <= FLOOD_GUARD_MAX)
            sanity_ok = not (folds_df[(folds_df["horizon"]==h)&(folds_df["model"]==b['model'])&(folds_df["window"]==b['window'])]["reasons"].str.contains("metric_swap_detected")).any()
            # Baselines precision comparison
            bd = base_df[base_df["horizon"]==h]
            best_beats_all = True
            if not bd.empty:
                for _, row in bd.iterrows():
                    if b['precisao_CAI'] < float(row['precision']) - 1e-12:
                        best_beats_all = False
                        break
            lines.append("Checks:")
            lines.append(f"{'✓' if piso_ok else '✗'} Piso de Precisão(CAI)")
            lines.append(f"{'✓' if cov_ok else '✗'} Cobertura mínima")
            lines.append(f"{'✓' if flood_ok else '✗'} Freio de inundação (VAL/Teste)")
            lines.append(f"{'✓' if sanity_ok else '✗'} Sanity de métricas")
            lines.append(f"{'✓' if best_beats_all else '!'} Baselines (precisão)")
            # Baselines resumo
            lines.append("\nBaselines — Precisão(CAI) / F1:")
            if bd.empty:
                lines.append("• –")
            else:
                # garantir ordem
                order = ["Sempre_NAO_CAI","ProporcaoTreino>0.5","SinalOntem","Momentum_3d"]
                for base in order:
                    row = bd[bd["baseline"]==base]
                    if not row.empty:
                        pr = float(row.iloc[0]["precision"]); f1 = float(row.iloc[0]["f1"])
                        lines.append(f"• {base}: {pr:.2f} / {f1:.2f}")
            # Decisão
            apto = bool(piso_ok and cov_ok and flood_ok and sanity_ok and best_beats_all)
            apto_map[h] = "SIM" if apto else "NÃO"
            if not apto:
                motivo = []
                if not piso_ok: motivo.append("piso")
                if not cov_ok: motivo.append("cobertura")
                if not flood_ok: motivo.append("inundacao")
                if not sanity_ok: motivo.append("sanity")
                if not best_beats_all: motivo.append("baselines")
                lines.append(f"\nApto a operar?  NÃO")
                lines.append(f"Motivo (se NÃO): {','.join(motivo)}\n")
            else:
                lines.append(f"\nApto a operar?  SIM\n")
        else:
            apto_map[h] = "NÃO"
            lines.append("Vencedor: —")
            lines.append("Precisão(CAI): —   Recall(CAI): —   F1(CAI): —   ACC: —")
            lines.append("Pred_CAI_rate (VAL/Teste): — / —")
            lines.append("Confusão (Teste): TP=—  FP=—  FN=—  TN=—\n")
            lines.append("Checks:")
            lines.append("✗ Piso de Precisão(CAI)")
            lines.append("✗ Cobertura mínima")
            lines.append("✗ Freio de inundação (VAL/Teste)")
            lines.append("✗ Sanity de métricas")
            lines.append("! Baselines (precisão)")
            lines.append("\nBaselines — Precisão(CAI) / F1:\n• –")
            lines.append("\nApto a operar?  NÃO\n")

    # Rodapé
    seq_str = ", ".join(final_seq) if all_operable and final_seq else "—"
    lines.append(f"Resumo: D+1={apto_map.get(1,'NÃO')} | D+3={apto_map.get(3,'NÃO')} | D+5={apto_map.get(5,'NÃO')}   |  Sequência final (último TESTE): {seq_str}")
    if repeat_msgs:
        lines.append("Alertas:")
        for m in repeat_msgs:
            lines.append(f"• {m}")
        if abort_xgb:
            lines.append("Perguntas: Atualizar xgboost para ≥1.7.6 e habilitar early stopping? [ ]Sim [ ]Não")
    else:
        lines.append("Alertas: —")
    lines.append("SSOT: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet | Folds: 10 | Seeds: {}/{}/{}".format(SEED_NUMPY, SEED_TF, SEED_PY))
    lines.append("Precision floors: D+1={:.2f}; D+3={:.2f}; D+5={:.2f} | N_min_preds_val=10".format(precision_floor['D+1'], precision_floor['D+3'], precision_floor['D+5']))

    print("\n" + "\n".join(lines))

    # Checklist final
    print("\nCHECKLIST")
    print(f"- XGBoost versão detectada: {XGB_VERSION}")
    print(f"- Branch XGB: {'sklearn.fit(early_stopping_rounds)' if XGB_GE_1_7_6 else 'xgb.train fallback'} | abort_xgb={abort_xgb}")
    print(f"- SSOT: {GOLD_PATH} (tier=GOLD)")
    print(f"- Horizontes: {horizons} | Janelas: {windows} | Folds: {len(splits)}")
    print(f"- precision_floor: D+1={precision_floor['D+1']:.2f}, D+3={precision_floor['D+3']:.2f}, D+5={precision_floor['D+5']:.2f}")
    print(f"- dry_run=True (nenhum arquivo salvo)")
    print(f"\n[{now_ts()}] Fim — CAI vs NÃO CAI (dry_run={dry_run})")

if __name__ == "__main__":
    main()

[2025-09-19 18:14:53] Início — CAI vs NÃO CAI — V1.2/V1.2.1 | Seeds: numpy=2025, tf=42, py=7

[PROVA SSOT]
- Caminho: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet (tier=GOLD)
- Linhas: 3400 | date_min: 2012-01-03 | date_max: 2025-09-19
- Colunas (amostra): date, open, high, low, close, volume, ticker, open_norm, high_norm, low_norm, close_norm, volume_norm, return_1d, volatility_5d, sma_5, sma_20, sma_ratio, y_h1, y_h3, y_h5, y_h1_cls, y_h3_cls, y_h5_cls, year, __date__ ...

[WALK-FORWARD — Folds]
Fold 01 | train[2012-01-03 → 2013-04-02] | val[2013-04-03 → 2013-07-02] | test[2013-07-03 → 2014-01-02]
Fold 02 | train[2012-01-03 → 2013-10-02] | val[2013-10-03 → 2014-01-02] | test[2014-01-03 → 2014-07-02]
Fold 03 | train[2012-01-03 → 2014-04-02] | val[2014-04-03 → 2014-07-02] | test[2014-07-03 → 2015-01-02]
Fold 04 | train[2012-01-03 → 2014-10-02] | val[2014-10-03 → 2015-01-02] | test[2015-01-03 → 2015-07-02]
Fold 05 | train[2012-01-03 → 2015-04-02] | val[2015-04-03 → 2015-07-02] | test[201

## CAI vs NÃO CAI — V1.2 + V1.2.1 + Delta XGBoost 3.0.5 (dry_run=True)

In [8]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
CAI vs NÃO CAI — V1.2 + V1.2.1 + Delta XGBoost 3.0.5 (dry_run=True)

Regras essenciais:
- SSOT: GOLD apenas (/home/wrm/BOLSA_2026/gold/IBOV_gold.parquet).
- Walk-forward: 10 folds (train ≥18m, val 3m, test 6m).
- Modelos: XGBoost (sklearn API forçada, early_stopping_rounds=50, eval_metric='aucpr', device cuda/cpu); LSTM compacto (CPU).
- Calibração (Platt; fallback Isotonic) fit em VAL e aplicada em VAL/TESTE.
- Pisos: D+1 fixo=0.82; D+3/D+5 = dinâmicos de VAL com N_min=10 e clip [0.70, 0.85].
- Seleção de threshold (VAL): max recall(CAI) sujeito a: precisão(CAI) ≥ piso, N_pred_CAI_val ≥ 10, pred_CAI_rate_val ≤ 0.50. Sem relax.
- Elegibilidade em TESTE (agregado): piso ok, cobertura mínima (≥10% ou ≥8), flood guard ≤50% em VAL e TESTE, sanity ok, vencedor supera baselines em Precisão(CAI).
- Abortadores: repetir o mesmo erro ≥3x → parar etapa e registrar repeat_error_stop:<etapa>.
- Checklist 1 página + Bloco de Diagnóstico (3 linhas) no topo.
- Primeira execução: dry_run=True; nada é salvo/persistido.
"""

import os
import sys
import math
import time
import json
import warnings
import subprocess
from typing import List, Dict, Tuple, Optional, Any

# =========================
# Seeds e ambiente
# =========================

SEED_NUMPY = 2025
SEED_TF = 42
SEED_PY = 7  # também usado como random_state do XGBoost

os.environ["PYTHONHASHSEED"] = str(SEED_PY)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # reduzir ruído TF

import random
random.seed(SEED_PY)

import numpy as np
np.random.seed(SEED_NUMPY)

import pandas as pd
warnings.filterwarnings("ignore", category=FutureWarning)
pd.set_option("display.width", 180)
pd.set_option("display.max_columns", 160)

# Imports principais
_missing = []
try:
    import xgboost as xgb
    from xgboost import XGBClassifier
    XGB_VERSION = getattr(xgb, "__version__", "0")
except Exception as e:
    _missing.append(f"xgboost ({e})")
    XGB_VERSION = "0"

try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential  # pyright: ignore
    from tensorflow.keras.layers import LSTM, Dense, Dropout, Input  # pyright: ignore
    from tensorflow.keras.callbacks import EarlyStopping  # pyright: ignore
    tf.random.set_seed(SEED_TF)
except Exception as e:
    _missing.append(f"tensorflow/keras ({e})")

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)
from sklearn.linear_model import LogisticRegression
from sklearn.isotonic import IsotonicRegression

# =========================
# Parâmetros
# =========================

dry_run: bool = True

# SSOT GOLD apenas
GOLD_PATH = "/home/wrm/BOLSA_2026/gold/IBOV_gold.parquet"

windows: List[int] = [5, 10, 15]
horizons: List[int] = [1, 3, 5]

# Validação temporal
train_min_months: int = 18
val_months: int = 3
test_months: int = 6
max_folds: int = 10

# Priorização de CAI
precision_floor: Dict[str, Optional[float]] = {"D+1": 0.82, "D+3": None, "D+5": None}
coverage_min_rate: float = 0.10
coverage_min_count: int = 8
threshold_grid: List[float] = [i / 100.0 for i in range(10, 91)]  # 0.10 → 0.90
N_min_preds_val: int = 10
FLOOD_GUARD_MAX: float = 0.50  # pred_CAI_rate máximo em VAL/TESTE

# XGBoost — sklearn API forçada (Delta 3.0.5)
xgb_eval_metric = "aucpr"  # pode trocar para "logloss" se desejar estabilidade extra
xgb_early_stopping_rounds: int = 50
xgb_n_estimators: int = 2000

# LSTM
lstm_units: int = 48
lstm_dropout: float = 0.2
lstm_epochs: int = 50
lstm_batch_size: int = 32
lstm_patience: int = 5

# =========================
# Utilidades
# =========================

def now_ts() -> str:
    return time.strftime("%Y-%m-%d %H:%M:%S")

def summarize_df(df: pd.DataFrame) -> Dict[str, str]:
    rows, cols = df.shape
    dmin = pd.to_datetime(df["__date__"]).min(); dmax = pd.to_datetime(df["__date__"]).max()
    return dict(
        row_count=str(rows),
        date_min=str(getattr(dmin, 'date', lambda: '-')()) if pd.notnull(dmin) else "-",
        date_max=str(getattr(dmax, 'date', lambda: '-')()) if pd.notnull(dmax) else "-",
        columns=", ".join(list(df.columns)[:25]) + (" ..." if df.shape[1] > 25 else "")
    )

def ensure_datetime(df: pd.DataFrame) -> pd.DataFrame:
    # tenta usar coluna 'date' ou índice datetime
    if "date" in df.columns:
        out = df.copy()
        out["date"] = pd.to_datetime(out["date"], errors="coerce")
        out = out.dropna(subset=["date"]).sort_values("date")
        out["__date__"] = out["date"].values
        return out
    if isinstance(df.index, pd.DatetimeIndex):
        out = df.sort_index().copy()
        out["__date__"] = out.index
        return out
    raise ValueError("VALIDATION_ERROR: coluna 'date' ausente e índice não é DatetimeIndex.")

def month_add(d: pd.Timestamp, months: int) -> pd.Timestamp:
    return d + pd.DateOffset(months=months)

def build_walk_forward_splits(df: pd.DataFrame) -> List[Dict[str, pd.Timestamp]]:
    dates = pd.to_datetime(df["__date__"])  # type: ignore
    start = dates.min().normalize(); end = dates.max().normalize()
    train_end = month_add(start, train_min_months) - pd.DateOffset(days=1)
    folds = []
    test_start = train_end + pd.DateOffset(days=1)
    test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    while test_start <= end and len(folds) < max_folds:
        if test_end > end: test_end = end
        val_end = test_start - pd.DateOffset(days=1)
        val_start = month_add(val_end, -val_months) + pd.DateOffset(days=1)
        tr_start = start; tr_end = val_start - pd.DateOffset(days=1)
        if tr_start >= tr_end or val_start > val_end or test_start > test_end:
            break
        folds.append(dict(
            train_start=tr_start, train_end=tr_end,
            val_start=val_start, val_end=val_end,
            test_start=test_start, test_end=test_end,
        ))
        test_start = test_end + pd.DateOffset(days=1)
        test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    if not folds:
        raise ValueError("VALIDATION_ERROR: não foi possível construir folds walk-forward.")
    return folds

def subset(df: pd.DataFrame, a: pd.Timestamp, b: pd.Timestamp) -> pd.DataFrame:
    return df.loc[(df["__date__"] >= a) & (df["__date__"] <= b)].copy()

def detect_xgb_device() -> Tuple[str, str]:
    """
    Retorna ('cuda','<GPU>') quando houver CUDA detectável; caso contrário ('cpu','-').
    Usa nvidia-smi se disponível; caso contrário assume cpu.
    """
    try:
        out = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
            stderr=subprocess.DEVNULL, stdin=subprocess.DEVNULL, timeout=2
        )
        names = out.decode("utf-8", errors="ignore").strip().splitlines()
        if names and len(names[0].strip()) > 0:
            return "cuda", names[0].strip()
    except Exception:
        pass
    return "cpu", "-"

# =========================
# Features
# =========================

def compute_log_ret(close: pd.Series) -> pd.Series:
    return np.log(close / close.shift(1))

def prepare_features(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    # Reqs: close, high, low
    for c in ("close", "high", "low"):
        if c not in out.columns:
            raise ValueError(f"VALIDATION_ERROR: coluna obrigatória ausente: {c}")
    out["ret1"] = compute_log_ret(out["close"])
    # lags até 10
    for lag in range(1, 11):
        out[f"ret1_lag_{lag}"] = out["ret1"].shift(lag)
    # rolling mean/std para 5/10/15 e z-scores de ret1
    for W in (5, 10, 15):
        out[f"ret_roll_mean_{W}"] = out["ret1"].rolling(W).mean()
        out[f"ret_roll_std_{W}"] = out["ret1"].rolling(W).std()
        out[f"zscore_ret_{W}"] = (out["ret1"] - out[f"ret_roll_mean_{W}"]) / out[f"ret_roll_std_{W}"]
    # vol20d para farol
    out["vol20d"] = out["ret1"].rolling(20).std()
    # MA50 e pos_ma50
    out["ma50"] = out["close"].rolling(50).mean()
    out["pos_ma50"] = ((out["close"] > out["ma50"]).astype(float)).where(out["ma50"].notna(), np.nan)
    # range compression (true range normalizado) 5/10
    for W in (5, 10):
        hi = out["high"].rolling(W).max()
        lo = out["low"].rolling(W).min()
        mid = out["close"].rolling(W).mean()
        out[f"tr_norm_{W}"] = (hi - lo) / (mid.replace(0, np.nan))
    # forward returns
    for h in horizons:
        out[f"ret_fwd_{h}"] = (out["close"].shift(-h) / out["close"]) - 1.0
        out[f"y_h{h}_bin"] = (pd.to_numeric(out[f"ret_fwd_{h}"], errors="coerce") < 0).astype("Int8")
    return out

# =========================
# Modelos e Calibração
# =========================

def build_lstm_model(n_features: int, W: int) -> "Sequential":
    model = Sequential()
    model.add(Input(shape=(W, n_features)))
    model.add(LSTM(lstm_units))
    model.add(Dropout(lstm_dropout))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

def make_calibrator(y_val: np.ndarray, p_val: np.ndarray):
    yv = np.asarray(y_val).astype(int)
    pv = np.asarray(p_val).astype(float).reshape(-1, 1)
    if len(np.unique(yv)) < 2 or len(yv) < 5:
        return (lambda x: np.asarray(x, dtype=float)), "none"
    # Platt
    try:
        lr = LogisticRegression(max_iter=1000, solver="lbfgs")
        lr.fit(pv, yv)
        def f(x):
            xv = np.asarray(x).astype(float).reshape(-1, 1)
            return lr.predict_proba(xv)[:, 1]
        return f, "platt"
    except Exception:
        pass
    # Isotonic fallback
    try:
        iso = IsotonicRegression(out_of_bounds="clip")
        iso.fit(np.asarray(p_val).astype(float), yv)
        def g(x):
            return iso.predict(np.asarray(x).astype(float))
        return g, "isotonic"
    except Exception:
        return (lambda x: np.asarray(x, dtype=float)), "none"

# =========================
# Métricas, sanity e seleção de threshold
# =========================

def cm_metrics(cm: np.ndarray) -> Tuple[float, float, float, float]:
    TP = float(cm[0,0]); FP = float(cm[0,1]); FN = float(cm[1,0]); TN = float(cm[1,1])
    prec = TP / max(1.0, (TP + FP))
    rec = TP / max(1.0, (TP + FN))
    acc = (TP + TN) / max(1.0, (TP + FP + FN + TN))
    f1 = (2*prec*rec) / max(1e-12, (prec + rec)) if (prec + rec) > 0 else 0.0
    return prec, rec, f1, acc

def binary_eval(y_true: np.ndarray, y_score: np.ndarray, thr: float) -> Dict[str, Any]:
    y_pred = (y_score >= thr).astype(int)
    cm = confusion_matrix(y_true, y_pred, labels=[1,0])
    prec_lib = float(precision_score(y_true, y_pred, zero_division=0))
    rec_lib = float(recall_score(y_true, y_pred, zero_division=0))
    f1_lib = float(f1_score(y_true, y_pred, zero_division=0))
    acc_lib = float(accuracy_score(y_true, y_pred))
    prec_cm, rec_cm, f1_cm, acc_cm = cm_metrics(cm)
    sanity_ok = (abs(prec_lib - prec_cm) < 1e-6) and (abs(rec_lib - rec_cm) < 1e-6)
    cover_rate = float((y_pred == 1).mean()) if len(y_pred) else 0.0
    cover_count = int((y_pred == 1).sum()) if len(y_pred) else 0
    return dict(
        cm=cm, precision=prec_lib, recall=rec_lib, f1=f1_lib, acc=acc_lib,
        precision_cm=prec_cm, recall_cm=rec_cm, f1_cm=f1_cm, acc_cm=acc_cm,
        sanity_ok=sanity_ok, coverage_rate=cover_rate, coverage_count=cover_count
    )

def select_threshold_val(yv: np.ndarray, pv: np.ndarray, floor: float) -> Tuple[Optional[float], Dict[str, Any]]:
    best = None
    best_metrics = None
    for thr in threshold_grid:
        m = binary_eval(yv, pv, thr)
        if (m["precision"] >= floor and m["coverage_count"] >= N_min_preds_val and m["coverage_rate"] <= FLOOD_GUARD_MAX):
            if best is None:
                best, best_metrics = thr, m
            else:
                # Priorizar recall, desempate F1, depois ACC
                cur = best_metrics
                if (m["recall"] > cur["recall"]) or \
                   (m["recall"] == cur["recall"] and m["f1"] > cur["f1"]) or \
                   (m["recall"] == cur["recall"] and m["f1"] == cur["f1"] and m["acc"] > cur["acc"]):
                    best, best_metrics = thr, m
    return best, (best_metrics or {})

# =========================
# Execução principal
# =========================

def main():
    print(f"[{now_ts()}] Início — CAI vs NÃO CAI — V1.2/V1.2.1 (Delta XGBoost 3.0.5) | Seeds: numpy={SEED_NUMPY}, tf={SEED_TF}, xgb={SEED_PY}")

    if _missing:
        print(f"CHECKLIST_FAILURE: dependências ausentes -> {', '.join(_missing)}")
        return

    # TF: silenciar GPU e forçar CPU apenas para LSTM (sem afetar XGBoost)
    try:
        tf.config.set_visible_devices([], 'GPU')
        TF_DEV_NOTE = "TF device=CPU (fallback)"
    except Exception:
        TF_DEV_NOTE = "TF device=CPU (noop)"

    # XGBoost device detection
    xgb_device, xgb_gpu_name = detect_xgb_device()
    if xgb_device != "cuda":
        xgb_device = "cpu"
        xgb_gpu_name = "-"

    # SSOT GOLD
    if not os.path.exists(GOLD_PATH):
        print("CHECKLIST_FAILURE: GOLD ausente no SSOT.")
        return
    try:
        df_raw = pd.read_parquet(GOLD_PATH)
    except Exception as e:
        print(f"VALIDATION_ERROR: falha ao ler GOLD: {e}")
        return
    df = ensure_datetime(df_raw)
    df = prepare_features(df)

    # PROVA SSOT
    proof = summarize_df(df)
    print("\n[PROVA SSOT]")
    print(f"- Caminho: {GOLD_PATH} (tier=GOLD)")
    print(f"- Linhas: {proof['row_count']} | date_min: {proof['date_min']} | date_max: {proof['date_max']}")
    print(f"- Colunas (amostra): {proof['columns']}")

    # Splits
    splits = build_walk_forward_splits(df)
    print("\n[WALK-FORWARD — Folds]")
    for i, s in enumerate(splits, 1):
        print(f"Fold {i:02d} | train[{str(s['train_start'].date())} → {str(s['train_end'].date())}] | val[{str(s['val_start'].date())} → {str(s['val_end'].date())}] | test[{str(s['test_start'].date())} → {str(s['test_end'].date())}]")

    # Estruturas
    preds_val: Dict[Tuple[str,int,int,int], Tuple[np.ndarray, np.ndarray, Dict[str, Any]]] = {}
    preds_tst: Dict[Tuple[str,int,int,int], Tuple[np.ndarray, np.ndarray, np.ndarray]] = {}
    per_fold_info: Dict[Tuple[str,int,int,int], Dict[str, Any]] = {}
    error_counts: Dict[str, int] = {}
    abort_xgb: bool = False
    abort_lstm: bool = False
    xgb_fit_kw_error: bool = False  # flag para pergunta final de múltiplos XGB

    # Loop h, W, fold
    for h in horizons:
        ycol = f"y_h{h}_bin"
        for W in windows:
            # lista de features base para XGB
            feat_cols = [
                *[f"ret1_lag_{lag}" for lag in range(1, 11)],
                *[f"ret_roll_mean_{k}" for k in (5,10,15)],
                *[f"ret_roll_std_{k}" for k in (5,10,15)],
                *[f"zscore_ret_{k}" for k in (5,10,15)],
            ]
            # faróis e compressores
            for fi, s in enumerate(splits, 1):
                tr = subset(df, s["train_start"], s["train_end"])  # inclui __date__
                va = subset(df, s["val_start"], s["val_end"])
                te = subset(df, s["test_start"], s["test_end"])
                # construir faróis a partir do treino
                vol_med = float(tr["vol20d"].median()) if len(tr) else np.nan
                tr["vol20d_high"] = (tr["vol20d"] >= vol_med).astype(int)
                va["vol20d_high"] = (va["vol20d"] >= vol_med).astype(int)
                te["vol20d_high"] = (te["vol20d"] >= vol_med).astype(int)
                # range compression por mediana do treino
                for k in (5, 10):
                    med = float(tr[f"tr_norm_{k}"].median()) if len(tr) else np.nan
                    tr[f"range_compression_{k}"] = (tr[f"tr_norm_{k}"] <= med).astype(int)
                    va[f"range_compression_{k}"] = (va[f"tr_norm_{k}"] <= med).astype(int)
                    te[f"range_compression_{k}"] = (te[f"tr_norm_{k}"] <= med).astype(int)
                feat_all = feat_cols + ["vol20d_high","pos_ma50","range_compression_5","range_compression_10"]
                # drop NaNs (janelas)
                trc = tr[["__date__", ycol] + feat_all].dropna().copy()
                vac = va[["__date__", ycol] + feat_all].dropna().copy()
                tec = te[["__date__", ycol] + feat_all].dropna().copy()
                if trc.empty or trc[ycol].isna().all():
                    msg = f"train_empty:h={h},W={W},fold={fi}"
                    error_counts[msg] = error_counts.get(msg, 0) + 1
                    continue
                # padronização fit no treino
                scaler = StandardScaler().fit(trc[feat_all].values)
                Xtr = scaler.transform(trc[feat_all].values); ytr = trc[ycol].astype(int).values
                Xva = scaler.transform(vac[feat_all].values) if len(vac) else np.empty((0,len(feat_all)))
                yva = vac[ycol].astype(int).values if len(vac) else np.empty((0,), dtype=int)
                Xte = scaler.transform(tec[feat_all].values) if len(tec) else np.empty((0,len(feat_all)))
                yte = tec[ycol].astype(int).values if len(tec) else np.empty((0,), dtype=int)
                dates_te = tec["__date__"].values.astype("datetime64[ns]") if len(tec) else np.array([], dtype="datetime64[ns]")

                # scale_pos_weight
                pos = max(1, int((ytr == 1).sum()))
                neg = int((ytr == 0).sum())
                spw = float(neg / pos) if (pos + neg) > 0 else 1.0

                # ===== XGBoost (sklearn API forçada; Delta 3.0.5) =====
                if not abort_xgb:
                    try:
                        xgb_params = dict(
                            max_depth=5,
                            learning_rate=0.05,
                            n_estimators=xgb_n_estimators,
                            subsample=0.9,
                            colsample_bytree=0.9,
                            objective="binary:logistic",
                            eval_metric=xgb_eval_metric,
                            tree_method="hist",
                            device=xgb_device,
                            random_state=SEED_PY,
                            n_jobs=max(1, (os.cpu_count() or 2) - 1),
                        )
                        clf = XGBClassifier(**xgb_params, scale_pos_weight=spw)
                        # early_stopping_rounds no fit; NUNCA usar callbacks
                        clf.fit(
                            Xtr, ytr,
                            eval_set=[(Xtr, ytr), (Xva, yva)],
                            early_stopping_rounds=xgb_early_stopping_rounds,
                            verbose=False,
                        )
                        best_iter = getattr(clf, "best_iteration", None)
                        if best_iter is not None:
                            p_val_raw = clf.predict_proba(Xva, iteration_range=(0, int(best_iter)+1))[:,1] if len(Xva) else np.array([])
                            p_tst_raw = clf.predict_proba(Xte, iteration_range=(0, int(best_iter)+1))[:,1] if len(Xte) else np.array([])
                        else:
                            p_val_raw = clf.predict_proba(Xva)[:,1] if len(Xva) else np.array([])
                            p_tst_raw = clf.predict_proba(Xte)[:,1] if len(Xte) else np.array([])
                        # calibração em VAL
                        try:
                            cal_fn, cal_m = make_calibrator(yva, p_val_raw)
                        except Exception as e:
                            msg = f"CALIB_ERR:{type(e).__name__}:{str(e).strip()}"
                            error_counts[msg] = error_counts.get(msg, 0) + 1
                            if error_counts[msg] >= 3:
                                error_counts["repeat_error_stop:calibration"] = error_counts.get("repeat_error_stop:calibration", 0) + 1
                            cal_fn, cal_m = (lambda x: np.asarray(x, dtype=float)), "none"
                        p_val = cal_fn(p_val_raw)
                        p_tst = cal_fn(p_tst_raw)
                        preds_val[("XGB", W, h, fi)] = (yva, np.asarray(p_val, dtype=float), {"calibration": cal_m, "spw": spw, "device": xgb_device})
                        preds_tst[("XGB", W, h, fi)] = (yte, np.asarray(p_tst, dtype=float), dates_te)
                        per_fold_info[("XGB", W, h, fi)] = {"scaler": "standard", "best_iteration": (int(best_iter) if best_iter is not None else None)}
                    except TypeError as e:
                        msg_txt = str(e).lower()
                        # erros esperados de ambiente conflitado
                        if ("early_stopping_rounds" in msg_txt and "unexpected" in msg_txt) or ("callbacks" in msg_txt and "unexpected" in msg_txt):
                            error_counts["xgb_sklearn_fit"] = error_counts.get("xgb_sklearn_fit", 0) + 1
                            xgb_fit_kw_error = True
                            if error_counts["xgb_sklearn_fit"] >= 3:
                                error_counts["repeat_error_stop:xgb_sklearn_fit"] = 3
                                abort_xgb = True
                        else:
                            msg = f"XGB_FIT_TYPE_ERR:{str(e).strip()}"
                            error_counts[msg] = error_counts.get(msg, 0) + 1
                            if error_counts[msg] >= 3:
                                abort_xgb = True
                    except Exception as e:
                        msg = f"XGB_ERR:{type(e).__name__}:{str(e).strip()}"
                        error_counts[msg] = error_counts.get(msg, 0) + 1
                        if error_counts[msg] >= 3:
                            error_counts["repeat_error_stop:xgb_train"] = 3
                            abort_xgb = True

                # ===== LSTM (CPU, silencioso) =====
                if not abort_lstm:
                    try:
                        # montar painel para LSTM (ret1 + roll mean/std W)
                        lstm_df = df[["__date__","ret1", f"ret_roll_mean_{W}", f"ret_roll_std_{W}", ycol]].dropna().copy()
                        trl = subset(lstm_df, s["train_start"], s["train_end"])
                        val = subset(lstm_df, s["val_start"], s["val_end"])
                        tes = subset(lstm_df, s["test_start"], s["test_end"])
                        if len(trl) >= (W + 5) and len(val) >= (W + 5) and len(tes) >= (W + 5):
                            feat_l = ["ret1", f"ret_roll_mean_{W}", f"ret_roll_std_{W}"]
                            sc = StandardScaler().fit(trl[feat_l].values)
                            def to_seq(b: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
                                b2 = b.copy(); b2[feat_l] = sc.transform(b2[feat_l].values)
                                X, y = [], []
                                V = b2[feat_l].values; yv2 = b2[ycol].astype(int).values
                                for i in range(W, len(b2)):
                                    X.append(V[i-W:i,:]); y.append(yv2[i])
                                dates = b2["__date__"].values[W:].astype("datetime64[ns]")
                                return (np.stack(X,0) if X else np.empty((0,W,len(feat_l)))), (np.array(y, int) if y else np.empty((0,), int)), dates
                            Xtr, ytr_l, _ = to_seq(trl)
                            Xva, yva_l, _ = to_seq(val)
                            Xte, yte_l, dte_l = to_seq(tes)
                            if Xtr.shape[0] and Xva.shape[0] and Xte.shape[0]:
                                tf.keras.backend.clear_session()
                                model = build_lstm_model(n_features=len(feat_l), W=W)
                                es = EarlyStopping(monitor="val_loss", mode="min", patience=lstm_patience, restore_best_weights=True, verbose=0)
                                model.fit(Xtr, ytr_l, validation_data=(Xva, yva_l), epochs=lstm_epochs, batch_size=lstm_batch_size, callbacks=[es], verbose=0)
                                p_val_raw = model.predict(Xva, verbose=0, batch_size=lstm_batch_size).reshape(-1)
                                p_tst_raw = model.predict(Xte, verbose=0, batch_size=lstm_batch_size).reshape(-1)
                                try:
                                    cal_fn, cal_m = make_calibrator(yva_l, p_val_raw)
                                except Exception as e:
                                    msg = f"CALIB_ERR:{type(e).__name__}:{str(e).strip()}"
                                    error_counts[msg] = error_counts.get(msg, 0) + 1
                                    if error_counts[msg] >= 3:
                                        error_counts["repeat_error_stop:calibration"] = error_counts.get("repeat_error_stop:calibration", 0) + 1
                                    cal_fn, cal_m = (lambda x: np.asarray(x, dtype=float)), "none"
                                preds_val[("LSTM", W, h, fi)] = (yva_l, np.asarray(cal_fn(p_val_raw), dtype=float), {"calibration": cal_m})
                                preds_tst[("LSTM", W, h, fi)] = (yte_l, np.asarray(cal_fn(p_tst_raw), dtype=float), dte_l)
                                per_fold_info[("LSTM", W, h, fi)] = {"scaler": "standard"}
                    except Exception as e:
                        msg = f"LSTM_ERR:{type(e).__name__}:{str(e).strip()}"
                        error_counts[msg] = error_counts.get(msg, 0) + 1
                        if error_counts[msg] >= 3:
                            error_counts["repeat_error_stop:lstm"] = 3
                            abort_lstm = True

    # Relatar avisos de repetição e perguntas
    repeat_msgs = [f"{k} (x{v})" for k,v in error_counts.items() if v >= 3 and k.startswith("repeat_error_stop")]
    if repeat_msgs:
        print("\n[ERROS REPETIDOS — intervenção requerida]")
        for m in repeat_msgs:
            print(f"- {m}")

    # Pisos D+3/D+5 via VAL com N_min e clip
    for h in [3, 5]:
        best_prec = 0.0
        for key, (yv, pv, meta) in preds_val.items():
            _, _, hh, _ = key
            if hh != h or len(yv) == 0:
                continue
            # varrer grade e pegar melhor precisão respeitando N_min
            for thr in threshold_grid:
                m = binary_eval(yv, pv, thr)
                if m["coverage_count"] >= N_min_preds_val:
                    if m["precision"] > best_prec:
                        best_prec = m["precision"]
        tag = f"D+{h}"
        if precision_floor.get(tag) is None:
            pf = round(best_prec, 2)
            precision_floor[tag] = float(np.clip(pf, 0.70, 0.85))
    print("\n[PISOS DE PRECISÃO — usados]")
    print(f"- D+1: {precision_floor['D+1']:.2f} (fixo)")
    print(f"- D+3: {precision_floor['D+3']:.2f}")
    print(f"- D+5: {precision_floor['D+5']:.2f}")
    print(f"- N_min_preds_val: {N_min_preds_val}; flood_guard ≤ {int(FLOOD_GUARD_MAX*100)}%")

    # Seleção de thresholds por fold (VAL) e avaliação no TESTE
    fold_rows: List[Dict[str, Any]] = []
    for key in sorted(preds_val.keys()):
        model, W, h, fi = key
        yv, pv, meta = preds_val[key]
        yt, pt, dt = preds_tst.get(key, (np.array([]), np.array([]), np.array([])))
        if len(yv) == 0 or len(yt) == 0:
            continue
        floor = precision_floor[f"D+{h}"] or 0.0
        thr, mval = select_threshold_val(yv, pv, floor)
        reasons = []
        if thr is None:
            reasons.append("piso/Nmin/flood_val")
            # Inelegível — sem relax; ainda calculamos métricas de teste com um thr neutro para diagnóstico
            thr_test = 0.5
            mtest = binary_eval(yt, pt, thr_test)
            sanity_ok = mtest.get("sanity_ok", True)
            if not sanity_ok:
                reasons.append("metric_swap_detected")
            cov_ok = (mtest["coverage_rate"] >= coverage_min_rate) or (mtest["coverage_count"] >= coverage_min_count)
            flood_ok = False  # sem threshold válido em VAL → considerar falha de flood guard operacional
            piso_ok = (mtest["precision"] >= floor)
            fold_eligible = False
            fold_rows.append(dict(
                model=model, window=W, horizon=h, fold=fi, thr=float(thr_test),
                prec_val=float("nan"), rec_val=float("nan"), rate_val=float("nan"),
                prec_test=mtest["precision"], rec_test=mtest["recall"], f1_test=mtest["f1"], acc_test=mtest["acc"],
                rate_test=mtest["coverage_rate"], n_pred_test=mtest["coverage_count"],
                cm_TP=int(mtest["cm"][0,0]), cm_FP=int(mtest["cm"][0,1]), cm_FN=int(mtest["cm"][1,0]), cm_TN=int(mtest["cm"][1,1]),
                fold_eligible=fold_eligible, reasons=",".join(reasons),
            ))
            continue

        mtest = binary_eval(yt, pt, thr)
        # sanity assert (VAL e TESTE)
        sanity_ok = mval.get("sanity_ok", True) and mtest.get("sanity_ok", True)
        if not sanity_ok:
            reasons.append("metric_swap_detected")
        # cobertura mínima & flood guard em TESTE e VAL
        cov_ok = (mtest["coverage_rate"] >= coverage_min_rate) or (mtest["coverage_count"] >= coverage_min_count)
        flood_ok = (mval.get("coverage_rate", 0.0) <= FLOOD_GUARD_MAX) and (mtest["coverage_rate"] <= FLOOD_GUARD_MAX)
        piso_ok = (mtest["precision"] >= floor)
        fold_eligible = bool(piso_ok and cov_ok and flood_ok and sanity_ok)
        fold_rows.append(dict(
            model=model, window=W, horizon=h, fold=fi, thr=float(thr),
            prec_val=mval.get("precision", np.nan), rec_val=mval.get("recall", np.nan), rate_val=mval.get("coverage_rate", np.nan),
            prec_test=mtest["precision"], rec_test=mtest["recall"], f1_test=mtest["f1"], acc_test=mtest["acc"],
            rate_test=mtest["coverage_rate"], n_pred_test=mtest["coverage_count"],
            cm_TP=int(mtest["cm"][0,0]), cm_FP=int(mtest["cm"][0,1]), cm_FN=int(mtest["cm"][1,0]), cm_TN=int(mtest["cm"][1,1]),
            fold_eligible=fold_eligible, reasons=",".join(reasons),
        ))

    if not fold_rows:
        print("CHECKLIST_FAILURE: nenhuma combinação produziu resultados.")
        return

    folds_df = pd.DataFrame(fold_rows)

    # Agregar por combinação/horizonte somando CMs e recomputando métricas
    agg_rows: List[Dict[str, Any]] = []
    thr_medians: Dict[Tuple[str,int,int], float] = {}
    for (h, m, W), grp in folds_df.groupby(["horizon","model","window"], as_index=False):
        TP = int(grp["cm_TP"].sum()); FP = int(grp["cm_FP"].sum()); FN = int(grp["cm_FN"].sum()); TN = int(grp["cm_TN"].sum())
        prec, rec, f1, acc = cm_metrics(np.array([[TP, FP],[FN, TN]], dtype=float))
        n_pred = int(grp["n_pred_test"].sum()); n_total = int((TP+FP+FN+TN))
        rate = float(n_pred / max(1, n_total)) if n_total > 0 else 0.0
        floor = precision_floor[f"D+{h}"] or 0.0
        # mediana de thresholds apenas entre folds elegíveis
        elig_thr = grp.loc[grp["fold_eligible"]==True, "thr"].values
        thr_med = float(np.median(elig_thr)) if len(elig_thr) else float("nan")
        thr_medians[(m, W, h)] = thr_med
        # checagens
        cov_ok = (rate >= coverage_min_rate) or (n_pred >= coverage_min_count)
        # média de VAL flood guard entre folds do par (modelo, W, h)
        rate_val_mean = float(grp["rate_val"].mean()) if "rate_val" in grp.columns and len(grp) else float("nan")
        flood_ok = (rate_val_mean <= FLOOD_GUARD_MAX if np.isfinite(rate_val_mean) else False) and (rate <= FLOOD_GUARD_MAX)
        sanity_ok = not (grp["reasons"].str.contains("metric_swap_detected").fillna(False)).any()
        piso_ok = (prec >= floor)
        eligible = bool(piso_ok and cov_ok and flood_ok and sanity_ok)
        reason_parts = []
        if not piso_ok: reason_parts.append("piso")
        if not cov_ok: reason_parts.append("cobertura")
        if not flood_ok: reason_parts.append("inundacao")
        if not sanity_ok: reason_parts.append("sanity")
        agg_rows.append(dict(horizon=h, model=m, window=W, TP=TP, FP=FP, FN=FN, TN=TN,
                             precisao_CAI=prec, recall_CAI=rec, F1_CAI=f1, acc=acc,
                             pred_CAI_rate=rate, num_pred_CAI=n_pred,
                             eligible=eligible, reason=",".join(reason_parts), folds=int(grp["fold"].nunique()),
                             threshold_median=thr_med, rate_val_mean=rate_val_mean))
    agg_df = pd.DataFrame(agg_rows).sort_values(["horizon","model","window"]) if agg_rows else pd.DataFrame()

    # Top-3 e melhores por horizonte
    best_by_h: Dict[int, Dict[str, Any]] = {}
    for h in horizons:
        sub = agg_df[agg_df["horizon"]==h].copy()
        elig = sub[sub["eligible"] == True].copy()
        print(f"\nRESUMO — D+{h} (TESTE agregado) — modelo × janela")
        if sub.empty:
            print("–")
        else:
            print(sub[["model","window","precisao_CAI","recall_CAI","F1_CAI","acc","pred_CAI_rate","eligible","reason","folds","threshold_median"]].to_string(index=False))
        print(f"\nTOP-3 — D+{h} (TESTE)")
        if elig.empty:
            print("–")
        else:
            elig_sorted = elig.sort_values(["recall_CAI","F1_CAI","acc"], ascending=[False,False,False])
            top3 = elig_sorted.head(3).reset_index(drop=True)
            print(top3[["model","window","precisao_CAI","recall_CAI","F1_CAI","acc","pred_CAI_rate","folds","threshold_median"]].to_string(index=False))
            best = elig_sorted.iloc[0]
            best_by_h[h] = best.to_dict()
            # Matriz de confusão agregada do melhor
            print(f"\nMATRIZ DE CONFUSÃO — melhor combinação D+{h} (modelo={best['model']}, janela={int(best['window'])}) [CAI=1, N_CAI=0]")
            header = ["", "pred_CAI", "pred_NAO_CAI"]
            print("{:<14s}{:>10s}{:>14s}".format(*header))
            print("{:<14s}{:>10d}{:>14d}".format("true_CAI", int(best.get("TP",0)), int(best.get("FP",0))))
            print("{:<14s}{:>10d}{:>14d}".format("true_NAO_CAI", int(best.get("FN",0)), int(best.get("TN",0))))

    # Threshold operacional (medianas)
    threshold_operacional: Dict[int, float] = {}
    print("\nTHRESHOLD OPERACIONAL (mediana entre folds elegíveis)")
    for h in horizons:
        val = float(best_by_h[h]["threshold_median"]) if h in best_by_h and np.isfinite(best_by_h[h]["threshold_median"]) else float("nan")
        threshold_operacional[h] = val
        print(f"- D+{h}: {val if np.isfinite(val) else '–'}")

    # Sequência final — somente se houver elegíveis
    final_seq = []
    all_operable = all(h in best_by_h for h in horizons)
    if all_operable:
        try:
            last_fold = max(int(k[3]) for k in preds_tst.keys()) if preds_tst else None
        except Exception:
            last_fold = None
        if last_fold is not None:
            for h in horizons:
                b = best_by_h[h]
                m, W = str(b["model"]), int(b["window"])
                thr_med = float(b.get("threshold_median", float("nan")))
                yt, pt, dt = preds_tst.get((m, W, h, last_fold), (np.array([]), np.array([]), np.array([])))
                if len(pt) == 0 or not np.isfinite(thr_med):
                    final_seq.append("—")
                else:
                    yhat = (pt >= thr_med).astype(int)
                    final_seq.append("CAI" if int(yhat[-1]) == 1 else "NÃO CAI")
        else:
            all_operable = False
    print("\nSEQUÊNCIA FINAL (último bloco de TESTE):")
    print(f"- (D+1, D+3, D+5) = {', '.join(final_seq) if all_operable and final_seq else '—'}")

    # Baselines — TESTE (médias por horizonte)
    baseline_rows = []
    for h in horizons:
        seen = set()
        for key, (yt, pt, dt) in preds_tst.items():
            m, W, hh, fi = key
            if hh != h or fi in seen or len(yt) == 0:
                continue
            seen.add(fi)
            n = len(yt)
            # Sempre NÃO CAI
            pred0 = np.zeros(n, dtype=int)
            cm0 = confusion_matrix(yt, pred0, labels=[1,0])
            p0, r0, f10, a0 = cm_metrics(cm0)
            baseline_rows.append(dict(horizon=h, baseline="Sempre_NAO_CAI", precision=p0, f1=f10))
            # ProporcaoTreino>0.5 via VAL proxy
            yv = None
            for k2, (yvv, pvv, meta) in preds_val.items():
                mm, WW, hhh, fii = k2
                if hhh == h and fii == fi and len(yvv) > 0:
                    yv = yvv
                    break
            if yv is not None:
                pred1 = np.ones(n, dtype=int) if float((yv == 1).mean()) > 0.5 else np.zeros(n, dtype=int)
                cm1 = confusion_matrix(yt, pred1, labels=[1,0])
                p1, r1, f11, a1 = cm_metrics(cm1)
                baseline_rows.append(dict(horizon=h, baseline="ProporcaoTreino>0.5", precision=p1, f1=f11))
            # Sinal de ontem (ret1[t-1] < 0 -> CAI)
            ret1_map = pd.Series(df.set_index("__date__")["ret1"])
            pred2 = []
            for d in dt:
                prev = pd.to_datetime(d) - pd.Timedelta(days=1)
                v = ret1_map.get(prev, np.nan)
                pred2.append(1 if (pd.notna(v) and v < 0) else 0)
            pred2 = np.array(pred2, int)
            cm2 = confusion_matrix(yt, pred2, labels=[1,0])
            p2, r2, f12, a2 = cm_metrics(cm2)
            baseline_rows.append(dict(horizon=h, baseline="SinalOntem", precision=p2, f1=f12))
            # Momentum_3d (soma log ret últimos 3 < 0)
            mom3 = df["ret1"].rolling(3).sum().shift(1)
            mom_map = pd.Series(mom3.values, index=df["__date__"])
            pred3 = [1 if (pd.notna(mom_map.get(pd.to_datetime(d), np.nan)) and mom_map.get(pd.to_datetime(d)) < 0) else 0 for d in dt]
            pred3 = np.array(pred3, int)
            cm3 = confusion_matrix(yt, pred3, labels=[1,0])
            p3, r3, f13, a3 = cm_metrics(cm3)
            baseline_rows.append(dict(horizon=h, baseline="Momentum_3d", precision=p3, f1=f13))
    base_df = pd.DataFrame(baseline_rows)

    # Painel V1.2.1 — Uma página
    print("\n[Painel — Checklist Operacional (V1.2.1)]")
    lines = []

    # Bloco de Diagnóstico (3 linhas; antes de HORIZONTE D+1)
    py_ver = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
    device_line = f"cuda({xgb_gpu_name})" if xgb_device == "cuda" and xgb_gpu_name != "-" else "cpu"
    lines.append(f"Python: {py_ver} | xgboost: {XGB_VERSION} | device: {device_line}")
    lines.append(f"SSOT: {GOLD_PATH} | Folds: {max_folds} | Dry-run: {dry_run}")
    lines.append(f"Seeds: np={SEED_NUMPY} | tf={SEED_TF} | xgb={SEED_PY}")

    apto_map: Dict[int, str] = {}
    for h in horizons:
        lines.append(f"## HORIZONTE D+{h}")
        floor = precision_floor[f"D+{h}"] or 0.0
        thr_op = best_by_h[h]["threshold_median"] if h in best_by_h else "—"
        lines.append(f"Piso de Precisão(CAI): {floor:.2f}")
        lines.append(f"Threshold Operacional (mediana): {thr_op if isinstance(thr_op, str) or (isinstance(thr_op,float) and not np.isfinite(thr_op)) else f'{thr_op:.2f}'}\n")
        if h in best_by_h:
            b = best_by_h[h]
            lines.append(f"Vencedor: {b['model']} — janela={int(b['window'])}")
            lines.append(f"Precisão(CAI): {b['precisao_CAI']:.2f}   Recall(CAI): {b['recall_CAI']:.2f}   F1(CAI): {b['F1_CAI']:.2f}   ACC: {b['acc']:.2f}")
            # taxas: usar médias aproximadas a partir de agg (VAL média dos folds) e TESTE do agregado
            rate_val = float(agg_df[(agg_df['horizon']==h) & (agg_df['model']==b['model']) & (agg_df['window']==b['window'])]['rate_val_mean'].iloc[0]) if h in best_by_h else float("nan")
            rate_test = float(b['pred_CAI_rate'])
            def pct(v: float) -> str:
                return f"{(v*100):.1f}%" if np.isfinite(v) else "—"
            lines.append(f"Pred_CAI_rate (VAL/Teste): {pct(rate_val)} / {pct(rate_test)}")
            lines.append(f"Confusão (Teste): TP={int(b.get('TP',0))}  FP={int(b.get('FP',0))}  FN={int(b.get('FN',0))}  TN={int(b.get('TN',0))}\n")
            # Checks
            piso_ok = (b['precisao_CAI'] >= floor)
            cov_ok = (b['pred_CAI_rate'] >= coverage_min_rate) or (int(b['num_pred_CAI']) >= coverage_min_count)
            flood_ok = (rate_val <= FLOOD_GUARD_MAX) and (rate_test <= FLOOD_GUARD_MAX)
            sanity_ok = not (folds_df[(folds_df["horizon"]==h)&(folds_df["model"]==b['model'])&(folds_df["window"]==b['window'])]["reasons"].str.contains("metric_swap_detected")).any()
            # Baselines precision comparison
            bd = base_df[base_df["horizon"]==h]
            best_beats_all = True
            if not bd.empty:
                for _, row in bd.iterrows():
                    if b['precisao_CAI'] < float(row['precision']) - 1e-12:
                        best_beats_all = False
                        break
            lines.append("Checks:")
            lines.append(f"{'✓' if piso_ok else '✗'} Piso de Precisão(CAI)")
            lines.append(f"{'✓' if cov_ok else '✗'} Cobertura mínima")
            lines.append(f"{'✓' if flood_ok else '✗'} Freio de inundação (VAL/Teste)")
            lines.append(f"{'✓' if sanity_ok else '✗'} Sanity de métricas")
            lines.append(f"{'✓' if best_beats_all else '!'} Baselines (precisão)")
            # Baselines resumo
            lines.append("\nBaselines — Precisão(CAI) / F1:")
            if bd.empty:
                lines.append("• –")
            else:
                order = ["Sempre_NAO_CAI","ProporcaoTreino>0.5","SinalOntem","Momentum_3d"]
                for base in order:
                    row = bd[bd["baseline"]==base]
                    if not row.empty:
                        pr = float(row.iloc[0]["precision"]); f1v = float(row.iloc[0]["f1"])
                        lines.append(f"• {base}: {pr:.2f} / {f1v:.2f}")
            # Decisão
            apto = bool(piso_ok and cov_ok and flood_ok and sanity_ok and best_beats_all)
            apto_map[h] = "SIM" if apto else "NÃO"
            if not apto:
                motivo = []
                if not piso_ok: motivo.append("piso")
                if not cov_ok: motivo.append("cobertura")
                if not flood_ok: motivo.append("inundacao")
                if not sanity_ok: motivo.append("sanity")
                if not best_beats_all: motivo.append("baselines")
                lines.append(f"\nApto a operar?  NÃO")
                lines.append(f"Motivo (se NÃO): {','.join(motivo)}\n")
            else:
                lines.append(f"\nApto a operar?  SIM\n")
        else:
            apto_map[h] = "NÃO"
            lines.append("Vencedor: —")
            lines.append("Precisão(CAI): —   Recall(CAI): —   F1(CAI): —   ACC: —")
            lines.append("Pred_CAI_rate (VAL/Teste): — / —")
            lines.append("Confusão (Teste): TP=—  FP=—  FN=—  TN=—\n")
            lines.append("Checks:")
            lines.append("✗ Piso de Precisão(CAI)")
            lines.append("✗ Cobertura mínima")
            lines.append("✗ Freio de inundação (VAL/Teste)")
            lines.append("✗ Sanity de métricas")
            lines.append("! Baselines (precisão)")
            lines.append("\nBaselines — Precisão(CAI) / F1:\n• –")
            lines.append("\nApto a operar?  NÃO\n")

    # Rodapé
    seq_str = ", ".join(final_seq) if all_operable and final_seq else "—"
    lines.append(f"Resumo: D+1={apto_map.get(1,'NÃO')} | D+3={apto_map.get(3,'NÃO')} | D+5={apto_map.get(5,'NÃO')}   |  Sequência final (último TESTE): {seq_str}")
    alerts_added = False
    if repeat_msgs or xgb_fit_kw_error:
        lines.append("Alertas:")
        alerts_added = True
        for m in repeat_msgs:
            lines.append(f"• {m}")
        if xgb_fit_kw_error:
            lines.append("• repeat_error_stop:xgb_sklearn_fit — Ambiente tem múltiplos XGBoost? Limpar venv e reinstalar 3.0.5? [ ]Sim [ ]Não")
    if not alerts_added:
        lines.append("Alertas: —")
    lines.append("SSOT: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet | Folds: 10 | Seeds: {}/{}/{} | {}".format(SEED_NUMPY, SEED_TF, SEED_PY, TF_DEV_NOTE))
    lines.append("Precision floors: D+1={:.2f}; D+3={:.2f}; D+5={:.2f} | N_min_preds_val=10 | flood_guard≤{}%".format(
        precision_floor['D+1'], precision_floor['D+3'], precision_floor['D+5'], int(FLOOD_GUARD_MAX*100)
    ))

    print("\n" + "\n".join(lines))

    # Checklist final
    print("\nCHECKLIST")
    print(f"- xgboost_version detectada: {XGB_VERSION}")
    print(f"- XGBoost API: sklearn.fit(early_stopping_rounds={xgb_early_stopping_rounds}) | device={xgb_device}{'('+xgb_gpu_name+')' if xgb_device=='cuda' else ''}")
    print(f"- SSOT: {GOLD_PATH} (tier=GOLD)")
    print(f"- Horizontes: {horizons} | Janelas: {windows} | Folds: {len(splits)}")
    print(f"- precision_floor: D+1={precision_floor['D+1']:.2f}, D+3={precision_floor['D+3']:.2f}, D+5={precision_floor['D+5']:.2f}")
    if xgb_fit_kw_error:
        print("- Pergunta: Ambiente tem múltiplos XGBoost? Limpar venv e reinstalar 3.0.5? [ ]Sim [ ]Não")
    print(f"- dry_run=True (nenhum arquivo salvo)")
    print(f"\n[{now_ts()}] Fim — CAI vs NÃO CAI (dry_run={dry_run})")

if __name__ == "__main__":
    main()

[2025-09-19 20:25:24] Início — CAI vs NÃO CAI — V1.2/V1.2.1 (Delta XGBoost 3.0.5) | Seeds: numpy=2025, tf=42, xgb=7

[PROVA SSOT]
- Caminho: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet (tier=GOLD)
- Linhas: 3400 | date_min: 2012-01-03 | date_max: 2025-09-19
- Colunas (amostra): date, open, high, low, close, volume, ticker, open_norm, high_norm, low_norm, close_norm, volume_norm, return_1d, volatility_5d, sma_5, sma_20, sma_ratio, y_h1, y_h3, y_h5, y_h1_cls, y_h3_cls, y_h5_cls, year, __date__ ...

[WALK-FORWARD — Folds]
Fold 01 | train[2012-01-03 → 2013-04-02] | val[2013-04-03 → 2013-07-02] | test[2013-07-03 → 2014-01-02]
Fold 02 | train[2012-01-03 → 2013-10-02] | val[2013-10-03 → 2014-01-02] | test[2014-01-03 → 2014-07-02]
Fold 03 | train[2012-01-03 → 2014-04-02] | val[2014-04-03 → 2014-07-02] | test[2014-07-03 → 2015-01-02]
Fold 04 | train[2012-01-03 → 2014-10-02] | val[2014-10-03 → 2015-01-02] | test[2015-01-03 → 2015-07-02]
Fold 05 | train[2012-01-03 → 2015-04-02] | val[2015-04-03 →