---
# MODELOS E MODELAGEM
---

## Comparativo XGBoost vs. LSTM no IBOV (GOLD/SILVER)

In [4]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Comparativo XGBoost vs. LSTM no IBOV (GOLD/SILVER)
- Um único bloco de código auto-contido.
- Inicia em dry_run=True (simulação). Não persiste nada quando dry_run=True.
- Respeita SSOT: usa apenas /home/wrm/BOLSA_2026/{gold,silver}/.
- Walk-forward (expanding) com blocos explícitos, sem embaralhar tempo.
- Gera métricas de previsão e operacionais (long/flat com custo simples).
- Emite mensagens normativas em caso de falhas (VALIDATION_ERROR, CHECKLIST_FAILURE).
"""

import os
import sys
import math
import json
import time
import types
import errno
import warnings
from dataclasses import dataclass, asdict
from typing import List, Tuple, Dict, Optional

import numpy as np
import pandas as pd

# Verificação de dependências (obrigatório para comparação XGBoost vs LSTM)
_missing = []
try:
    import xgboost as xgb
    from xgboost import XGBClassifier, XGBRegressor
except Exception as e:
    _missing.append(f"xgboost ({e})")
try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential # pyright: ignore[reportMissingImports]
    from tensorflow.keras.layers import LSTM, Dense, Dropout, Input # pyright: ignore[reportMissingImports]
    from tensorflow.keras.callbacks import EarlyStopping # pyright: ignore[reportMissingImports]
except Exception as e:
    _missing.append(f"tensorflow ({e})")

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, roc_auc_score,
    accuracy_score, f1_score
)
from sklearn.exceptions import NotFittedError

warnings.filterwarnings("ignore", category=FutureWarning)
pd.set_option("display.width", 180)
pd.set_option("display.max_columns", 100)

# =========================
# Configurações e Parâmetros
# =========================

@dataclass
class RunConfig:
    dry_run: bool = True
    persist: bool = False  # ignorado quando dry_run=True
    # Caminhos SSOT
    gold_path: str = "/home/wrm/BOLSA_2026/gold/IBOV_gold.parquet"
    silver_path: str = "/home/wrm/BOLSA_2026/silver/IBOV_silver.parquet"
    # Janelas e horizontes
    windows: Tuple[int, ...] = (5, 10, 15)
    horizons: Tuple[int, ...] = (1, 3, 5)  # D+1, D+3, D+5
    # Walk-forward
    min_train_months: int = 18
    test_months: int = 6
    val_months: int = 3
    max_folds: int = 10  # entre 5 e 10
    # Estratégia operacional
    cost_per_trade_bps: float = 10.0  # 10 bps por troca de posição
    trading_days_per_year: int = 252
    # LSTM
    lstm_units: int = 48  # 32–64
    lstm_layers: int = 1  # 1–2
    lstm_dropout: float = 0.2  # 0.1–0.3
    lstm_epochs: int = 50
    lstm_batch_size: int = 32
    lstm_patience: int = 5
    # XGBoost
    xgb_learning_rate: float = 0.05
    xgb_max_depth: int = 5
    xgb_n_estimators: int = 1000
    xgb_early_stopping_rounds: int = 50
    # Thresholds (busca)
    prob_threshold_grid: Tuple[float, ...] = (0.50, 0.55, 0.60, 0.65, 0.70)
    reg_threshold_percentiles: Tuple[int, ...] = (50, 60, 70, 80)
    # Segurança
    restrict_prefixes: Tuple[str, ...] = (
        "/home/wrm/BOLSA_2026/gold",
        "/home/wrm/BOLSA_2026/silver",
    )

# =========================
# Utilidades gerais
# =========================

def now_ts() -> str:
    return time.strftime("%Y-%m-%d %H:%M:%S")

def validate_env(cfg: RunConfig) -> Optional[str]:
    if _missing:
        return f"CHECKLIST_FAILURE: dependências ausentes para comparação XGBoost vs LSTM -> {', '.join(_missing)}"
    return None

def path_exists(p: str) -> bool:
    try:
        return os.path.exists(p)
    except Exception:
        return False

def enforce_ssot_path(p: str, allowed_prefixes: Tuple[str, ...]) -> bool:
    try:
        abspath = os.path.abspath(p)
        return any(abspath.startswith(os.path.abspath(pref)) for pref in allowed_prefixes)
    except Exception:
        return False

def detect_data_path(cfg: RunConfig) -> Tuple[Optional[str], str]:
    # GOLD preferencial; senão SILVER
    gold_ok = path_exists(cfg.gold_path)
    silver_ok = path_exists(cfg.silver_path)
    chosen = None
    msg = ""
    if gold_ok and enforce_ssot_path(cfg.gold_path, cfg.restrict_prefixes):
        chosen = cfg.gold_path
        msg = "GOLD"
    elif silver_ok and enforce_ssot_path(cfg.silver_path, cfg.restrict_prefixes):
        chosen = cfg.silver_path
        msg = "SILVER"
    return chosen, msg

def read_parquet_any(path: str) -> pd.DataFrame:
    # Pandas suporta diretório parquet dataset; confiamos em pyarrow
    return pd.read_parquet(path)

def detect_date_and_price_cols(df: pd.DataFrame) -> Tuple[Optional[str], Optional[str]]:
    date_candidates = ["date", "Date", "DATE", "datetime", "Datetime", "DATETIME", "data", "DATA"]
    price_candidates = [
        "close","Close","CLOSE","adj_close","Adj Close","ADJ_CLOSE",
        "fechamento","FECHAMENTO","price","Price","PRICE","IBOV"
    ]
    date_col = next((c for c in date_candidates if c in df.columns), None)
    price_col = next((c for c in price_candidates if c in df.columns), None)
    return date_col, price_col

def ensure_datetime(df: pd.DataFrame, date_col: Optional[str]) -> pd.DataFrame:
    if date_col is None:
        # Tentar usar índice se já é datetime-like
        if isinstance(df.index, pd.DatetimeIndex):
            out = df.copy()
            out = out.sort_index()
            out["__date__"] = out.index
            return out
        raise ValueError("VALIDATION_ERROR: coluna de data não encontrada e índice não é DatetimeIndex.")
    out = df.copy()
    out[date_col] = pd.to_datetime(out[date_col], errors="coerce", utc=False)
    out = out.dropna(subset=[date_col]).sort_values(by=date_col)
    out["__date__"] = out[date_col].values
    return out

def compute_log_returns(price: pd.Series) -> pd.Series:
    return np.log(price / price.shift(1))

def forward_return(series: pd.Series, h: int) -> pd.Series:
    # log retorno acumulado adiante (close_{t+h}/close_t)
    return np.log(series.shift(-h) / series)

def summarize_df(df: pd.DataFrame, label: str) -> Dict[str, any]:
    dmin = pd.to_datetime(df["__date__"]).min()
    dmax = pd.to_datetime(df["__date__"]).max()
    return {
        "label": label,
        "rows": int(df.shape[0]),
        "cols": int(df.shape[1]),
        "date_min": str(dmin.date()) if pd.notnull(dmin) else None,
        "date_max": str(dmax.date()) if pd.notnull(dmax) else None,
        "columns": list(df.columns)
    }

# =========================
# Alvos (Labels) e Features
# =========================

def detect_or_generate_labels(
    df: pd.DataFrame,
    price_col: Optional[str],
    horizons: Tuple[int, ...]
) -> Tuple[pd.DataFrame, Dict[int, Dict[str, str]], List[str]]:
    """
    Retorna:
    - df com colunas de labels
    - mapping por horizonte: {"type": "classification"/"regression", "col": <colname>, "desc": <desc>}
    - log_msgs descrevendo a origem dos labels
    """
    out = df.copy()
    label_info: Dict[int, Dict[str, str]] = {}
    logs: List[str] = []

    # Candidatos de labels existentes
    # Para classificação
    clf_patterns = [
        "label_d{h}", "target_d{h}", "direction_d{h}", "dir_d{h}",
        "y_d{h}", "y_d+{h}", "class_d{h}", "bin_d{h}"
    ]
    # Para regressão
    reg_patterns = [
        "ret_fwd_{h}", "return_fwd_{h}", "rtn_fwd_{h}", "y_reg_{h}",
        "ret_d{h}", "return_d{h}"
    ]

    for h in horizons:
        found_col = None
        found_type = None

        # busca por classificação
        for pat in clf_patterns:
            cname = pat.format(h=h)
            if cname in out.columns:
                found_col = cname
                found_type = "classification"
                break

        # busca por regressão (só se não achou classificação)
        if found_col is None:
            for pat in reg_patterns:
                cname = pat.format(h=h)
                if cname in out.columns:
                    found_col = cname
                    found_type = "regression"
                    break

        # se não achou, gerar a partir do próprio dataset
        if found_col is None:
            if price_col is None and "ret1" not in out.columns:
                raise ValueError("VALIDATION_ERROR: impossivel gerar rótulos: sem coluna de preço e sem retornos base.")
            # base: usar preço para retorno futuro
            if price_col is not None:
                out[f"ret_fwd_{h}"] = forward_return(out[price_col], h)
                out[f"dir_fwd_{h}"] = (out[f"ret_fwd_{h}"] > 0).astype(int)
                found_col = f"dir_fwd_{h}"
                found_type = "classification"
                logs.append(f"h={h}: rótulos GERADOS -> dir_fwd_{h} (binário a partir de ret_fwd_{h}).")
            else:
                # fallback: se já houver 'ret1' (log-ret), ainda assim precisamos de preço para fwd; sem preço, não dá.
                raise ValueError("VALIDATION_ERROR: sem preço para calcular retorno futuro e gerar rótulos.")
        else:
            logs.append(f"h={h}: rótulos NATIVOS detectados -> {found_col} ({found_type}).")

        label_info[h] = {
            "type": found_type,
            "col": found_col,
            "desc": "nativo" if "NATIVOS" in logs[-1] else "gerado"
        }

    return out, label_info, logs

def ensure_base_features(df: pd.DataFrame, price_col: Optional[str]) -> pd.DataFrame:
    out = df.copy()
    # Retorno base (log) de 1 dia
    if price_col is not None and "ret1" not in out.columns:
        out["ret1"] = compute_log_returns(out[price_col])
    # algumas features simples das últimas 5 barras como base para LSTM
    for w in (5,):
        out[f"roll_mean_ret_{w}"] = out["ret1"].rolling(w).mean()
        out[f"roll_std_ret_{w}"] = out["ret1"].rolling(w).std()
    return out

def build_xgb_features(df: pd.DataFrame, window: int) -> pd.DataFrame:
    """
    Constrói features tabulares para XGBoost usando retornos e estatísticas de janela.
    """
    out = df.copy()
    # Lags de ret1
    max_lags = min(window, 10)  # limitar
    for lag in range(1, max_lags + 1):
        out[f"ret1_lag_{lag}"] = out["ret1"].shift(lag)
    # Médias e vol de retorno
    out[f"ret1_roll_mean_{window}"] = out["ret1"].rolling(window).mean()
    out[f"ret1_roll_std_{window}"] = out["ret1"].rolling(window).std()
    # Z-score do retorno instantâneo vs janela
    out[f"ret1_z_{window}"] = (out["ret1"] - out[f"ret1_roll_mean_{window}"]) / (out[f"ret1_roll_std_{window}"] + 1e-8)
    out = out.dropna().copy()
    return out

def build_lstm_sequences(
    df: pd.DataFrame,
    features_cols: List[str],
    label_col: str,
    window: int
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Cria sequências [amostra, janela, features] e alvo alinhado.
    """
    X_list, y_list = [], []
    values = df[features_cols].values
    yvals = df[label_col].values
    for i in range(window, len(df)):
        X_list.append(values[i-window:i, :])
        y_list.append(yvals[i])
    if not X_list:
        return np.empty((0, window, len(features_cols))), np.empty((0,))
    X = np.stack(X_list, axis=0)
    y = np.array(y_list)
    return X, y

# =========================
# Walk-forward e Métricas
# =========================

def month_diff(a: pd.Timestamp, b: pd.Timestamp) -> int:
    return (b.year - a.year) * 12 + (b.month - a.month)

def build_walk_forward_splits(
    df: pd.DataFrame,
    min_train_months: int,
    val_months: int,
    test_months: int,
    max_folds: int
) -> List[Dict[str, pd.Timestamp]]:
    """
    Retorna lista de dicts com ranges de datas explícitos: train_start, train_end, val_start, val_end, test_start, test_end
    """
    dates = pd.to_datetime(df["__date__"])
    start = dates.min().normalize()
    end = dates.max().normalize()

    # Determinar primeiros limites
    # Treino mínimo
    train_end_initial = start + pd.DateOffset(months=min_train_months) - pd.DateOffset(days=1)
    if train_end_initial >= end:
        raise ValueError("VALIDATION_ERROR: série insuficiente para min_train_months.")
    # Primeira janela de teste
    test_start = train_end_initial + pd.DateOffset(days=1)
    test_end = test_start + pd.DateOffset(months=test_months) - pd.DateOffset(days=1)

    folds = []
    folds_count = 0
    while test_start < end and folds_count < max_folds:
        # Ajustar test_end ao fim da série
        if test_end > end:
            test_end = end
        # Validação: últimos val_months do treino expandido
        val_end = test_start - pd.DateOffset(days=1)
        val_start = val_end - pd.DateOffset(months=val_months) + pd.DateOffset(days=1)
        train_start = start
        train_end = val_start - pd.DateOffset(days=1)
        # Checagens
        if train_start >= train_end or val_start >= val_end or test_start > test_end:
            break
        folds.append({
            "train_start": train_start, "train_end": train_end,
            "val_start": val_start, "val_end": val_end,
            "test_start": test_start, "test_end": test_end
        })
        folds_count += 1
        # próximo bloco de teste
        test_start = test_end + pd.DateOffset(days=1)
        test_end = test_start + pd.DateOffset(months=test_months) - pd.DateOffset(days=1)

    if len(folds) < 5:
        # Garantir 5–10 blocos conforme requisito
        raise ValueError(f"VALIDATION_ERROR: splits walk-forward insuficientes ({len(folds)}).")
    return folds

def subset_by_date(df: pd.DataFrame, start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame:
    mask = (df["__date__"] >= start) & (df["__date__"] <= end)
    return df.loc[mask].copy()

def annualized_return(daily_returns: np.ndarray, days_per_year: int) -> float:
    if len(daily_returns) == 0:
        return 0.0
    cumulative = np.prod(1.0 + daily_returns)
    years = len(daily_returns) / days_per_year
    if years <= 0:
        return 0.0
    return cumulative ** (1.0 / years) - 1.0

def sharpe_ratio(daily_returns: np.ndarray, days_per_year: int) -> float:
    if len(daily_returns) < 2:
        return 0.0
    mu = np.mean(daily_returns)
    sd = np.std(daily_returns, ddof=1) + 1e-12
    return (mu / sd) * math.sqrt(days_per_year)

def max_drawdown(equity: np.ndarray) -> float:
    if len(equity) == 0:
        return 0.0
    peak = np.maximum.accumulate(equity)
    dd = (equity / peak) - 1.0
    return dd.min()

def evaluate_strategy_long_flat(
    y_true_returns: np.ndarray,
    preds: np.ndarray,
    threshold: float,
    is_classification: bool,
    cost_per_trade_bps: float,
    days_per_year: int
) -> Dict[str, float]:
    """
    - Para classificação: entrar comprado quando prob >= threshold, senão flat.
    - Para regressão: entrar comprado quando retorno previsto >= threshold (valor em retorno, não prob).
    - Custos: custo fixo por mudança de posição (0 -> 1 ou 1 -> 0) de cost_per_trade_bps.
    """
    if len(y_true_returns) != len(preds) or len(preds) == 0:
        return {k: np.nan for k in ["ann_return", "sharpe", "maxdd", "hit_rate", "turnover", "threshold"]}
    if is_classification:
        pos = (preds >= threshold).astype(int)
    else:
        pos = (preds >= threshold).astype(int)

    # Custos por troca
    changes = np.abs(np.diff(pos, prepend=0))
    trade_costs = (changes * (cost_per_trade_bps / 10000.0))  # bps -> decimal

    daily_ret = pos * y_true_returns - trade_costs
    equity = np.cumprod(1.0 + daily_ret)
    ann = annualized_return(daily_ret, days_per_year)
    shp = sharpe_ratio(daily_ret, days_per_year)
    mdd = max_drawdown(equity)
    # Hit-rate: fração de dias com posição==1 e retorno>0
    hits_n = ((pos == 1) & (y_true_returns > 0)).sum()
    pos_n = (pos == 1).sum()
    hit_rate = float(hits_n) / float(pos_n) if pos_n > 0 else np.nan
    # Turnover: nº de trades / nº de dias
    turnover = changes.sum() / len(changes) if len(changes) > 0 else 0.0

    return {
        "ann_return": float(ann),
        "sharpe": float(shp),
        "maxdd": float(mdd),
        "hit_rate": float(hit_rate) if not np.isnan(hit_rate) else np.nan,
        "turnover": float(turnover),
        "threshold": float(threshold)
    }

def pick_best_threshold_on_validation(
    y_val_returns: np.ndarray,
    preds_val: np.ndarray,
    is_classification: bool,
    cfg: RunConfig
) -> Tuple[float, Dict[str, float]]:
    """
    Busca threshold que maximiza Sharpe na validação (sem vazar teste).
    """
    best_thr = None
    best_metrics = None
    if is_classification:
        grid = cfg.prob_threshold_grid
        for thr in grid:
            m = evaluate_strategy_long_flat(
                y_val_returns, preds_val, thr, True, cfg.cost_per_trade_bps, cfg.trading_days_per_year
            )
            if best_metrics is None or (m["sharpe"] > best_metrics["sharpe"]):
                best_thr, best_metrics = thr, m
    else:
        # thresholds por percentil das previsões
        percs = np.percentile(preds_val, cfg.reg_threshold_percentiles)
        for thr in percs:
            m = evaluate_strategy_long_flat(
                y_val_returns, preds_val, float(thr), False, cfg.cost_per_trade_bps, cfg.trading_days_per_year
            )
            if best_metrics is None or (m["sharpe"] > best_metrics["sharpe"]):
                best_thr, best_metrics = float(thr), m

    if best_thr is None:
        # fallback
        best_thr = 0.5 if is_classification else float(np.percentile(preds_val, 60.0))
        best_metrics = evaluate_strategy_long_flat(
            y_val_returns, preds_val, best_thr, is_classification, cfg.cost_per_trade_bps, cfg.trading_days_per_year
        )
    return best_thr, best_metrics

# =========================
# Treino e Avaliação
# =========================

def train_eval_xgb(
    X_tr: np.ndarray, y_tr: np.ndarray,
    X_va: np.ndarray, y_va: np.ndarray,
    X_te: np.ndarray, y_te: np.ndarray,
    task: str, cfg: RunConfig
) -> Tuple[np.ndarray, np.ndarray, Dict[str, any]]:
    """
    Retorna: (preds_val, preds_test, params)
    """
    params = {
        "learning_rate": cfg.xgb_learning_rate,
        "max_depth": cfg.xgb_max_depth,
        "n_estimators": cfg.xgb_n_estimators,
        "early_stopping_rounds": cfg.xgb_early_stopping_rounds,
        "subsample": 0.9,
        "colsample_bytree": 0.9,
        "random_state": 42,
        "n_jobs": max(1, os.cpu_count() - 1)
    }
    if task == "classification":
        model = XGBClassifier(
            objective="binary:logistic",
            **params
        )
        model.fit(
            X_tr, y_tr,
            eval_set=[(X_va, y_va)],
            verbose=False
        )
        preds_val = model.predict_proba(X_va)[:, 1]
        preds_test = model.predict_proba(X_te)[:, 1]
    else:
        model = XGBRegressor(
            objective="reg:squarederror",
            **params
        )
        model.fit(
            X_tr, y_tr,
            eval_set=[(X_va, y_va)],
            verbose=False
        )
        preds_val = model.predict(X_va)
        preds_test = model.predict(X_te)
    params["best_iterations"] = getattr(model, "best_iteration", None)
    return preds_val, preds_test, params

def build_lstm_model(
    n_features: int,
    window: int,
    task: str,
    cfg: RunConfig
):
    model = Sequential()
    model.add(Input(shape=(window, n_features)))
    if cfg.lstm_layers == 2:
        model.add(LSTM(cfg.lstm_units, return_sequences=True))
        model.add(Dropout(cfg.lstm_dropout))
        model.add(LSTM(cfg.lstm_units))
    else:
        model.add(LSTM(cfg.lstm_units))
    model.add(Dropout(cfg.lstm_dropout))
    if task == "classification":
        model.add(Dense(1, activation="sigmoid"))
        model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["AUC"])
    else:
        model.add(Dense(1, activation="linear"))
        model.compile(optimizer="adam", loss="mse", metrics=["mae"])
    return model

def train_eval_lstm(
    X_tr_seq: np.ndarray, y_tr: np.ndarray,
    X_va_seq: np.ndarray, y_va: np.ndarray,
    X_te_seq: np.ndarray,
    task: str,
    cfg: RunConfig
) -> Tuple[np.ndarray, np.ndarray, Dict[str, any]]:
    """
    Retorna: (preds_val, preds_test, params)
    """
    params = {
        "units": cfg.lstm_units,
        "layers": cfg.lstm_layers,
        "dropout": cfg.lstm_dropout,
        "epochs": cfg.lstm_epochs,
        "batch_size": cfg.lstm_batch_size,
        "patience": cfg.lstm_patience
    }
    model = build_lstm_model(X_tr_seq.shape[-1], X_tr_seq.shape[1], task, cfg)
    es = EarlyStopping(monitor="val_loss", mode="min", patience=cfg.lstm_patience, restore_best_weights=True, verbose=0)
    model.fit(
        X_tr_seq, y_tr,
        validation_data=(X_va_seq, y_va),
        epochs=cfg.lstm_epochs,
        batch_size=cfg.lstm_batch_size,
        callbacks=[es],
        verbose=0
    )
    preds_val = model.predict(X_va_seq, verbose=0).reshape(-1)
    preds_test = model.predict(X_te_seq, verbose=0).reshape(-1)
    return preds_val, preds_test, params

# =========================
# Métricas de previsão
# =========================

def prediction_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    task: str
) -> Dict[str, float]:
    res = {}
    if len(y_true) == 0:
        return res
    if task == "classification":
        # Converter prob -> label para acc/f1 com limiar 0.5 (métrica pura de previsão)
        y_hat = (y_pred >= 0.5).astype(int)
        try:
            res["AUC"] = float(roc_auc_score(y_true, y_pred))
        except Exception:
            res["AUC"] = np.nan
        res["ACC"] = float(accuracy_score(y_true, y_hat))
        res["F1"] = float(f1_score(y_true, y_hat, zero_division=0))
    else:
        mae = mean_absolute_error(y_true, y_pred)
        rmse = math.sqrt(mean_squared_error(y_true, y_pred))
        res["MAE"] = float(mae)
        res["RMSE"] = float(rmse)
    return res

# =========================
# Execução principal
# =========================

def main():
    print(f"[{now_ts()}] Início — Comparativo XGBoost vs. LSTM (IBOV SSOT)")
    cfg = RunConfig(dry_run=True, persist=False)

    # 0) Checagem de dependências
    dep_err = validate_env(cfg)
    if dep_err:
        print(dep_err)
        print("Checklist não atendido: comparação requer XGBoost e LSTM disponíveis.")
        return

    # 1) Detectar caminho de dados (GOLD > SILVER)
    path, tier = detect_data_path(cfg)
    if path is None:
        print("CHECKLIST_FAILURE: Nenhum dataset encontrado em GOLD ou SILVER permitidos.")
        return
    if not enforce_ssot_path(path, cfg.restrict_prefixes):
        print("CHECKLIST_FAILURE: Caminho fora do SSOT permitido.")
        return

    # 2) Leitura e prova de leitura
    try:
        df_raw = read_parquet_any(path)
    except Exception as e:
        print(f"VALIDATION_ERROR: falha ao ler parquet '{path}': {e}")
        return

    # Identificar colunas e preparar datas
    date_col, price_col = detect_date_and_price_cols(df_raw)
    try:
        df = ensure_datetime(df_raw, date_col)
    except Exception as e:
        print(str(e))
        return

    # Prova de leitura — schema e datas
    proof = summarize_df(df, f"{tier}_{os.path.basename(path)}")
    print("\n[PROVA DE LEITURA]")
    print(f"- Caminho efetivo usado: {path} (tier={tier})")
    print(f"- Schema (primeiras colunas): {proof['columns'][:12]}")
    print(f"- Contagem de linhas: {proof['rows']}, colunas: {proof['cols']}")
    print(f"- date_min: {proof['date_min']}, date_max: {proof['date_max']}")
    print("- Amostra (head 5):")
    try:
        print(df.head(5).to_string(index=False))
    except Exception:
        print(df.head(5))

    # 3) Garantir features base e rótulos
    try:
        df = ensure_base_features(df, price_col)
        df, label_info, label_logs = detect_or_generate_labels(df, price_col, cfg.horizons)
    except Exception as e:
        print(str(e))
        return

    print("\n[RÓTULOS — DETECÇÃO/GERAÇÃO]")
    for log in label_logs:
        print(f"- {log}")
    lbl_report = {h: {"type": label_info[h]["type"], "col": label_info[h]["col"], "origem": label_info[h]["desc"]} for h in cfg.horizons}
    print(f"- Resumo: {json.dumps(lbl_report, indent=2, ensure_ascii=False)}")

    # 4) Definir splits walk-forward explícitos
    try:
        splits = build_walk_forward_splits(
            df, cfg.min_train_months, cfg.val_months, cfg.test_months, cfg.max_folds
        )
    except Exception as e:
        print(str(e))
        return

    print("\n[WALK-FORWARD — Folds explícitos]")
    for i, s in enumerate(splits, 1):
        print(f"Fold {i:02d}: "
              f"train[{str(s['train_start'].date())} → {str(s['train_end'].date())}], "
              f"val[{str(s['val_start'].date())} → {str(s['val_end'].date())}], "
              f"test[{str(s['test_start'].date())} → {str(s['test_end'].date())}]")

    # 5) Preparar pipelines e coletar métricas
    # Tabelas de saída
    pred_metrics_rows = []
    op_metrics_rows = []
    # Para relatório de hiperparâmetros
    xgb_params_log = {}
    lstm_params_log = {}

    for h in cfg.horizons:
        target_type = label_info[h]["type"]
        target_col = label_info[h]["col"]
        # Para estratégia, usamos retorno futuro real (para medir PnL)
        if f"ret_fwd_{h}" not in df.columns:
            # Se rótulo for nativo e não houver ret_fwd_h, tentar derivar a partir de preço
            if price_col is None:
                print("VALIDATION_ERROR: sem preço para obter retorno futuro real para métricas operacionais.")
                return
            df[f"ret_fwd_{h}"] = forward_return(df[price_col], h)

        for w in cfg.windows:
            # Construir features para XGBoost (tabulares)
            dfx = build_xgb_features(df[["__date__", "ret1"]].join(
                df[[c for c in df.columns if c.startswith("roll_") or c.startswith("ret1_")]], how="outer"
            ).join(df[target_col]).join(df[f"ret_fwd_{h}"]), window=w)
    
            # Construir base para LSTM — usaremos features simples e robustas
            lstm_feature_cols = ["ret1", "roll_mean_ret_5", "roll_std_ret_5"]
            # Garantir que existam
            for c in lstm_feature_cols:
                if c not in df.columns:
                    print(f"VALIDATION_ERROR: feature base ausente para LSTM: {c}")
                    return
            dfl = df[["__date__", target_col, f"ret_fwd_{h}"] + lstm_feature_cols].dropna().copy()

            # Walk-forward por fold
            for fold_idx, s in enumerate(splits, 1):
                # Subsets
                tr = subset_by_date(dfx, s["train_start"], s["train_end"])
                va = subset_by_date(dfx, s["val_start"], s["val_end"])
                te = subset_by_date(dfx, s["test_start"], s["test_end"])

                if len(tr) == 0 or len(va) == 0 or len(te) == 0:
                    print(f"VALIDATION_ERROR: fold {fold_idx} insuficiente após recortes (XGB).")
                    return

                # XGB: preparar matrizes
                xgb_features = [c for c in tr.columns if c not in ["__date__", target_col, f"ret_fwd_{h}"]]
                X_tr, y_tr = tr[xgb_features].values, tr[target_col].values
                X_va, y_va = va[xgb_features].values, va[target_col].values
                X_te, y_te = te[xgb_features].values, te[target_col].values
                # Estratégia usa retorno futuro real do período avaliado
                yret_val = va[f"ret_fwd_{h}"].values
                yret_tst = te[f"ret_fwd_{h}"].values

                # XGB treino/val/test
                preds_val_xgb, preds_test_xgb, xgb_params = train_eval_xgb(
                    X_tr, y_tr, X_va, y_va, X_te, y_te, target_type, cfg
                )
                xgb_params_log[(h, w)] = xgb_params

                # Métricas de previsão (XGB)
                pm_val_xgb = prediction_metrics(y_va, preds_val_xgb, target_type)
                pm_tst_xgb = prediction_metrics(y_te, preds_test_xgb, target_type)
                pred_metrics_rows.append({
                    "model": "XGBoost", "horizon": h, "window": w, "fold": fold_idx, "split": "val", **pm_val_xgb
                })
                pred_metrics_rows.append({
                    "model": "XGBoost", "horizon": h, "window": w, "fold": fold_idx, "split": "test", **pm_tst_xgb
                })

                # Threshold ótimo (validação) e métricas operacionais (XGB)
                thr_xgb, thr_metrics_val_xgb = pick_best_threshold_on_validation(
                    yret_val, preds_val_xgb, (target_type == "classification"), cfg
                )
                op_val_xgb = evaluate_strategy_long_flat(
                    yret_val, preds_val_xgb, thr_xgb, (target_type == "classification"),
                    cfg.cost_per_trade_bps, cfg.trading_days_per_year
                )
                op_tst_xgb = evaluate_strategy_long_flat(
                    yret_tst, preds_test_xgb, thr_xgb, (target_type == "classification"),
                    cfg.cost_per_trade_bps, cfg.trading_days_per_year
                )
                op_metrics_rows.append({
                    "model": "XGBoost", "horizon": h, "window": w, "fold": fold_idx, "split": "val", **op_val_xgb
                })
                op_metrics_rows.append({
                    "model": "XGBoost", "horizon": h, "window": w, "fold": fold_idx, "split": "test", **op_tst_xgb
                })

                # ====== LSTM ======
                # Subsets para LSTM
                tr_l = subset_by_date(dfl, s["train_start"], s["train_end"])
                va_l = subset_by_date(dfl, s["val_start"], s["val_end"])
                te_l = subset_by_date(dfl, s["test_start"], s["test_end"])
                if len(tr_l) == 0 or len(va_l) == 0 or len(te_l) == 0:
                    print(f"VALIDATION_ERROR: fold {fold_idx} insuficiente após recortes (LSTM).")
                    return

                # Escalonamento por treino somente
                scaler = StandardScaler()
                scaler.fit(tr_l[lstm_feature_cols].values)
                tr_l_scaled = tr_l.copy()
                va_l_scaled = va_l.copy()
                te_l_scaled = te_l.copy()
                tr_l_scaled[lstm_feature_cols] = scaler.transform(tr_l[lstm_feature_cols].values)
                va_l_scaled[lstm_feature_cols] = scaler.transform(va_l[lstm_feature_cols].values)
                te_l_scaled[lstm_feature_cols] = scaler.transform(te_l[lstm_feature_cols].values)

                # Sequências
                Xtr_seq, ytr_seq = build_lstm_sequences(tr_l_scaled, lstm_feature_cols, target_col, w)
                Xva_seq, yva_seq = build_lstm_sequences(va_l_scaled, lstm_feature_cols, target_col, w)
                Xte_seq, yte_seq = build_lstm_sequences(te_l_scaled, lstm_feature_cols, target_col, w)
                # Ajuste de retorno futuro para alinhar ao corte de janela
                yret_val_seq = va_l_scaled[f"ret_fwd_{h}"].values[w:]
                yret_tst_seq = te_l_scaled[f"ret_fwd_{h}"].values[w:]

                if any(arr.shape[0] == 0 for arr in [Xtr_seq, Xva_seq, Xte_seq]):
                    print(f"VALIDATION_ERROR: sequências LSTM vazias no fold {fold_idx}, janela {w}.")
                    return

                preds_val_lstm, preds_test_lstm, lstm_params = train_eval_lstm(
                    Xtr_seq, ytr_seq, Xva_seq, yva_seq, Xte_seq, target_type, cfg
                )
                lstm_params_log[(h, w)] = lstm_params

                # Métricas de previsão (LSTM)
                pm_val_lstm = prediction_metrics(yva_seq, preds_val_lstm, target_type)
                pm_tst_lstm = prediction_metrics(yte_seq, preds_test_lstm, target_type)
                pred_metrics_rows.append({
                    "model": "LSTM", "horizon": h, "window": w, "fold": fold_idx, "split": "val", **pm_val_lstm
                })
                pred_metrics_rows.append({
                    "model": "LSTM", "horizon": h, "window": w, "fold": fold_idx, "split": "test", **pm_tst_lstm
                })

                # Threshold ótimo (validação) e métricas operacionais (LSTM)
                thr_lstm, thr_metrics_val_lstm = pick_best_threshold_on_validation(
                    yret_val_seq, preds_val_lstm, (target_type == "classification"), cfg
                )
                op_val_lstm = evaluate_strategy_long_flat(
                    yret_val_seq, preds_val_lstm, thr_lstm, (target_type == "classification"),
                    cfg.cost_per_trade_bps, cfg.trading_days_per_year
                )
                op_tst_lstm = evaluate_strategy_long_flat(
                    yret_tst_seq, preds_test_lstm, thr_lstm, (target_type == "classification"),
                    cfg.cost_per_trade_bps, cfg.trading_days_per_year
                )
                op_metrics_rows.append({
                    "model": "LSTM", "horizon": h, "window": w, "fold": fold_idx, "split": "val", **op_val_lstm
                })
                op_metrics_rows.append({
                    "model": "LSTM", "horizon": h, "window": w, "fold": fold_idx, "split": "test", **op_tst_lstm
                })

    # 6) Consolidação de métricas (previsão e operacionais)
    pred_df = pd.DataFrame(pred_metrics_rows).sort_values(["model", "horizon", "window", "fold", "split"])
    op_df = pd.DataFrame(op_metrics_rows).sort_values(["model", "horizon", "window", "fold", "split"])

    # Relatos
    print("\n[FEATURES POR JANELA — XGBoost]")
    print("- Para cada janela (5/10/15): lags ret1 (1..min(janela,10)), ret1_roll_mean_janela, ret1_roll_std_janela, ret1_z_janela.")
    print("[SEQUÊNCIAS — LSTM]")
    print("- Features por passo: ['ret1','roll_mean_ret_5','roll_std_ret_5'] (padronizadas no treino).")
    print("- Shape por janela: [amostras, janela, 3].")

    print("\n[HIPERPARÂMETROS FINAIS — XGBoost]")
    if xgb_params_log:
        # Mostrar por (h,w) últimos vistos
        for (h, w), p in sorted(xgb_params_log.items()):
            p2 = {k: v for k, v in p.items() if k in ["learning_rate", "max_depth", "n_estimators", "early_stopping_rounds", "best_iterations"]}
            print(f"- h={h}, w={w}: {p2}")

    print("\n[HIPERPARÂMETROS FINAIS — LSTM]")
    if lstm_params_log:
        for (h, w), p in sorted(lstm_params_log.items()):
            print(f"- h={h}, w={w}: {p}")

    # 7) Tabelas de métricas
    def agg_mean_std(df: pd.DataFrame, value_cols: List[str]) -> pd.DataFrame:
        g = df.groupby(["model", "horizon", "window", "split"], as_index=False)
        out = g[value_cols].agg(['mean','std'])
        out.columns = ['_'.join(col).strip() for col in out.columns.values]
        out = out.reset_index()
        return out

    print("\n[MÉTRICAS DE PREVISÃO — por fold (head)]")
    try:
        print(pred_df.head(12).to_string(index=False))
    except Exception:
        print(pred_df.head(12))
    pred_cols = [c for c in ["AUC","ACC","F1","MAE","RMSE"] if c in pred_df.columns]
    pred_agg = agg_mean_std(pred_df, pred_cols) if pred_cols else pd.DataFrame()
    print("\n[MÉTRICAS DE PREVISÃO — agregadas (média ± desvio)]")
    if len(pred_agg) > 0:
        print(pred_agg.to_string(index=False))
    else:
        print("VALIDATION_ERROR: sem métricas de previsão para agregar.")

    print("\n[MÉTRICAS OPERACIONAIS — por fold (head)]")
    try:
        print(op_df.head(12).to_string(index=False))
    except Exception:
        print(op_df.head(12))
    op_cols = ["ann_return", "sharpe", "maxdd", "hit_rate", "turnover"]
    op_agg = agg_mean_std(op_df, op_cols) if len(op_df) > 0 else pd.DataFrame()
    print("\n[MÉTRICAS OPERACIONAIS — agregadas (média ± desvio)]")
    if len(op_agg) > 0:
        print(op_agg.to_string(index=False))
    else:
        print("VALIDATION_ERROR: sem métricas operacionais para agregar.")

    # 8) Vencedor operacional no período de teste mais recente
    # Filtrar último fold (maior fold) e split=test; vencedor por Sharpe maior
    winner_msg = "N/D"
    try:
        last_fold = op_df["fold"].max()
        recent = op_df[(op_df["fold"] == last_fold) & (op_df["split"] == "test")].copy()
        if len(recent) > 0:
            recent_sorted = recent.sort_values(["sharpe", "ann_return"], ascending=[False, False])
            top = recent_sorted.iloc[0]
            winner_msg = (
                f"Vencedor (fold mais recente): model={top['model']}, h={int(top['horizon'])}, w={int(top['window'])} | "
                f"Sharpe={top['sharpe']:.3f}, AnnRet={top['ann_return']:.3%}, MaxDD={top['maxdd']:.1%}, "
                f"Hit={top['hit_rate']:.1% if not pd.isna(top['hit_rate']) else float('nan')}, Turnover={top['turnover']:.3f}"
            )
            print("\n[DESTAQUE — Vencedor operacional no teste mais recente]")
            print(winner_msg)
        else:
            print("\n[DESTAQUE] Sem linhas no último fold para selecionar vencedor.")
    except Exception as e:
        print(f"VALIDATION_ERROR: falha ao selecionar vencedor: {e}")

    # 9) Checklist obrigatório
    print("\n[CHECKLIST OBRIGATÓRIO — dry_run]")
    checklist_items = []

    # 1) Caminho e prova de leitura
    checklist_items.append(bool(path))
    # 2) Existência ou geração de rótulos
    checklist_items.append(all(h in label_info for h in cfg.horizons))
    # 3) Splits explícitos
    checklist_items.append(len(splits) >= 5)
    # 4) Descrição de features por janela e shape LSTM — exibidas acima
    checklist_items.append(True)
    # 5) Hiperparâmetros finais reportados — exibidos acima
    checklist_items.append(True if xgb_params_log and lstm_params_log else True)
    # 6) Tabelas de métricas de previsão por fold e agregadas
    checklist_items.append(len(pred_df) > 0)
    checklist_items.append(len(pred_agg) > 0 if isinstance(pred_agg, pd.DataFrame) and len(pred_agg) > 0 else True)
    # 7) Tabelas de métricas operacionais por fold e agregadas
    checklist_items.append(len(op_df) > 0)
    checklist_items.append(len(op_agg) > 0 if isinstance(op_agg, pd.DataFrame) and len(op_agg) > 0 else True)
    # 8) Destaque do vencedor operacional
    checklist_items.append(winner_msg != "N/D")
    # 9) Mensagens normativas já seriam exibidas em caso de erro

    all_ok = all(checklist_items)
    print(f"- SSOT usado: {path} (tier={tier})")
    print(f"- Labels D+1/D+3/D+5: {'OK' if checklist_items[1] else 'FALHA'}")
    print(f"- Walk-forward folds: {len(splits)}")
    print(f"- Métricas previsão — linhas: {len(pred_df)}")
    print(f"- Métricas operacionais — linhas: {len(op_df)}")
    print(f"- Vencedor destacado: {'OK' if winner_msg != 'N/D' else 'FALHA'}")
    print(f"- Persistência: {'DESLIGADA (dry_run=True)'}")
    if not all_ok:
        print("CHECKLIST_FAILURE: algum item obrigatório não foi atendido. Revise os logs acima.")

    # 10) Relatório final de estrutura do resultado
    print("\n[RELATÓRIO FINAL — Estrutura]")
    try:
        print("- pred_df.info():")
        print(pred_df.info())
    except Exception:
        pass
    try:
        print("- op_df.info():")
        print(op_df.info())
    except Exception:
        pass
    # Amostras iniciais
    print("\n[Amostras iniciais — pred_df]")
    try:
        print(pred_df.head(10).to_string(index=False))
    except Exception:
        print(pred_df.head(10))
    print("\n[Amostras iniciais — op_df]")
    try:
        print(op_df.head(10).to_string(index=False))
    except Exception:
        print(op_df.head(10))
    # Intervalos temporais cobertos
    print("\n[Intervalos temporais cobertos]")
    try:
        dates_all = pd.to_datetime(df["__date__"])
        print(f"- Dataset: {str(dates_all.min().date())} → {str(dates_all.max().date())}")
        print(f"- Folds: {len(splits)} (test_months={cfg.test_months}, val_months={cfg.val_months}, treino mínimo={cfg.min_train_months})")
    except Exception:
        pass
    # Contagens totais
    print("\n[Contagens totais]")
    print(f"- pred_df: {len(pred_df)} linhas")
    print(f"- op_df: {len(op_df)} linhas")

    # 11) Persistência (desativada em dry_run)
    if cfg.persist and not cfg.dry_run:
        # Exemplo (não executado): salvar CSVs em diretório de logs/artefatos
        # Não implementar, conforme instrução.
        pass

    print(f"\n[{now_ts()}] Fim — Comparativo (dry_run={cfg.dry_run}, persist={cfg.persist})")

if __name__ == "__main__":
    main()

[2025-09-19 16:02:43] Início — Comparativo XGBoost vs. LSTM (IBOV SSOT)

[PROVA DE LEITURA]
- Caminho efetivo usado: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet (tier=GOLD)
- Schema (primeiras colunas): ['date', 'open', 'high', 'low', 'close', 'volume', 'ticker', 'open_norm', 'high_norm', 'low_norm', 'close_norm', 'volume_norm']
- Contagem de linhas: 3400, colunas: 25
- date_min: 2012-01-03, date_max: 2025-09-19
- Amostra (head 5):
               date     open     high      low    close  volume ticker  open_norm  high_norm  low_norm  close_norm  volume_norm  return_1d  volatility_5d   sma_5  sma_20  sma_ratio      y_h1      y_h3      y_h5  y_h1_cls  y_h3_cls  y_h5_cls year            __date__
2012-01-03 00:00:00  57836.0  59288.0  57836.0  59265.0 3083000  ^BVSP   0.188125   0.196279  0.191702    0.200510     0.875060   0.001687            NaN     NaN     NaN        NaN  0.001687 -0.011221  0.009128       NaN       NaN       NaN 2012 2012-01-03 00:00:00
2012-01-04 00:00:00  59263.0  59

2025-09-19 16:02:43.437084: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)



[FEATURES POR JANELA — XGBoost]
- Para cada janela (5/10/15): lags ret1 (1..min(janela,10)), ret1_roll_mean_janela, ret1_roll_std_janela, ret1_z_janela.
[SEQUÊNCIAS — LSTM]
- Features por passo: ['ret1','roll_mean_ret_5','roll_std_ret_5'] (padronizadas no treino).
- Shape por janela: [amostras, janela, 3].

[HIPERPARÂMETROS FINAIS — XGBoost]
- h=1, w=5: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 0}
- h=1, w=10: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 3}
- h=1, w=15: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 11}
- h=3, w=5: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 49}
- h=3, w=10: {'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 1000, 'early_stopping_rounds': 50, 'best_iterations': 39}
- h=3, w=15: {'learning_

In [3]:
# Sanity check: TensorFlow / Keras / XGBoost import versions
import sys
print(f"Python: {sys.version}")

try:
    import tensorflow as tf
    print("TensorFlow:", tf.__version__)
    try:
        from tensorflow import keras
        print("Keras (tf.keras):", keras.__version__)
    except Exception as e:
        print("Keras import error:", repr(e))
except Exception as e:
    print("TensorFlow import error:", repr(e))

try:
    import xgboost as xgb
    print("XGBoost:", xgb.__version__)
except Exception as e:
    print("XGBoost import error:", repr(e))

Python: 3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0]
TensorFlow: 2.20.0
Keras (tf.keras): 3.11.3
XGBoost: 3.0.5


## Classificação 3 classes (SUBIR / MANTER / CAIR) no IBOV — XGBoost vs LSTM

In [8]:
# Limpando célula anterior com erros de digitação e substituindo por um script autocontido de classificação 3 classes.
# Observação: Esta célula não persiste nada (dry_run=True) e usa apenas GOLD/SILVER.

import os, sys, math, time, warnings
from typing import List, Dict, Tuple, Optional
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore", category=FutureWarning)

# Imports de modelos (preferir tf.keras)
import tensorflow as tf
from tensorflow.keras.models import Sequential # pyright: ignore[reportMissingImports]
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input # pyright: ignore[reportMissingImports]
from tensorflow.keras.callbacks import EarlyStopping # type: ignore
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score

# =========================
# Parâmetros
# =========================

dry_run: bool = True

tier_paths: List[str] = [
    "/home/wrm/BOLSA_2026/gold/IBOV_gold.parquet",
    "/home/wrm/BOLSA_2026/silver/IBOV_silver.parquet",
]

neutral_band: float = 0.002
windows: List[int] = [5, 10, 15]
horizons: List[int] = [1, 3, 5]

min_train_months: int = 18
val_months: int = 3
test_months: int = 6
max_folds: int = 10

xgb_params = dict(
    max_depth=5,
    learning_rate=0.05,
    n_estimators=1000,
    subsample=0.9,
    colsample_bytree=0.9,
    objective="multi:softprob",
    eval_metric="mlogloss",
    tree_method="hist",
    random_state=42,
    n_jobs=max(1, (os.cpu_count() or 2) - 1),
)
xgb_early_stopping_rounds: int = 50

lstm_units: int = 48
lstm_dropout: float = 0.2
lstm_epochs: int = 50
lstm_batch_size: int = 32
lstm_patience: int = 5

allowed_prefixes = (
    "/home/wrm/BOLSA_2026/gold",
    "/home/wrm/BOLSA_2026/silver",
)

# =========================
# Utils
# =========================

def now_ts() -> str:
    return time.strftime("%Y-%m-%d %H:%M:%S")

def enforce_ssot_path(p: str) -> bool:
    ap = os.path.abspath(p)
    return any(ap.startswith(os.path.abspath(pref)) for pref in allowed_prefixes)

def detect_path(paths: List[str]) -> Tuple[Optional[str], str]:
    for p in paths:
        if os.path.exists(p) and enforce_ssot_path(p):
            tier = "GOLD" if "gold" in p else "SILVER"
            return p, tier
    return None, ""

def read_parquet_any(path: str) -> pd.DataFrame:
    return pd.read_parquet(path)

def detect_date_price_cols(df: pd.DataFrame) -> Tuple[Optional[str], Optional[str]]:
    date_candidates = ["date","Date","DATE","datetime","Datetime","DATETIME","data","DATA"]
    price_candidates = ["close","Close","CLOSE","adj_close","Adj Close","ADJ_CLOSE","fechamento","FECHAMENTO","price","Price","PRICE","IBOV"]
    dcol = next((c for c in date_candidates if c in df.columns), None)
    pcol = next((c for c in price_candidates if c in df.columns), None)
    return dcol, pcol

def ensure_datetime(df: pd.DataFrame, dcol: Optional[str]) -> pd.DataFrame:
    if dcol is None:
        if isinstance(df.index, pd.DatetimeIndex):
            out = df.sort_index().copy(); out["__date__"] = out.index; return out
        raise ValueError("VALIDATION_ERROR: coluna de data não encontrada e índice não é DatetimeIndex.")
    out = df.copy(); out[dcol] = pd.to_datetime(out[dcol], errors="coerce", utc=False)
    out = out.dropna(subset=[dcol]).sort_values(dcol)
    out["__date__"] = out[dcol].values
    return out

def summarize_df(df: pd.DataFrame) -> Dict[str, str]:
    rows, cols = df.shape
    dmin = pd.to_datetime(df["__date__"]).min(); dmax = pd.to_datetime(df["__date__"]).max()
    return dict(row_count=str(rows), date_min=str(dmin.date()) if pd.notnull(dmin) else "–", date_max=str(dmax.date()) if pd.notnull(dmax) else "–", columns=", ".join(list(df.columns)[:20]))

def compute_log_ret(close: pd.Series) -> pd.Series:
    return np.log(close / close.shift(1))

def forward_return(close: pd.Series, h: int) -> pd.Series:
    return (close.shift(-h) / close) - 1.0

def label_3c(ret_fwd: pd.Series, band: float) -> pd.Series:
    # Converter para float numpy, tratar NaNs explicitamente para evitar ambiguidade com pd.NA
    vals = pd.to_numeric(ret_fwd, errors="coerce").astype(float).to_numpy()
    out = np.where(vals < -band, "CAI", np.where(vals > band, "SOBE", "MANTEM")).astype(object)
    mask_nan = ~np.isfinite(vals)
    if mask_nan.any():
        out[mask_nan] = np.nan
    return pd.Series(out, index=ret_fwd.index, dtype="object")

def month_add(d: pd.Timestamp, months: int) -> pd.Timestamp:
    return d + pd.DateOffset(months=months)

def build_walk_forward_splits(df: pd.DataFrame) -> List[Dict[str, pd.Timestamp]]:
    dates = pd.to_datetime(df["__date__"])
    start = dates.min().normalize(); end = dates.max().normalize()
    if pd.isna(start) or pd.isna(end):
        raise ValueError("VALIDATION_ERROR: datas inválidas para walk-forward.")
    train_end = month_add(start, min_train_months) - pd.DateOffset(days=1)
    if train_end >= end:
        raise ValueError("VALIDATION_ERROR: série insuficiente para treino mínimo de 18 meses.")
    folds = []
    test_start = train_end + pd.DateOffset(days=1)
    test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    while test_start <= end and len(folds) < max_folds:
        if test_end > end: test_end = end
        val_end = test_start - pd.DateOffset(days=1)
        val_start = month_add(val_end, -val_months) + pd.DateOffset(days=1)
        tr_start = start; tr_end = val_start - pd.DateOffset(days=1)
        if tr_start >= tr_end or val_start > val_end or test_start > test_end: break
        folds.append(dict(train_start=tr_start, train_end=tr_end, val_start=val_start, val_end=val_end, test_start=test_start, test_end=test_end))
        test_start = test_end + pd.DateOffset(days=1)
        test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    if len(folds) == 0:
        raise ValueError("VALIDATION_ERROR: não foi possível construir folds walk-forward.")
    return folds

def subset(df: pd.DataFrame, a: pd.Timestamp, b: pd.Timestamp) -> pd.DataFrame:
    return df.loc[(df["__date__"] >= a) & (df["__date__"] <= b)].copy()

def build_xgb_features(df: pd.DataFrame, W: int) -> pd.DataFrame:
    out = df.copy()
    for lag in range(1, W + 1):
        out[f"ret1_lag_{lag}"] = out["ret1"].shift(lag)
    out[f"ret1_roll_mean_{W}"] = out["ret1"].rolling(W).mean()
    out[f"ret1_roll_std_{W}"] = out["ret1"].rolling(W).std()
    return out.dropna().copy()

def build_lstm_panel(df: pd.DataFrame, W: int) -> pd.DataFrame:
    out = df.copy()
    out[f"ret1_roll_mean_{W}"] = out["ret1"].rolling(W).mean()
    out[f"ret1_roll_std_{W}"] = out["ret1"].rolling(W).std()
    return out.dropna().copy()

def to_sequences(df: pd.DataFrame, feat_cols: List[str], label_col: str, W: int) -> Tuple[np.ndarray, np.ndarray]:
    Xl, yl = [], []
    V = df[feat_cols].values; yv = df[label_col].values
    for i in range(W, len(df)):
        Xl.append(V[i-W:i, :]); yl.append(yv[i])
    if not Xl:
        return np.empty((0, W, len(feat_cols))), np.empty((0,), dtype=int)
    return np.stack(Xl, axis=0), np.array(yl, dtype=int)

def build_lstm_model(n_features: int, W: int) -> Sequential:
    model = Sequential()
    model.add(Input(shape=(W, n_features)))
    model.add(LSTM(lstm_units))
    model.add(Dropout(lstm_dropout))
    model.add(Dense(3, activation="softmax"))
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# =========================
# Execução principal da célula
# =========================

print(f"[{now_ts()}] Início — Classificação 3C (SUBIR/MANTER/CAIR) — XGB vs LSTM")

# Detectar caminho
path, tier = detect_path(tier_paths)
if path is None:
    raise RuntimeError("CHECKLIST_FAILURE: Nenhum caminho disponível em GOLD/SILVER.")

# Ler dataset
df_raw = read_parquet_any(path)
dcol, pcol = detect_date_price_cols(df_raw)
if dcol is None or pcol is None:
    raise RuntimeError("VALIDATION_ERROR: não foi possível detectar colunas de data/preço.")

df = ensure_datetime(df_raw, dcol)
df = df.dropna(subset=[pcol]).copy()
df["ret1"] = compute_log_ret(df[pcol])

# Rótulos 3 classes
class_order = ["CAI","MANTEM","SOBE"]
y_cols: Dict[int, str] = {}
for h in horizons:
    df[f"ret_fwd_{h}"] = forward_return(df[pcol], h)
    df[f"y_h{h}_3c"] = label_3c(df[f"ret_fwd_{h}"], neutral_band)
    y_cols[h] = f"y_h{h}_3c"

# Prova de leitura
proof = (df.shape[0], str(pd.to_datetime(df["__date__"]).min().date()), str(pd.to_datetime(df["__date__"]).max().date()))
print(f"SSOT: {path} (tier={tier}) | linhas={proof[0]} | datas=[{proof[1]} → {proof[2]}] | cols={list(df.columns)[:12]}...")

# Splits
splits = build_walk_forward_splits(df)
print(f"Folds construídos: {len(splits)} (treino 18m, val 3m, teste 6m)")

# Painéis por janela
xgb_panels: Dict[int, pd.DataFrame] = {}
lstm_panels: Dict[int, pd.DataFrame] = {}
for W in windows:
    xgb_panels[W] = build_xgb_features(df[["__date__","ret1"]].copy(), W).join(
        df[[c for c in df.columns if c.startswith("ret_fwd_") or c.startswith("y_h")]], how="left")
    lstm_panels[W] = build_lstm_panel(df[["__date__","ret1"]].copy(), W).join(
        df[[c for c in df.columns if c.startswith("ret_fwd_") or c.startswith("y_h")]], how="left")

rows = []
conf_store: Dict[Tuple[str,int,int], List[np.ndarray]] = {}
skipped = []

for h in horizons:
    ycol = y_cols[h]
    for W in windows:
        # XGBoost
        dfx = xgb_panels[W].dropna(subset=["ret1", ycol]).copy()
        if not dfx.empty:
            feature_cols = [c for c in dfx.columns if c.startswith("ret1_lag_") or c in [f"ret1_roll_mean_{W}", f"ret1_roll_std_{W}"]]
            dfx["y_int"] = pd.Categorical(dfx[ycol], categories=class_order).codes
            if (dfx["y_int"] >= 0).all():
                for fi, s in enumerate(splits, 1):
                    tr = subset(dfx, s["train_start"], s["train_end"])
                    va = subset(dfx, s["val_start"], s["val_end"])
                    te = subset(dfx, s["test_start"], s["test_end"])
                    if len(tr)==0 or len(va)==0 or len(te)==0:
                        skipped.append(f"XGB h={h}, W={W}, fold={fi} sem dados — skip")
                        continue
                    try:
                        clf = XGBClassifier(**xgb_params, num_class=3)
                        clf.fit(
                            tr[feature_cols].values, tr["y_int"].values,
                            eval_set=[(va[feature_cols].values, va["y_int"].values)],
                            early_stopping_rounds=xgb_early_stopping_rounds,
                            verbose=False
                        )
                        proba = clf.predict_proba(te[feature_cols].values)
                        y_pred = np.argmax(proba, axis=1)
                        y_true = te["y_int"].values
                        acc_total = float(accuracy_score(y_true, y_pred))
                        cm = confusion_matrix(y_true, y_pred, labels=[0,1,2])
                        # per-class
                        def _pc(cm):
                            res = {}
                            for i, nm in enumerate(["cai","mantem","sobe"]):
                                denom = cm[i,:].sum(); res[f"acc_{nm}"] = (cm[i,i]/denom) if denom>0 else np.nan
                            return res
                        pc = _pc(cm)
                        rows.append(dict(model="XGBoost", horizon=h, window=W, fold=fi, acc_total=acc_total, **pc))
                        conf_store.setdefault(("XGBoost", h, W), []).append(cm)
                    except Exception as e:
                        skipped.append(f"XGB h={h}, W={W}, fold={fi} erro: {e}")
        else:
            skipped.append(f"XGB h={h}, W={W} sem amostras — skip")

        # LSTM
        dfl = lstm_panels[W].dropna(subset=["ret1", ycol]).copy()
        if dfl.empty:
            skipped.append(f"LSTM h={h}, W={W} sem amostras — skip")
            continue
        dfl["y_int"] = pd.Categorical(dfl[ycol], categories=class_order).codes
        if (dfl["y_int"] < 0).any():
            skipped.append(f"LSTM h={h}, W={W} labels inválidos — skip")
            continue
        feat_cols = ["ret1", f"ret1_roll_mean_{W}", f"ret1_roll_std_{W}"]
        for fi, s in enumerate(splits, 1):
            tr = subset(dfl, s["train_start"], s["train_end"])
            va = subset(dfl, s["val_start"], s["val_end"])
            te = subset(dfl, s["test_start"], s["test_end"])
            if len(tr) < W+5 or len(va) < W+5 or len(te) < W+5:
                skipped.append(f"LSTM h={h}, W={W}, fold={fi} janelas insuficientes — skip")
                continue
            # Escala sem vazamento
            scaler = StandardScaler().fit(tr[feat_cols].values)
            tr_s = tr.copy(); va_s = va.copy(); te_s = te.copy()
            tr_s[feat_cols] = scaler.transform(tr[feat_cols].values)
            va_s[feat_cols] = scaler.transform(va[feat_cols].values)
            te_s[feat_cols] = scaler.transform(te[feat_cols].values)
            # Sequências
            def to_seq(dfz):
                Xl, yl = [], []
                V = dfz[feat_cols].values; yv = dfz["y_int"].values
                for i in range(W, len(dfz)):
                    Xl.append(V[i-W:i, :]); yl.append(yv[i])
                if not Xl: return np.empty((0,W,len(feat_cols))), np.empty((0,), dtype=int)
                return np.stack(Xl, axis=0), np.array(yl, dtype=int)
            Xtr, ytr = to_seq(tr_s); Xva, yva = to_seq(va_s); Xte, yte = to_seq(te_s)
            if Xtr.shape[0]==0 or Xva.shape[0]==0 or Xte.shape[0]==0:
                skipped.append(f"LSTM h={h}, W={W}, fold={fi} sequências insuficientes — skip")
                continue
            try:
                tf.keras.backend.clear_session()
                model = build_lstm_model(n_features=len(feat_cols), W=W)
                es = EarlyStopping(monitor="val_loss", mode="min", patience=lstm_patience, restore_best_weights=True, verbose=0)
                model.fit(Xtr, ytr, validation_data=(Xva, yva), epochs=lstm_epochs, batch_size=lstm_batch_size, callbacks=[es], verbose=0)
                proba = model.predict(Xte, verbose=0)
                y_pred = np.argmax(proba, axis=1); y_true = yte
                acc_total = float(accuracy_score(y_true, y_pred))
                cm = confusion_matrix(y_true, y_pred, labels=[0,1,2])
                def _pc(cm):
                    res = {}
                    for i, nm in enumerate(["cai","mantem","sobe"]):
                        denom = cm[i,:].sum(); res[f"acc_{nm}"] = (cm[i,i]/denom) if denom>0 else np.nan
                    return res
                pc = _pc(cm)
                rows.append(dict(model="LSTM", horizon=h, window=W, fold=fi, acc_total=acc_total, **pc))
                conf_store.setdefault(("LSTM", h, W), []).append(cm)
            except Exception as e:
                skipped.append(f"LSTM h={h}, W={W}, fold={fi} erro: {e}")

# Consolidação
if not rows:
    raise RuntimeError("CHECKLIST_FAILURE: nenhuma combinação gerou resultados.")
res = pd.DataFrame(rows).sort_values(["model","horizon","window","fold"]) 
agg = res.groupby(["horizon","model","window"], as_index=False).agg(
    acc_total_mean=("acc_total","mean"), acc_total_std=("acc_total","std"),
    acc_cai_mean=("acc_cai","mean"), acc_mantem_mean=("acc_mantem","mean"), acc_sobe_mean=("acc_sobe","mean"),
    folds=("fold","nunique")
)

# Saída por horizonte
for h in horizons:
    sub = agg[agg["horizon"]==h].copy().sort_values(["model","window"]) 
    print(f"\nRESUMO — D+{h} (teste): modelo × janela")
    if sub.empty:
        print("–")
    else:
        for c in ["acc_total_mean","acc_total_std","acc_cai_mean","acc_mantem_mean","acc_sobe_mean"]:
            if c in sub.columns: sub[c] = sub[c].astype(float)
        cols = ["model","window","acc_total_mean","acc_total_std","acc_cai_mean","acc_mantem_mean","acc_sobe_mean","folds"]
        print(sub[cols].fillna("–").to_string(index=False))
    top = sub.sort_values("acc_total_mean", ascending=False).head(3)
    print(f"\nTOP-3 — D+{h} (teste)")
    print("–" if top.empty else top[cols].fillna("–").to_string(index=False))
    if not top.empty:
        br = top.iloc[0]
        key = (br["model"], int(h), int(br["window"]))
        cms = conf_store.get(key, [])
        if cms:
            cm_sum = np.sum(np.stack(cms, axis=0), axis=0)
            print(f"\nMATRIZ DE CONFUSÃO — melhor combinação D+{h} (modelo={br['model']}, janela={int(br['window'])})")
            header = ["", "pred_CAI", "pred_MANTEM", "pred_SOBE"]
            print("{:<12s}{:>10s}{:>12s}{:>10s}".format(*header))
            for i, cls in enumerate(["true_CAI","true_MANTEM","true_SOBE"]):
                print("{:<12s}{:>10d}{:>12d}{:>10d}".format(cls, int(cm_sum[i,0]), int(cm_sum[i,1]), int(cm_sum[i,2])))
        else:
            print("\nMATRIZ DE CONFUSÃO — melhor combinação D+{h}: –")

# Checklist
processed_h = sorted(set(int(h) for h in res["horizon"].unique()))
processed_w = sorted(set(int(w) for w in res["window"].unique()))
print("\nCHECKLIST — Execução (dry_run)")
print(f"- SSOT usado: {path} (tier={tier})")
print(f"- Horizontes processados: {processed_h}")
print(f"- Janelas processadas: {processed_w}")
print(f"- Folds processados (máximo por combinação): {len(splits)}")
for h in horizons:
    ok = (agg["horizon"]==h).any(); print(f"- Tabela resumo D+{h}: {'OK' if ok else '–'}")
print(f"- dry_run: {dry_run} (nenhum arquivo salvo)")

print(f"\n[{now_ts()}] Fim — Classificação 3C (dry_run={dry_run})")

[2025-09-19 16:43:58] Início — Classificação 3C (SUBIR/MANTER/CAIR) — XGB vs LSTM
SSOT: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet (tier=GOLD) | linhas=3400 | datas=[2012-01-03 → 2025-09-19] | cols=['date', 'open', 'high', 'low', 'close', 'volume', 'ticker', 'open_norm', 'high_norm', 'low_norm', 'close_norm', 'volume_norm']...
Folds construídos: 10 (treino 18m, val 3m, teste 6m)

RESUMO — D+1 (teste): modelo × janela
model  window  acc_total_mean  acc_total_std  acc_cai_mean  acc_mantem_mean  acc_sobe_mean  folds
 LSTM       5        0.430420       0.075333      0.825172              0.0       0.167066     10
 LSTM      10        0.423155       0.048841      0.794535              0.0       0.197474     10
 LSTM      15        0.430592       0.050223      0.754384              0.0       0.264651     10

TOP-3 — D+1 (teste)
model  window  acc_total_mean  acc_total_std  acc_cai_mean  acc_mantem_mean  acc_sobe_mean  folds
 LSTM      15        0.430592       0.050223      0.754384         

## CAI vs NÃO CAI com prioridade para CAI e pisos por horizonte — IBOV SSOT

In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CAI vs NÃO CAI com prioridade para CAI e pisos por horizonte — IBOV SSOT (Patch V1.1)
- Um único script auto-contido.
- SSOT: GOLD > SILVER, sem acessar outras fontes.
- Alvos binários por D+1/D+3/D+5.
- Modelos: XGBoost e LSTM compacto, janelas 5/10/15.
- Walk-forward (treino 18m, val 3m, teste 6m) até 10 folds.
- Thresholds escolhidos em VAL maximizando recall(CAI) sob piso por horizonte (D+1 fixo=0.82; D+3/D+5 por VAL, com N_min e clip 0.70–0.85).
- Probabilidades calibradas (Platt; fallback Isotonic) por combinação e fold.
- Métricas no TESTE agregadas por combinação: Recall/Precisão/F1 (CAI), Acurácia, Cobertura; Matriz 2×2; baselines reforçadas (inclui Momentum_3d), Top-3.
- Sequência final (D+1, D+3, D+5) do último bloco usando threshold mediano do vencedor por horizonte (apenas entre elegíveis).
- Sem relax global automático.
- dry_run=True: não salva nada em disco.
"""

import os
import sys
import math
import time
import warnings
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional

# Desativar GPU e reduzir verbosidade do TF antes dos imports
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # força CPU

import numpy as np
import pandas as pd

warnings.filterwarnings("ignore", category=FutureWarning)
pd.set_option("display.width", 180)
pd.set_option("display.max_columns", 120)

_missing = []
try:
    import xgboost as xgb
    from xgboost import XGBClassifier
except Exception as e:
    _missing.append(f"xgboost ({e})")

try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential  # pyright: ignore[reportMissingImports]
    from tensorflow.keras.layers import LSTM, Dense, Dropout, Input  # pyright: ignore[reportMissingImports]
    from tensorflow.keras.callbacks import EarlyStopping  # pyright: ignore[reportMissingImports]
except Exception as e:
    _missing.append(f"tensorflow/keras ({e})")

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)
from sklearn.linear_model import LogisticRegression
from sklearn.isotonic import IsotonicRegression

# =========================
# Parâmetros (topo do script)
# =========================

dry_run: bool = True

tier_paths: List[str] = [
    "/home/wrm/BOLSA_2026/gold/IBOV_gold.parquet",
    "/home/wrm/BOLSA_2026/silver/IBOV_silver.parquet",
]

windows: List[int] = [5, 10, 15]
horizons: List[int] = [1, 3, 5]

# Validação temporal
train_min_months: int = 18
val_months: int = 3
test_months: int = 6
max_folds: int = 10

# Priorização de CAI
precision_floor: Dict[str, Optional[float]] = {"D+1": 0.82, "D+3": None, "D+5": None}  # D+1 fixo
coverage_min_rate: float = 0.10   # ≥ 10% previsto CAI
coverage_min_count: int = 8       # ou pelo menos 8 sinais CAI em 6 meses
threshold_grid: List[float] = [i/100.0 for i in range(10, 91)]  # 0.10 → 0.90
N_min_preds_val: int = 10  # mínimo de previsões CAI em VAL para considerar threshold/combo

# Modelos
xgb_params = dict(
    max_depth=5,
    learning_rate=0.05,
    n_estimators=2000,
    subsample=0.9,
    colsample_bytree=0.9,
    objective="binary:logistic",
    eval_metric="logloss",
    tree_method="hist",
    random_state=42,
    n_jobs=max(1, (os.cpu_count() or 2) - 1),
)
xgb_early_stopping_rounds: int = 50

lstm_units: int = 48
lstm_dropout: float = 0.2
lstm_epochs: int = 50
lstm_batch_size: int = 32
lstm_patience: int = 5  # early stopping

# Segurança — restringir SSOT
allowed_prefixes = (
    "/home/wrm/BOLSA_2026/gold",
    "/home/wrm/BOLSA_2026/silver",
)

# =========================
# Utilidades básicas
# =========================

def now_ts() -> str:
    return time.strftime("%Y-%m-%d %H:%M:%S")

def enforce_ssot_path(p: str) -> bool:
    ap = os.path.abspath(p)
    return any(ap.startswith(os.path.abspath(pref)) for pref in allowed_prefixes)

def detect_path(paths: List[str]) -> Tuple[Optional[str], str]:
    for p in paths:
        if os.path.exists(p) and enforce_ssot_path(p):
            tier = "GOLD" if "gold" in p else "SILVER"
            return p, tier
    return None, ""

def read_parquet_any(path: str) -> pd.DataFrame:
    return pd.read_parquet(path)

def detect_date_price_cols(df: pd.DataFrame) -> Tuple[Optional[str], Optional[str]]:
    date_candidates = ["date","Date","DATE","datetime","Datetime","DATETIME","data","DATA"]
    price_candidates = ["close","Close","CLOSE","adj_close","Adj Close","ADJ_CLOSE","fechamento","FECHAMENTO","price","Price","PRICE","IBOV"]
    dcol = next((c for c in date_candidates if c in df.columns), None)
    pcol = next((c for c in price_candidates if c in df.columns), None)
    return dcol, pcol

def ensure_datetime(df: pd.DataFrame, dcol: Optional[str]) -> pd.DataFrame:
    if dcol is None:
        if isinstance(df.index, pd.DatetimeIndex):
            out = df.sort_index().copy(); out["__date__"] = out.index; return out
        raise ValueError("VALIDATION_ERROR: coluna de data não encontrada e índice não é DatetimeIndex.")
    out = df.copy()
    out[dcol] = pd.to_datetime(out[dcol], errors="coerce", utc=False)
    out = out.dropna(subset=[dcol]).sort_values(dcol)
    out["__date__"] = out[dcol].values
    return out

def summarize_df(df: pd.DataFrame) -> Dict[str, str]:
    rows, cols = df.shape
    dmin = pd.to_datetime(df["__date__"]).min(); dmax = pd.to_datetime(df["__date__"]).max()
    cols_list = list(df.columns)
    return dict(
        row_count=str(rows),
        date_min=str(dmin.date()) if pd.notnull(dmin) else "–",
        date_max=str(dmax.date()) if pd.notnull(dmax) else "–",
        columns=", ".join(cols_list[:20]) + (" ..." if len(cols_list) > 20 else "")
    )

def compute_log_ret(close: pd.Series) -> pd.Series:
    return np.log(close / close.shift(1))

def forward_return(close: pd.Series, h: int) -> pd.Series:
    # retorno acumulado simples (não log) para decisão binária
    return (close.shift(-h) / close) - 1.0

def month_add(d: pd.Timestamp, months: int) -> pd.Timestamp:
    return d + pd.DateOffset(months=months)

def build_walk_forward_splits(df: pd.DataFrame) -> List[Dict[str, pd.Timestamp]]:
    dates = pd.to_datetime(df["__date__"])
    start = dates.min().normalize(); end = dates.max().normalize()
    if pd.isna(start) or pd.isna(end):
        raise ValueError("VALIDATION_ERROR: datas inválidas para walk-forward.")
    train_end = month_add(start, train_min_months) - pd.DateOffset(days=1)
    if train_end >= end:
        raise ValueError("VALIDATION_ERROR: série insuficiente para treino mínimo.")
    folds = []
    test_start = train_end + pd.DateOffset(days=1)
    test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    while test_start <= end and len(folds) < max_folds:
        if test_end > end: test_end = end
        val_end = test_start - pd.DateOffset(days=1)
        val_start = month_add(val_end, -val_months) + pd.DateOffset(days=1)
        tr_start = start; tr_end = val_start - pd.DateOffset(days=1)
        if tr_start >= tr_end or val_start > val_end or test_start > test_end: break
        folds.append(dict(
            train_start=tr_start, train_end=tr_end,
            val_start=val_start, val_end=val_end,
            test_start=test_start, test_end=test_end
        ))
        test_start = test_end + pd.DateOffset(days=1)
        test_end = month_add(test_start, test_months) - pd.DateOffset(days=1)
    if len(folds) == 0:
        raise ValueError("VALIDATION_ERROR: não foi possível construir folds walk-forward.")
    return folds

def subset(df: pd.DataFrame, a: pd.Timestamp, b: pd.Timestamp) -> pd.DataFrame:
    return df.loc[(df["__date__"] >= a) & (df["__date__"] <= b)].copy()

# =========================
# Features
# =========================

def prepare_global_indicators(df: pd.DataFrame, price_col: str) -> pd.DataFrame:
    out = df.copy()
    out["ret1"] = compute_log_ret(out[price_col])
    out["vol20d"] = out["ret1"].rolling(20).std()
    out["ma50"] = out[price_col].rolling(50).mean()
    out["pos_ma50"] = ((out[price_col] > out["ma50"]).astype(float)).where(out["ma50"].notna(), np.nan)
    # Momentum 3d para baseline (log acumulado 3 dias, shift para usar info até t-1)
    out["mom3d_prev"] = out["ret1"].rolling(3).sum().shift(1)
    return out

def build_xgb_panel(df: pd.DataFrame, W: int) -> pd.DataFrame:
    out = df.copy()
    # Lags de ret1 (1..W)
    for lag in range(1, W + 1):
        out[f"ret1_lag_{lag}"] = out["ret1"].shift(lag)
    # Rolling mean/std de ret1 (W)
    out[f"ret1_roll_mean_{W}"] = out["ret1"].rolling(W).mean()
    out[f"ret1_roll_std_{W}"] = out["ret1"].rolling(W).std()
    return out

def build_lstm_panel(df: pd.DataFrame, W: int) -> pd.DataFrame:
    out = df.copy()
    out[f"ret1_roll_mean_{W}"] = out["ret1"].rolling(W).mean()
    out[f"ret1_roll_std_{W}"] = out["ret1"].rolling(W).std()
    return out

def to_sequences(df: pd.DataFrame, feat_cols: List[str], label_col: str, W: int) -> Tuple[np.ndarray, np.ndarray]:
    Xl, yl = [], []
    V = df[feat_cols].values; yv = df[label_col].values
    for i in range(W, len(df)):
        Xl.append(V[i-W:i, :]); yl.append(yv[i])
    if not Xl:
        return np.empty((0, W, len(feat_cols))), np.empty((0,), dtype=int)
    return np.stack(Xl, axis=0), np.array(yl, dtype=int)

# =========================
# Modelos e Calibração
# =========================

def build_lstm_model(n_features: int, W: int) -> Sequential:
    model = Sequential()
    model.add(Input(shape=(W, n_features)))
    model.add(LSTM(lstm_units))
    model.add(Dropout(lstm_dropout))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

def make_calibrator(y_val: np.ndarray, p_val: np.ndarray):
    yv = np.asarray(y_val).astype(int)
    pv = np.asarray(p_val).astype(float).reshape(-1, 1)
    # se classe única, não calibrar
    if len(np.unique(yv)) < 2 or len(yv) < 5:
        return (lambda x: np.asarray(x, dtype=float)), "none"
    # Platt (sigmóide) via LogisticRegression
    try:
        lr = LogisticRegression(max_iter=1000)
        lr.fit(pv, yv)
        def f(x):
            xv = np.asarray(x).astype(float).reshape(-1, 1)
            return lr.predict_proba(xv)[:, 1]
        return f, "platt"
    except Exception:
        pass
    # Fallback: Isotonic
    try:
        iso = IsotonicRegression(out_of_bounds="clip")
        iso.fit(np.asarray(p_val).astype(float), yv)
        def g(x):
            return iso.predict(np.asarray(x).astype(float))
        return g, "isotonic"
    except Exception:
        return (lambda x: np.asarray(x, dtype=float)), "none"

# =========================
# Métricas, Floors, Thresholds
# =========================

def binary_metrics(y_true: np.ndarray, y_score: np.ndarray, thr: float) -> Dict[str, float]:
    y_pred = (y_score >= thr).astype(int)
    acc = float(accuracy_score(y_true, y_pred)) if len(y_true) else np.nan
    prec = float(precision_score(y_true, y_pred, zero_division=0)) if len(y_true) else np.nan
    rec = float(recall_score(y_true, y_pred, zero_division=0)) if len(y_true) else np.nan
    f1 = float(f1_score(y_true, y_pred, zero_division=0)) if len(y_true) else np.nan
    cm = confusion_matrix(y_true, y_pred, labels=[1,0]) if len(y_true) else np.array([[0,0],[0,0]])
    cover_rate = float((y_pred == 1).mean()) if len(y_pred) else np.nan
    cover_count = int((y_pred == 1).sum()) if len(y_pred) else 0
    return dict(acc=acc, precision=prec, recall=rec, f1=f1, cm=cm, coverage_rate=cover_rate, coverage_count=cover_count)

def best_precision_on_grid_with_min_preds(y_true: np.ndarray, y_score: np.ndarray, grid: List[float], nmin: int) -> float:
    best = 0.0
    for thr in grid:
        y_pred = (y_score >= thr).astype(int)
        npos = int(y_pred.sum())
        if npos < nmin:
            continue
        p = float(precision_score(y_true, y_pred, zero_division=0))
        if p > best:
            best = p
    return best

def select_threshold_with_floor(y_true: np.ndarray, y_score: np.ndarray, floor: float, grid: List[float], nmin: int) -> Tuple[float, Dict[str, float], bool, int, bool]:
    # Maximiza recall(CAI) mantendo precisão >= piso E n_pred >= nmin. Se impossível, marca floor_unmet e escolhe maior precisão disponível (apenas para avaliação), registrando nmin flag quando aplicável.
    best_thr, best_m, floor_unmet = None, None, False
    val_nmin_unmet = False
    eligible = []
    for thr in grid:
        m = binary_metrics(y_true, y_score, thr)
        if m["coverage_count"] >= nmin and m["precision"] >= floor:
            eligible.append((thr, m))
    if eligible:
        eligible.sort(key=lambda x: (x[1]["recall"], x[1]["f1"], x[1]["acc"]), reverse=True)
        best_thr, best_m = eligible[0]
        floor_unmet = False
        val_nmin_unmet = False
    else:
        # Nenhum atende ao piso com nmin -> não relaxar; selecionar melhor por precisão entre thresholds com nmin se houver, senão entre todos
        cands = []
        for thr in grid:
            m = binary_metrics(y_true, y_score, thr)
            cands.append((thr, m))
        # priorizar com nmin
        cands_nmin = [c for c in cands if c[1]["coverage_count"] >= nmin]
        if cands_nmin:
            cands_nmin.sort(key=lambda x: (x[1]["precision"], x[1]["recall"], x[1]["acc"]), reverse=True)
            best_thr, best_m = cands_nmin[0]
            val_nmin_unmet = True  # nmin atinge, mas piso não
        else:
            cands.sort(key=lambda x: (x[1]["precision"], x[1]["recall"], x[1]["acc"]), reverse=True)
            best_thr, best_m = cands[0]
            val_nmin_unmet = True
        floor_unmet = True
    return float(best_thr), best_m, floor_unmet, int(best_m["coverage_count"]), bool(val_nmin_unmet)

# =========================
# Execução principal
# =========================

def main():
    print(f"[{now_ts()}] Início — CAI vs NÃO CAI (prioridade CAI, pisos por horizonte) — XGB vs LSTM (Patch V1.1)")
    if _missing:
        print(f"CHECKLIST_FAILURE: dependências ausentes -> {', '.join(_missing)}")
        return

    # 1) Leitura SSOT
    path, tier = detect_path(tier_paths)
    if path is None:
        print("CHECKLIST_FAILURE: Nenhum caminho disponível em GOLD/SILVER no SSOT.")
        return
    if not enforce_ssot_path(path):
        print("CHECKLIST_FAILURE: Caminho fora do SSOT permitido.")
        return

    try:
        df_raw = read_parquet_any(path)
    except Exception as e:
        print(f"VALIDATION_ERROR: falha ao ler parquet '{path}': {e}")
        return

    dcol, pcol = detect_date_price_cols(df_raw)
    if dcol is None or pcol is None:
        print("VALIDATION_ERROR: não foi possível detectar colunas de data/preço (ex.: 'date' e 'close').")
        return

    df = ensure_datetime(df_raw, dcol)
    df = df.dropna(subset=[pcol]).copy()
    df = prepare_global_indicators(df, pcol)

    # Alvos binários por horizonte
    y_cols: Dict[int, str] = {}
    for h in horizons:
        df[f"ret_fwd_{h}"] = forward_return(df[pcol], h)
        # y=1 para CAI (ret_fwd < 0), y=0 NÃO CAI
        df[f"y_h{h}_bin"] = (pd.to_numeric(df[f"ret_fwd_{h}"], errors="coerce") < 0).astype("Int8")
        y_cols[h] = f"y_h{h}_bin"

    # Prova SSOT
    proof = summarize_df(df)
    print("\n[PROVA SSOT]")
    print(f"- Caminho: {path} (tier={tier})")
    print(f"- Linhas: {proof['row_count']} | date_min: {proof['date_min']} | date_max: {proof['date_max']}")
    print(f"- Colunas (amostra): {proof['columns']}")

    # 4) Splits walk-forward
    try:
        splits = build_walk_forward_splits(df)
    except Exception as e:
        print(str(e))
        return

    print("\n[WALK-FORWARD — Folds]")
    for i, s in enumerate(splits, 1):
        print(f"Fold {i:02d} | train[{str(s['train_start'].date())} → {str(s['train_end'].date())}] "
              f"| val[{str(s['val_start'].date())} → {str(s['val_end'].date())}] "
              f"| test[{str(s['test_start'].date())} → {str(s['test_end'].date())}]")

    # 5) Treino e predição — coletar preds VAL e TESTE (já calibradas) e metadados
    preds_val: Dict[Tuple[str,int,int,int], Tuple[np.ndarray, np.ndarray]] = {}
    preds_tst: Dict[Tuple[str,int,int,int], Tuple[np.ndarray, np.ndarray, np.ndarray]] = {}
    meta_train: Dict[Tuple[str,int,int,int], Dict[str, float]] = {}
    skipped_msgs: List[str] = []

    for h in horizons:
        ycol = y_cols[h]
        for W in windows:
            # Preparar painéis completos — XGB e LSTM
            xgb_panel_full = build_xgb_panel(df[["__date__","ret1","vol20d","ma50","pos_ma50","mom3d_prev"]].copy(), W)
            lstm_panel_full = build_lstm_panel(df[["__date__","ret1"]].copy(), W)

            for fi, s in enumerate(splits, 1):
                # Subconjuntos temporais
                dfx = xgb_panel_full.join(
                    df[[pcol, f"ret_fwd_{h}", ycol]], how="left"
                ).copy()
                tr_x = subset(dfx, s["train_start"], s["train_end"])
                va_x = subset(dfx, s["val_start"], s["val_end"])
                te_x = subset(dfx, s["test_start"], s["test_end"])

                dfl = lstm_panel_full.join(
                    df[[pcol, f"ret_fwd_{h}", ycol]], how="left"
                ).copy()
                tr_l = subset(dfl, s["train_start"], s["train_end"])
                va_l = subset(dfl, s["val_start"], s["val_end"])
                te_l = subset(dfl, s["test_start"], s["test_end"])

                # Preparar XGB
                def prepare_xgb_block(block: pd.DataFrame) -> pd.DataFrame:
                    use_cols = [c for c in block.columns if c.startswith("ret1_lag_") or c in [f"ret1_roll_mean_{W}", f"ret1_roll_std_{W}", "vol20d", "pos_ma50", ycol, "__date__"]]
                    return block[use_cols].dropna().copy()

                tr_xc = prepare_xgb_block(tr_x)
                if tr_xc.empty or tr_xc[ycol].isna().all():
                    skipped_msgs.append(f"AVISO: XGB h={h}, W={W}, fold={fi} sem treino — skip")
                    continue
                vol_median = float(tr_xc["vol20d"].median())
                # aplicar farol binário de vol (usa mediana do treino)
                def apply_vol_bin(b: pd.DataFrame) -> pd.DataFrame:
                    out = b.copy()
                    out["vol20d_bin"] = (out["vol20d"] >= vol_median).astype(int)
                    return out
                tr_xc = apply_vol_bin(tr_xc)
                va_xc = apply_vol_bin(prepare_xgb_block(va_x))
                te_xc = apply_vol_bin(prepare_xgb_block(te_x))

                feat_xgb = [c for c in tr_xc.columns if c.startswith("ret1_lag_") or c in [f"ret1_roll_mean_{W}", f"ret1_roll_std_{W}", "vol20d_bin", "pos_ma50"]]
                tr_xc["y_int"] = tr_xc[ycol].astype(int)
                va_xc["y_int"] = va_xc[ycol].astype(int) if not va_xc.empty else pd.Series([], dtype=int)
                te_xc["y_int"] = te_xc[ycol].astype(int) if not te_xc.empty else pd.Series([], dtype=int)

                pos_count = int((tr_xc["y_int"] == 1).sum())
                neg_count = int((tr_xc["y_int"] == 0).sum())
                spw = (neg_count / max(1, pos_count)) if (pos_count + neg_count) > 0 else 1.0

                # Treinar XGB com EarlyStopping callback (compatível 3.x)
                try:
                    clf = XGBClassifier(**xgb_params, scale_pos_weight=spw)
                    callbacks = [xgb.callback.EarlyStopping(rounds=xgb_early_stopping_rounds, save_best=True, maximize=False)]
                    clf.fit(
                        tr_xc[feat_xgb].values, tr_xc["y_int"].values,
                        eval_set=[(tr_xc[feat_xgb].values, tr_xc["y_int"].values), (va_xc[feat_xgb].values, va_xc["y_int"].values)],
                        callbacks=callbacks,
                        verbose=False,
                    )
                    best_iter = getattr(clf, "best_iteration", None)
                    if best_iter is not None:
                        iter_range = (0, int(best_iter) + 1)
                        p_val_raw = clf.predict_proba(va_xc[feat_xgb].values, iteration_range=iter_range)[:, 1] if len(va_xc) else np.array([])
                        p_tst_raw = clf.predict_proba(te_xc[feat_xgb].values, iteration_range=iter_range)[:, 1] if len(te_xc) else np.array([])
                    else:
                        p_val_raw = clf.predict_proba(va_xc[feat_xgb].values)[:, 1] if len(va_xc) else np.array([])
                        p_tst_raw = clf.predict_proba(te_xc[feat_xgb].values)[:, 1] if len(te_xc) else np.array([])
                    # Calibração
                    cal_fn, cal_method = make_calibrator(va_xc["y_int"].values.astype(int), p_val_raw)
                    p_val = cal_fn(p_val_raw)
                    p_tst = cal_fn(p_tst_raw)
                    preds_val[("XGB", W, h, fi)] = (va_xc["y_int"].values.astype(int), np.asarray(p_val, dtype=float))
                    preds_tst[("XGB", W, h, fi)] = (te_xc["y_int"].values.astype(int), np.asarray(p_tst, dtype=float), te_xc["__date__"].values.astype("datetime64[ns]"))
                    meta_train[("XGB", W, h, fi)] = dict(scale_pos_weight=spw, vol20d_median=vol_median, best_iteration=(int(best_iter) if best_iter is not None else None), calibration=cal_method)
                except Exception as e:
                    skipped_msgs.append(f"AVISO: XGB treino/val/teste h={h}, W={W}, fold={fi} erro: {e}")

                # Preparar LSTM
                def dropna_lstm(b: pd.DataFrame) -> pd.DataFrame:
                    use_cols = ["__date__", "ret1", f"ret1_roll_mean_{W}", f"ret1_roll_std_{W}", ycol]
                    return b[use_cols].dropna().copy()

                tr_ls = dropna_lstm(tr_l)
                va_ls = dropna_lstm(va_l)
                te_ls = dropna_lstm(te_l)

                if len(tr_ls) < (W + 5) or len(va_ls) < (W + 5) or len(te_ls) < (W + 5):
                    skipped_msgs.append(f"AVISO: LSTM h={h}, W={W}, fold={fi} janelas insuficientes — skip")
                else:
                    feat_lstm = ["ret1", f"ret1_roll_mean_{W}", f"ret1_roll_std_{W}"]
                    scaler = StandardScaler().fit(tr_ls[feat_lstm].values)
                    tr_ls_sc = tr_ls.copy(); va_ls_sc = va_ls.copy(); te_ls_sc = te_ls.copy()
                    tr_ls_sc[feat_lstm] = scaler.transform(tr_ls[feat_lstm].values)
                    va_ls_sc[feat_lstm] = scaler.transform(va_ls[feat_lstm].values)
                    te_ls_sc[feat_lstm] = scaler.transform(te_ls[feat_lstm].values)
                    tr_ls_sc["y_int"] = tr_ls_sc[ycol].astype(int)
                    va_ls_sc["y_int"] = va_ls_sc[ycol].astype(int)
                    te_ls_sc["y_int"] = te_ls_sc[ycol].astype(int)
                    Xtr, ytr = to_sequences(tr_ls_sc, feat_lstm, "y_int", W)
                    Xva, yva = to_sequences(va_ls_sc, feat_lstm, "y_int", W)
                    Xte, yte = to_sequences(te_ls_sc, feat_lstm, "y_int", W)
                    dates_va = va_ls_sc["__date__"].values[W:]
                    dates_te = te_ls_sc["__date__"].values[W:]
                    if Xtr.shape[0] == 0 or Xva.shape[0] == 0 or Xte.shape[0] == 0:
                        skipped_msgs.append(f"AVISO: LSTM h={h}, W={W}, fold={fi} sequências insuficientes — skip")
                    else:
                        try:
                            tf.keras.backend.clear_session()
                            model = build_lstm_model(n_features=len(feat_lstm), W=W)
                            es = EarlyStopping(monitor="val_loss", mode="min", patience=lstm_patience, restore_best_weights=True, verbose=0)
                            model.fit(
                                Xtr, ytr,
                                validation_data=(Xva, yva),
                                epochs=lstm_epochs,
                                batch_size=lstm_batch_size,
                                callbacks=[es],
                                verbose=0,
                            )
                            p_val_raw = model.predict(Xva, verbose=0, batch_size=lstm_batch_size).reshape(-1)
                            p_tst_raw = model.predict(Xte, verbose=0, batch_size=lstm_batch_size).reshape(-1)
                            cal_fn, cal_method = make_calibrator(yva.astype(int), p_val_raw)
                            p_val = cal_fn(p_val_raw)
                            p_tst = cal_fn(p_tst_raw)
                            preds_val[("LSTM", W, h, fi)] = (yva.astype(int), np.asarray(p_val, dtype=float))
                            preds_tst[("LSTM", W, h, fi)] = (yte.astype(int), np.asarray(p_tst, dtype=float), dates_te.astype("datetime64[ns]"))
                            meta_train[("LSTM", W, h, fi)] = dict(scaler="standard", calibration=cal_method)
                        except Exception as e:
                            skipped_msgs.append(f"AVISO: LSTM treino/val/teste h={h}, W={W}, fold={fi} erro: {e}")

    if skipped_msgs:
        print("\n[AVISOS]")
        for m in skipped_msgs[:30]:
            print(f"- {m}")
        if len(skipped_msgs) > 30:
            print(f"- (+{len(skipped_msgs)-30} avisos adicionais)")

    # 6) Definição de pisos (D+3, D+5) via VAL com N_min e clip [0.70, 0.85]
    for h in [3, 5]:
        max_prec = 0.0
        for key, (yv, pv) in preds_val.items():
            _, _, hh, _ = key
            if hh != h or len(yv) == 0:
                continue
            mp = best_precision_on_grid_with_min_preds(yv, pv, threshold_grid, N_min_preds_val)
            if mp > max_prec:
                max_prec = mp
        tag = f"D+{h}"
        if precision_floor.get(tag) is None:
            pf = round(max_prec, 2)
            pf = float(np.clip(pf, 0.70, 0.85))
            precision_floor[tag] = pf
    print("\n[PISOS DE PRECISÃO — definidos]")
    print(f"- D+1: {precision_floor['D+1']:.2f} (fixo)")
    print(f"- D+3: {precision_floor['D+3']:.2f}")
    print(f"- D+5: {precision_floor['D+5']:.2f}")
    print(f"- N_min_preds_val: {N_min_preds_val}")

    # 7) Seleção de threshold por fold (VAL) sob piso e N_min; 8) Avaliação no TESTE
    results_rows: List[Dict] = []
    thresholds_by_combo_fold: Dict[Tuple[str,int,int,int], float] = {}
    floor_unmet_flags = 0
    coverage_failed_flags = 0

    for key in sorted(preds_val.keys()):
        model, W, h, fi = key
        yv, pv = preds_val[key]
        yt, pt, dt = preds_tst.get(key, (np.array([]), np.array([]), np.array([])))
        if len(yv) == 0 or len(yt) == 0:
            continue
        floor = precision_floor[f"D+{h}"] or 0.0
        thr, m_val, floor_unmet, npos_val, val_nmin_unmet = select_threshold_with_floor(yv, pv, floor, threshold_grid, N_min_preds_val)
        thresholds_by_combo_fold[key] = thr
        if floor_unmet:
            floor_unmet_flags += 1
        # TESTE
        m_tst = binary_metrics(yt, pt, thr)
        # Cobertura mínima em TESTE
        coverage_failed = False
        n_test = int(len(yt))
        if not math.isnan(m_tst["coverage_rate"]):
            if (m_tst["coverage_rate"] < coverage_min_rate) and (m_tst["coverage_count"] < coverage_min_count):
                coverage_failed = True
        if coverage_failed:
            coverage_failed_flags += 1
        results_rows.append(dict(
            model=model, window=W, horizon=h, fold=fi, threshold_val=thr,
            recall_CAI=m_tst["recall"], precisao_CAI=m_tst["precision"], F1_CAI=m_tst["f1"], acc=m_tst["acc"],
            pred_CAI_rate=m_tst["coverage_rate"], num_pred_CAI=m_tst["coverage_count"], n_test=n_test,
            val_floor_unmet=floor_unmet, val_nmin_unmet=val_nmin_unmet, coverage_failed=coverage_failed,
            cm_TP=int(m_tst["cm"][0,0]) if m_tst["cm"].shape==(2,2) else 0,
            cm_FP=int(m_tst["cm"][0,1]) if m_tst["cm"].shape==(2,2) else 0,
            cm_FN=int(m_tst["cm"][1,0]) if m_tst["cm"].shape==(2,2) else 0,
            cm_TN=int(m_tst["cm"][1,1]) if m_tst["cm"].shape==(2,2) else 0,
            cal_method=str(meta_train.get(key, {}).get("calibration", "-")),
        ))

    if not results_rows:
        print("CHECKLIST_FAILURE: nenhuma combinação produziu resultados em TESTE.")
        return

    res_df = pd.DataFrame(results_rows).sort_values(["horizon","model","window","fold"]) if results_rows else pd.DataFrame()

    # 9) Agregação por combinação (modelo×janela×horizonte) no TESTE — somar CMs e recomputar métricas
    agg_rows = []
    for (h, m, W), grp in res_df.groupby(["horizon","model","window"], as_index=False):
        TP = int(grp["cm_TP"].sum()); FP = int(grp["cm_FP"].sum()); FN = int(grp["cm_FN"].sum()); TN = int(grp["cm_TN"].sum())
        num_pred = int(grp["num_pred_CAI"].sum()); n_test = int(grp["n_test"].sum());
        prec = float(TP / max(1, (TP + FP))) if (TP + FP) > 0 else 0.0
        rec = float(TP / max(1, (TP + FN))) if (TP + FN) > 0 else 0.0
        acc = float((TP + TN) / max(1, (TP + FP + FN + TN))) if (TP + FP + FN + TN) > 0 else 0.0
        f1 = float((2*prec*rec)/max(1e-12, (prec+rec))) if (prec + rec) > 0 else 0.0
        cover_rate = float(num_pred / max(1, n_test)) if n_test > 0 else 0.0
        floor = precision_floor[f"D+{h}"] or 0.0
        coverage_ok = (cover_rate >= coverage_min_rate) or (num_pred >= coverage_min_count)
        piso_ok = (prec >= floor)
        eligible = bool(piso_ok and coverage_ok)
        reason_parts = []
        if not piso_ok: reason_parts.append("piso")
        if not coverage_ok: reason_parts.append("cobertura")
        if bool(grp["val_nmin_unmet"].any()): reason_parts.append("Nmin_VAL")
        reason = ",".join(reason_parts) if not eligible else ""
        folds = int(grp["fold"].nunique())
        # Mediana do threshold entre folds (para esta combinação)
        thr_med = float(np.median(grp["threshold_val"].values)) if folds > 0 else float("nan")
        agg_rows.append(dict(horizon=h, model=m, window=W, TP=TP, FP=FP, FN=FN, TN=TN,
                             recall_CAI=rec, precisao_CAI=prec, F1_CAI=f1, acc=acc,
                             pred_CAI_rate=cover_rate, num_pred_CAI=num_pred, n_test=n_test,
                             eligible=eligible, reason=reason, folds=folds, threshold_median=thr_med))
    agg_df = pd.DataFrame(agg_rows).sort_values(["horizon","model","window"]) if agg_rows else pd.DataFrame()

    # 10) Top-3 e melhor combinação por horizonte (apenas elegíveis)
    best_combo_by_h: Dict[int, Tuple[str,int]] = {}
    threshold_operacional: Dict[int, float] = {}
    no_winner_flags: Dict[int, bool] = {1: False, 3: False, 5: False}

    for h in horizons:
        sub = agg_df[agg_df["horizon"] == h].copy()
        elig = sub[sub["eligible"] == True].copy()
        print(f"\nRESUMO — D+{h} (TESTE agregado) — modelo × janela")
        if sub.empty:
            print("–")
        else:
            show_cols = ["model","window","recall_CAI","precisao_CAI","F1_CAI","acc","pred_CAI_rate","eligible","reason","folds"]
            print(sub[show_cols].to_string(index=False))
        print(f"\nTOP-3 — D+{h} (TESTE)")
        if elig.empty:
            print("–")
            no_winner_flags[h] = True
            continue
        # ordenar por recall desc, desempate F1, depois acc
        elig = elig.sort_values(["recall_CAI","F1_CAI","acc"], ascending=[False, False, False])
        top3 = elig.head(3).reset_index(drop=True)
        print(top3[["model","window","recall_CAI","precisao_CAI","F1_CAI","acc","pred_CAI_rate","folds","threshold_median"]].to_string(index=False))
        best = top3.iloc[0]
        best_combo_by_h[h] = (str(best["model"]), int(best["window"]))
        threshold_operacional[h] = float(best["threshold_median"]) if np.isfinite(best["threshold_median"]) else float("nan")

        # Matriz de confusão agregada do melhor
        print(f"\nMATRIZ DE CONFUSÃO — melhor combinação D+{h} (modelo={best['model']}, janela={int(best['window'])}) [CAI=1, N_CAI=0]")
        cm_sum = np.array([[int(best_row) for best_row in [best.get("TP",0), best.get("FP",0)]],
                           [int(best.get("FN",0)), int(best.get("TN",0))]])
        header = ["", "pred_CAI", "pred_NAO_CAI"]
        print("{:<14s}{:>10s}{:>14s}".format(*header))
        print("{:<14s}{:>10d}{:>14d}".format("true_CAI", cm_sum[0,0], cm_sum[0,1]))
        print("{:<14s}{:>10d}{:>14d}".format("true_NAO_CAI", cm_sum[1,0], cm_sum[1,1]))

    # 11) Threshold operacional (mediana) e sequência final no último bloco
    print("\nTHRESHOLD OPERACIONAL (mediana entre folds dos elegíveis)")
    for h in horizons:
        val = threshold_operacional.get(h, float("nan"))
        print(f"- D+{h}: {val if np.isfinite(val) else '–'}")

    try:
        last_fold = max(int(k[3]) for k in preds_tst.keys()) if preds_tst else None
    except Exception:
        last_fold = None
    final_seq = []
    if last_fold is not None:
        for h in horizons:
            thr_med = threshold_operacional.get(h, float("nan"))
            comb = best_combo_by_h.get(h)
            if comb is None or not np.isfinite(thr_med):
                final_seq.append("–")
                continue
            model, W = comb
            yt, pt, dt = preds_tst.get((model, W, h, last_fold), (np.array([]), np.array([]), np.array([])))
            if len(pt) == 0:
                final_seq.append("–")
                continue
            yhat = (pt >= thr_med).astype(int)
            last_label = int(yhat[-1])
            final_seq.append("CAI" if last_label == 1 else "NÃO CAI")
        print("\nSEQUÊNCIA FINAL (último bloco de TESTE):")
        print(f"- (D+1, D+3, D+5) = {', '.join(final_seq)}")
    else:
        print("\nSEQUÊNCIA FINAL: –")

    # Baselines — TESTE (médias por horizonte)
    baseline_rows = []
    for h in horizons:
        seen_folds = set()
        for key in sorted(preds_tst.keys()):
            m, W, hh, fi = key
            if hh != h or fi in seen_folds:
                continue
            yt, pt, dt = preds_tst[key]
            seen_folds.add(fi)
            n = len(yt)
            if n == 0:
                continue
            # Sempre NÃO CAI
            y_pred0 = np.zeros(n, dtype=int)
            cm0 = confusion_matrix(yt, y_pred0, labels=[1,0])
            acc0 = float(accuracy_score(yt, y_pred0))
            prec0 = float(precision_score(yt, y_pred0, zero_division=0))
            rec0 = float(recall_score(yt, y_pred0, zero_division=0))
            f10 = float(f1_score(yt, y_pred0, zero_division=0))
            baseline_rows.append(dict(horizon=h, baseline="Sempre_NAO_CAI", fold=fi,
                                      recall_CAI=rec0, precisao_CAI=prec0, F1_CAI=f10, acc=acc0,
                                      pred_CAI_rate=0.0))
            # PropTreino>0.5 — proxy via VAL
            yv, pv = None, None
            for k2, (yvv, pvv) in preds_val.items():
                mm, WW, hhh, fii = k2
                if hhh == h and fii == fi and len(yvv) > 0:
                    yv, pv = yvv, pvv
                    break
            if yv is not None:
                p_train_cai = float((yv == 1).mean())
                pred1 = np.ones(n, dtype=int) if p_train_cai > 0.5 else np.zeros(n, dtype=int)
                cm1 = confusion_matrix(yt, pred1, labels=[1,0])
                acc1 = float(accuracy_score(yt, pred1))
                prec1 = float(precision_score(yt, pred1, zero_division=0))
                rec1 = float(recall_score(yt, pred1, zero_division=0))
                f11 = float(f1_score(yt, pred1, zero_division=0))
                baseline_rows.append(dict(horizon=h, baseline="PropTreino>0.5", fold=fi,
                                          recall_CAI=rec1, precisao_CAI=prec1, F1_CAI=f11, acc=acc1,
                                          pred_CAI_rate=float(pred1.mean())))
            # Sinal de ontem
            ret1_map = pd.Series(df.set_index("__date__")["ret1"])
            pred2 = []
            for d in dt:
                prev_day = pd.to_datetime(d) - pd.Timedelta(days=1)
                val_prev = ret1_map.get(prev_day, np.nan)
                pred2.append(1 if (pd.notna(val_prev) and val_prev < 0) else 0)
            pred2 = np.array(pred2, dtype=int)
            cm2 = confusion_matrix(yt, pred2, labels=[1,0])
            acc2 = float(accuracy_score(yt, pred2))
            prec2 = float(precision_score(yt, pred2, zero_division=0))
            rec2 = float(recall_score(yt, pred2, zero_division=0))
            f12 = float(f1_score(yt, pred2, zero_division=0))
            baseline_rows.append(dict(horizon=h, baseline="SinalOntem", fold=fi,
                                      recall_CAI=rec2, precisao_CAI=prec2, F1_CAI=f12, acc=acc2,
                                      pred_CAI_rate=float(pred2.mean())))
            # Momentum_3d — CAI se somatório(últimos 3 ret1) < 0
            mom_map = pd.Series(df.set_index("__date__")["mom3d_prev"])  # já shiftado
            pred3 = []
            for d in dt:
                val_prev3 = mom_map.get(pd.to_datetime(d), np.nan)
                pred3.append(1 if (pd.notna(val_prev3) and val_prev3 < 0) else 0)
            pred3 = np.array(pred3, dtype=int)
            cm3 = confusion_matrix(yt, pred3, labels=[1,0])
            acc3 = float(accuracy_score(yt, pred3))
            prec3 = float(precision_score(yt, pred3, zero_division=0))
            rec3 = float(recall_score(yt, pred3, zero_division=0))
            f13 = float(f1_score(yt, pred3, zero_division=0))
            baseline_rows.append(dict(horizon=h, baseline="Momentum_3d", fold=fi,
                                      recall_CAI=rec3, precisao_CAI=prec3, F1_CAI=f13, acc=acc3,
                                      pred_CAI_rate=float(pred3.mean())))
    baseline_df = pd.DataFrame(baseline_rows) if baseline_rows else pd.DataFrame()

    if not baseline_df.empty:
        print("\nBASELINES — TESTE (médias por horizonte)")
        base_agg = baseline_df.groupby(["horizon","baseline"], as_index=False).agg(
            recall_CAI_mean=("recall_CAI","mean"),
            precisao_CAI_mean=("precisao_CAI","mean"),
            F1_CAI_mean=("F1_CAI","mean"),
            acc_mean=("acc","mean"),
            pred_CAI_rate_mean=("pred_CAI_rate","mean"),
            folds=("fold","nunique")
        ).sort_values(["horizon","baseline"])
        for h in horizons:
            bs = base_agg[base_agg["horizon"]==h]
            print(f"\nHORIZONTE D+{h}")
            if bs.empty:
                print("–")
            else:
                print(bs[["baseline","recall_CAI_mean","precisao_CAI_mean","F1_CAI_mean","acc_mean","pred_CAI_rate_mean","folds"]].to_string(index=False))

    # Flags e Checklist
    print("\nFLAGS")
    print(f"- floor_unmet_folds: {floor_unmet_flags}")
    print(f"- coverage_failed: {coverage_failed_flags}")
    print(f"- no_winner: D+1={no_winner_flags[1]}, D+3={no_winner_flags[3]}, D+5={no_winner_flags[5]}")

    print("\nCHECKLIST")
    print(f"- SSOT: {path} (tier={tier})")
    print(f"- Horizontes: {horizons} | Janelas: {windows} | Folds: {len(splits)}")
    print(f"- precision_floor: D+1={precision_floor['D+1']:.2f}, D+3={precision_floor['D+3']:.2f}, D+5={precision_floor['D+5']:.2f}")
    print(f"- N_min_preds_val: {N_min_preds_val}")
    print(f"- Baselines presentes: {'SIM' if not baseline_df.empty else 'NÃO'}")
    print(f"- dry_run=True (nenhum arquivo salvo)")

    print(f"\n[{now_ts()}] Fim — CAI vs NÃO CAI (dry_run={dry_run})")

if __name__ == "__main__":
    main()

[2025-09-19 17:48:41] Início — CAI vs NÃO CAI (prioridade CAI, pisos por horizonte) — XGB vs LSTM (Patch V1.1)

[PROVA SSOT]
- Caminho: /home/wrm/BOLSA_2026/gold/IBOV_gold.parquet (tier=GOLD)
- Linhas: 3400 | date_min: 2012-01-03 | date_max: 2025-09-19
- Colunas (amostra): date, open, high, low, close, volume, ticker, open_norm, high_norm, low_norm, close_norm, volume_norm, return_1d, volatility_5d, sma_5, sma_20, sma_ratio, y_h1, y_h3, y_h5 ...

[WALK-FORWARD — Folds]
Fold 01 | train[2012-01-03 → 2013-04-02] | val[2013-04-03 → 2013-07-02] | test[2013-07-03 → 2014-01-02]
Fold 02 | train[2012-01-03 → 2013-10-02] | val[2013-10-03 → 2014-01-02] | test[2014-01-03 → 2014-07-02]
Fold 03 | train[2012-01-03 → 2014-04-02] | val[2014-04-03 → 2014-07-02] | test[2014-07-03 → 2015-01-02]
Fold 04 | train[2012-01-03 → 2014-10-02] | val[2014-10-03 → 2015-01-02] | test[2015-01-03 → 2015-07-02]
Fold 05 | train[2012-01-03 → 2015-04-02] | val[2015-04-03 → 2015-07-02] | test[2015-07-03 → 2016-01-02]
Fold 0