In [2]:
# ============================================================
# REGRESSÃO POR TICKER - v22 (Relaxed Filters + Smart Fallback)
# ============================================================
from google.colab import drive
drive.mount('/content/drive')

!pip -q install lightgbm xgboost pyarrow fastparquet joblib

import os, json, glob, time, warnings
from pathlib import Path
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import lightgbm as lgb
import xgboost as xgb

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.pipeline import Pipeline
import joblib

warnings.filterwarnings("ignore")
np.random.seed(42)

# ============================================================
# CONFIG - RELAXADO E INTELIGENTE
# ============================================================
PARQUET_PATH = "/content/drive/MyDrive/Colab Notebooks/stock/expanded_stock_reduced.parquet"
MODEL_DIR = "/content/drive/MyDrive/Colab Notebooks/stock/models"
MODEL_TAG = "v22_RELAXED_SMART"
SAVE_MODELS = True
MODE = "auto"

REG_TARGETS = ["target_best_entry", "target_best_sale"]
RUN_MODELS = ["lgbm", "xgb"]

APPLY_DAYS = 5
VAL_DAYS = 90
VALID_POS = 0.90
HORIZON = 30
PURGE_DAYS = HORIZON

N_CONFIGS = 4
LGBM_TREES = 200
XGB_TREES = 200
EARLY_STOP = 30

MAX_TICKERS = None
USE_PARALLEL = True
MAX_WORKERS = 4

# ============================================================
# FILTROS RELAXADOS + INTELIGENTES
# ============================================================
MIN_R2_THRESHOLD = -1.0          # Aceita qualquer (finanças é ruidoso)
MAX_RMSE_PERCENTUAL_STOCKS = 0.25  # 25% para ações (.SA)
MAX_RMSE_PERCENTUAL_INDICES = 0.35 # 35% para índices/commodities (alta vol)
MAX_MAE_PERCENTUAL = 0.20        # 20% para MAE

# Low variance tolerance
LOW_VARIANCE_THRESHOLD = 1e-4    # Se var < isso, aceita R²=0

# Clipping ajustado por target
CLIP_ENTRY = (-0.20, 0.30)       # Entry: -20% a +30%
CLIP_SALE = (0.05, 0.40)         # Sale: +5% a +40%

USE_SMART_FALLBACK = True        # Média móvel 30d + última predição

print(f"MODE: {MODE} | Modelos: {RUN_MODELS} | Configs: {N_CONFIGS}")
print(f"Filtros relaxados: R²>{MIN_R2_THRESHOLD}, RMSE<25-35%, MAE<20%")
print(f"Low var tolerance: {LOW_VARIANCE_THRESHOLD}, Smart fallback: {USE_SMART_FALLBACK}")

# ============================================================
# UTILS (SAME AS BEFORE)
# ============================================================
def ensure_dir(p): Path(p).mkdir(parents=True, exist_ok=True)

def _model_path(ticker, model_key, target):
    ext = {"lgbm": ".txt", "xgb": ".json"}[model_key]
    return os.path.join(MODEL_DIR, f"{MODEL_TAG}_{ticker}_{model_key}_{target}{ext}")

def _meta_path(ticker, model_key, target):
    return _model_path(ticker, model_key, target) + ".meta.json"

def save_model_ticker(ticker, model_key, target, obj, feature_list, metrics, validation_info, fallback_info):
    ensure_dir(MODEL_DIR)
    path = _model_path(ticker, model_key, target)

    if model_key == "lgbm":
        obj.save_model(path)
    elif model_key == "xgb":
        obj.save_model(path)

    meta = {
        "ticker": ticker,
        "model_key": model_key,
        "target": target,
        "feature_cols": list(feature_list),
        "metrics": metrics,
        "validation": validation_info,
        "fallback_info": fallback_info,
        "saved_at": time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    with open(_meta_path(ticker, model_key, target), "w") as f:
        json.dump(meta, f)

def load_model_ticker(ticker, model_key, target):
    path = _model_path(ticker, model_key, target)
    if not os.path.exists(path):
        return None, None, None, None, None

    try:
        if model_key == "lgbm":
            obj = lgb.Booster(model_file=path)
        elif model_key == "xgb":
            obj = xgb.Booster()
            obj.load_model(path)
        else:
            return None, None, None, None, None

        meta_p = _meta_path(ticker, model_key, target)
        feat = None
        metrics = None
        validation = None
        fallback = None
        if os.path.exists(meta_p):
            with open(meta_p, "r") as f:
                meta = json.load(f)
                feat = meta.get("feature_cols")
                metrics = meta.get("metrics")
                validation = meta.get("validation", {})
                fallback = meta.get("fallback_info", {})

        return obj, feat, metrics, validation, fallback
    except Exception as e:
        print(f"[warn] load fail {ticker}/{model_key}/{target}: {e}")
        return None, None, None, None, None

# ============================================================
# MÉTRICAS + VALIDAÇÃO INTELIGENTE
# ============================================================
def _mae(y, p): return float(np.mean(np.abs(y - p)))
def _rmse(y, p): return float(np.sqrt(np.mean((y - p)**2)))
def _r2(y, p):
    num = np.sum((y - p)**2)
    den = np.sum((y - np.mean(y))**2)
    return float(1.0 - num/den) if den > 1e-12 else 0.0

def metrics_report(y_true, y_pred):
    return {
        "MAE": _mae(y_true, y_pred),
        "RMSE": _rmse(y_true, y_pred),
        "R2": _r2(y_true, y_pred)
    }

def calibrate_predictions(y_true, y_pred):
    valid = np.isfinite(y_true) & np.isfinite(y_pred)
    if valid.sum() < 3:
        return {"slope": 1.0, "intercept": 0.0}
    y_t = y_true[valid]
    y_p = y_pred[valid]
    var = np.var(y_p)
    if var < 1e-8:
        return {"slope": 1.0, "intercept": float(np.mean(y_t) - np.mean(y_p))}
    slope, intercept = np.polyfit(y_p, y_t, 1)
    if not np.isfinite(slope) or not np.isfinite(intercept):
        return {"slope": 1.0, "intercept": 0.0}
    slope = float(np.clip(slope, 0.2, 1.8))
    intercept = float(intercept)
    return {"slope": slope, "intercept": intercept}

def get_asset_type(ticker):
    """Classifica tipo de ativo para thresholds dinâmicos"""
    if ticker.endswith('.SA'):
        return 'stock'  # Ações brasileiras
    elif any(idx in ticker for idx in ['^', 'BTC', 'ETH', 'GC=F', 'SI=F', 'CL=F']):
        return 'index_commodity'  # Índices, crypto, commodities
    else:
        return 'other'

def validate_model_quality(y_true, y_pred, ticker):
    """
    Validação relaxada + inteligente por tipo de ativo.
    """
    y_range = y_true.max() - y_true.min()
    y_var = y_true.var()
    y_std = y_true.std()

    mae = _mae(y_true, y_pred)
    rmse = _rmse(y_true, y_pred)
    r2 = _r2(y_true, y_pred)

    mae_pct = mae / (y_range + 1e-9)
    rmse_pct = rmse / (y_range + 1e-9)

    # Thresholds dinâmicos
    asset_type = get_asset_type(ticker)
    max_rmse_pct = MAX_RMSE_PERCENTUAL_INDICES if asset_type == 'index_commodity' else MAX_RMSE_PERCENTUAL_STOCKS

    reasons = []
    is_valid = True
    quality_score = 1.0

    # Low variance: aceita automaticamente
    if y_var < LOW_VARIANCE_THRESHOLD:
        reasons.append("Low variance (aceito)")
        quality_score = 0.8  # Bom o suficiente
        is_valid = True
    else:
        # R² (relaxado)
        if r2 < MIN_R2_THRESHOLD:
            reasons.append(f"R²={r2:.3f} < {MIN_R2_THRESHOLD}")
            is_valid = False
            quality_score = max(0.0, quality_score - 0.3)

        # RMSE
        if rmse_pct > max_rmse_pct:
            reasons.append(f"RMSE%={rmse_pct*100:.1f}% > {max_rmse_pct*100}% ({asset_type})")
            is_valid = False
            quality_score = max(0.0, quality_score - 0.4)

        # MAE
        if mae_pct > MAX_MAE_PERCENTUAL:
            reasons.append(f"MAE%={mae_pct*100:.1f}% > {MAX_MAE_PERCENTUAL*100}%")
            is_valid = False
            quality_score = max(0.0, quality_score - 0.3)

    # Quality score final (0-1)
    if r2 > 0.3:
        quality_score += 0.2
    elif r2 > 0:
        quality_score += 0.1
    quality_score = min(1.0, quality_score)

    validation_info = {
        "is_valid": is_valid,
        "quality_score": float(quality_score),
        "asset_type": asset_type,
        "mae_pct": float(mae_pct),
        "rmse_pct": float(rmse_pct),
        "r2": float(r2),
        "y_var": float(y_var),
        "rejection_reasons": reasons
    }

    return is_valid, validation_info

# ============================================================
# LOAD PARQUET + NORMALIZA + SMART FALLBACK DATA
# ============================================================
print("\n[1/7] Carregando parquet e normalizando targets...")
df = pd.read_parquet(PARQUET_PATH)

if not isinstance(df.columns, pd.MultiIndex):
    raise ValueError("Parquet deve ter MultiIndex")

if not isinstance(df.index, pd.DatetimeIndex):
    df.index = pd.to_datetime(df.index)

df = df.sort_index()

lvl0 = df.columns.get_level_values(0).astype(str)
lvl1 = df.columns.get_level_values(1).astype(str)

close_cols = [(c0, c1) for c0, c1 in df.columns if 'close' in str(c0).lower()]
has_close = len(close_cols) > 0

tickers_valid = sorted(
    set(lvl1[lvl0 == REG_TARGETS[0]]) &
    set(lvl1[lvl0 == REG_TARGETS[1]])
)

if MAX_TICKERS and len(tickers_valid) > MAX_TICKERS:
    tickers_valid = tickers_valid[:MAX_TICKERS]

print(f"  ✓ Tickers válidos: {len(tickers_valid)}")

# Armazena dados para fallback smart: média móvel 30d e última predição
SMART_FALLBACK = {}  # (ticker, target) -> {'rolling_mean': val, 'last_pred': val}
LOW_VAR_TICKERS = set()

if has_close:
    print("  ✓ Normalizando targets + preparando fallback smart...")
    for ticker in tqdm(tickers_valid, desc="Processando", leave=False):
        close_col = None
        for c0, c1 in close_cols:
            if c1 == ticker:
                close_col = (c0, c1)
                break

        if close_col is None:
            continue

        close_series = df[close_col]

        for target_name in REG_TARGETS:
            target_col = (target_name, ticker)
            if target_col not in df.columns:
                continue

            target_abs = df[target_col]
            normalized = (target_abs - close_series) / close_series.replace(0, np.nan)

            # Detecta low variance
            var = normalized.var()
            if var < LOW_VARIANCE_THRESHOLD:
                LOW_VAR_TICKERS.add(ticker)

            # Smart fallback: média móvel 30d (mais recente)
            rolling_mean = normalized.tail(30).mean()

            # Última predição válida (do valid set)
            valid_part = normalized.iloc[-VAL_DAYS*len(tickers_valid):]  # Aproximação
            last_valid = valid_part.dropna().iloc[-1] if len(valid_part.dropna()) > 0 else rolling_mean

            SMART_FALLBACK[(ticker, target_name)] = {
                'rolling_mean': float(rolling_mean),
                'last_pred': float(last_valid)
            }

            df[target_col] = normalized

print(f"  ✓ Targets normalizados + {len(SMART_FALLBACK)} fallbacks smart")
print(f"  ⚠️ Low variance tickers: {len(LOW_VAR_TICKERS)} ({sorted(LOW_VAR_TICKERS)[:5]})")

# ============================================================
# MONTA LONG FORMAT (SAME)
# ============================================================
print("\n[2/7] Montando formato LONG...")

is_target = lvl0.str.startswith("target_")
feat_names = sorted(set(lvl0[~is_target]))

rows = []
for tk in tqdm(tickers_valid, desc="Stacking", leave=False):
    X_tk = df.xs(tk, level=1, axis=1)
    X_tk = X_tk.loc[:, [c for c in X_tk.columns if c in feat_names]].reindex(columns=feat_names, fill_value=0.0)

    block = X_tk.copy()
    for t in REG_TARGETS:
        block[t] = df[(t, tk)].astype('float32').values
    block['ticker'] = tk
    rows.append(block)

LONG = pd.concat(rows, axis=0).sort_index()

dummies = pd.get_dummies(LONG['ticker'], prefix='tk', dtype=np.uint8)
LONG = pd.concat([LONG.drop(columns=['ticker']), dummies], axis=1)

for c in LONG.columns:
    LONG[c] = pd.to_numeric(LONG[c], errors='coerce')
LONG = LONG.replace([np.inf, -np.inf], np.nan)

feature_cols = [c for c in LONG.columns if c not in REG_TARGETS]
target_cols = list(REG_TARGETS)

LONG[feature_cols] = LONG[feature_cols].fillna(0.0)

print(f"  ✓ LONG shape: {LONG.shape}")

# ============================================================
# SPLIT TEMPORAL (SAME)
# ============================================================
print("\n[3/7] Split temporal...")

dates = np.array(sorted(LONG.index.unique()))
apply_dates = dates[-APPLY_DAYS:]
preapply = dates[:-APPLY_DAYS]

n_pre = len(preapply)
center = int(round(VALID_POS * (n_pre - 1)))
v_start = max(PURGE_DAYS, center - VAL_DAYS // 2)
v_end = min(n_pre, v_start + VAL_DAYS)
v_start = max(PURGE_DAYS, v_end - VAL_DAYS)

valid_dates = preapply[v_start:v_end]
train_end = max(0, v_start - PURGE_DAYS)
train_dates = preapply[:train_end]

mask_train = LONG.index.isin(train_dates)
mask_valid = LONG.index.isin(valid_dates)
mask_apply = LONG.index.isin(apply_dates)

print(f"  ✓ Train: {len(train_dates)} dias")
print(f"  ✓ Valid: {len(valid_dates)} dias")
print(f"  ✓ Apply: {len(apply_dates)} dias")

# ============================================================
# HP SAMPLERS (SAME)
# ============================================================
def sample_lgbm():
    return {
        "objective": "regression",
        "boosting_type": "gbdt",
        "learning_rate": float(10**np.random.uniform(-2.0, -0.9)),
        "num_leaves": int(np.random.randint(31, 80)),
        "max_depth": -1,
        "min_child_samples": 20,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "reg_lambda": float(10**np.random.uniform(-2, 1)),
        "verbosity": -1,
        "n_jobs": 1,
        "metric": "rmse",
    }

def sample_xgb():
    return {
        "objective": "reg:squarederror",
        "learning_rate": float(10**np.random.uniform(-2.0, -0.9)),
        "max_depth": int(np.random.choice([4, 6, 8])),
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "reg_lambda": float(10**np.random.uniform(-2, 1)),
        "tree_method": "hist",
        "n_jobs": 1,
    }

SAMPLERS = {"lgbm": sample_lgbm, "xgb": sample_xgb}

# ============================================================
# TREINO POR TICKER (COM VALIDAÇÃO RELAXADA)
# ============================================================
def train_ticker_target(ticker, target, model_key):
    col_tk = f'tk_{ticker}'
    if col_tk not in LONG.columns:
        return None

    mask_tk = (LONG[col_tk] == 1)
    tr_mask = mask_train & mask_tk & LONG[target].notna()
    va_mask = mask_valid & mask_tk & LONG[target].notna()

    if tr_mask.sum() < 500 or va_mask.sum() < 50:
        return None

    X_tr = LONG.loc[tr_mask, feature_cols].values.astype('float32')
    y_tr = LONG.loc[tr_mask, target].values.astype('float32')

    X_va = LONG.loc[va_mask, feature_cols].values.astype('float32')
    y_va = LONG.loc[va_mask, target].values.astype('float32')

    best_model = None
    best_rmse = float('inf')
    best_validation = None

    for _ in range(N_CONFIGS):
        params = SAMPLERS[model_key]()

        try:
            if model_key == "lgbm":
                params['n_estimators'] = LGBM_TREES
                md = lgb.LGBMRegressor(**params)
                md.fit(
                    X_tr, y_tr,
                    eval_set=[(X_va, y_va)],
                    callbacks=[
                        lgb.early_stopping(EARLY_STOP, verbose=False),
                        lgb.log_evaluation(period=0)
                    ]
                )
                p_va = md.predict(X_va)
                obj = md.booster_

            elif model_key == "xgb":
                dtr = xgb.DMatrix(X_tr, label=y_tr)
                dva = xgb.DMatrix(X_va, label=y_va)
                obj = xgb.train(
                    params, dtr,
                    num_boost_round=XGB_TREES,
                    evals=[(dva, 'valid')],
                    early_stopping_rounds=EARLY_STOP,
                    verbose_eval=False
                )
                p_va = obj.predict(dva)

            # Validação relaxada
            is_valid, validation_info = validate_model_quality(y_va, p_va, ticker)

            if not is_valid and validation_info['y_var'] >= LOW_VARIANCE_THRESHOLD:
                continue  # Só rejeita se não for low var

            rmse = _rmse(y_va, p_va)
            if rmse < best_rmse:
                best_rmse = rmse
                best_model = obj
                best_validation = validation_info

        except Exception as e:
            continue

    if best_model is None:
        return None

    # Métricas finais
    if model_key == "lgbm":
        p_va_final = best_model.predict(X_va)
    elif model_key == "xgb":
        p_va_final = best_model.predict(xgb.DMatrix(X_va))

    metrics = metrics_report(y_va, p_va_final)
    calib = calibrate_predictions(y_va, p_va_final)
    if best_validation is None:
        best_validation = {}
    best_validation['calibration'] = calib

    fallback_info = SMART_FALLBACK.get((ticker, target), {})

    return {
        'ticker': ticker,
        'target': target,
        'model_key': model_key,
        'obj': best_model,
        'metrics': metrics,
        'validation': best_validation,
        'fallback_info': fallback_info
    }

# ============================================================
# CARREGA OU TREINA (ADAPTADO PARA RELAX)
# ============================================================
print(f"\n[4/7] Treinando/carregando modelos por ticker (relaxado)...")

models_db = {}
metrics_rows = []
rejected_count = 0
low_var_accepted = 0

tasks = [(tk, tgt, mk) for tk in tickers_valid for tgt in REG_TARGETS for mk in RUN_MODELS]

if MODE == "load":
    print("  Modo LOAD: carregando...")
    for tk, tgt, mk in tqdm(tasks, desc="Carregando"):
        obj, feat, met, val, fb = load_model_ticker(tk, mk, tgt)
        if obj is not None:
            models_db[(tk, tgt, mk)] = {
                'model': obj,
                'validation': val or {},
                'metrics': met or {}
            }
            if met:
                metrics_rows.append({'ticker': tk, 'target': tgt, 'model': mk, **met})

else:
    def process_task(tk, tgt, mk):
        if MODE == "auto":
            obj, feat, met, val, fb = load_model_ticker(tk, mk, tgt)
            if obj is not None:
                return (tk, tgt, mk, obj, met, val, fb, False)

        result = train_ticker_target(tk, tgt, mk)
        if result is None:
            return None

        return (tk, tgt, mk, result['obj'], result['metrics'], result['validation'], result['fallback_info'], True)

    if USE_PARALLEL and len(tasks) > 10:
        print(f"  Usando paralelização ({MAX_WORKERS} workers)...")
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            futures = {executor.submit(process_task, tk, tgt, mk): (tk, tgt, mk) for tk, tgt, mk in tasks}

            for future in tqdm(as_completed(futures), total=len(futures), desc="Treinando"):
                result = future.result()
                if result is None:
                    rejected_count += 1
                    continue

                tk, tgt, mk, obj, met, val, fb, trained = result
                models_db[(tk, tgt, mk)] = {
                    'model': obj,
                    'validation': val or {},
                    'metrics': met or {}
                }
                metrics_rows.append({'ticker': tk, 'target': tgt, 'model': mk, **met})

                if tk in LOW_VAR_TICKERS:
                    low_var_accepted += 1

                if SAVE_MODELS and trained:
                    save_model_ticker(tk, mk, tgt, obj, feature_cols, met, val, fb)
    else:
        # Sequential version (same logic)
        pass  # Implement if needed

print(f"  ✓ Modelos aceitos: {len(models_db)}/{len(tasks)}")
print(f"  ⚠️ Rejeitados: {rejected_count}")
print(f"  ✓ Low var aceitos: {low_var_accepted}")

# ============================================================
# MÉTRICAS VALID
# ============================================================
print("\n[5/7] Compilando métricas VALID...")

if metrics_rows:
    METRICS_DF = pd.DataFrame(metrics_rows).sort_values(['ticker', 'target', 'RMSE'])
    print("\n=== Top 20 melhores modelos (relaxados) ===")
    print(METRICS_DF.head(20).to_string(index=False))

    # Quality score médio
    avg_quality = METRICS_DF['R2'].mean() if 'R2' in METRICS_DF else 0
    print(f"  📊 R² médio: {avg_quality:.4f}")
else:
    METRICS_DF = pd.DataFrame()

# ============================================================
# PREDICTIONS APPLY (COM CLIPPING E FALLBACK SMART)
# ============================================================
print("\n[6/7] Gerando predições para APPLY (smart)...")

apply_rows = []

for tk in tqdm(tickers_valid, desc="APPLY", leave=False):
    col_tk = f'tk_{tk}'
    if col_tk not in LONG.columns:
        continue

    ap_mask = mask_apply & (LONG[col_tk] == 1)
    if ap_mask.sum() == 0:
        continue

    for tgt in REG_TARGETS:
        tgt_mask = ap_mask & LONG[tgt].notna()
        if tgt_mask.sum() == 0:
            continue

        X_ap = LONG.loc[tgt_mask, feature_cols].values.astype('float32')
        dates_ap = LONG.index[tgt_mask]
        y_true_ap = LONG.loc[tgt_mask, tgt].values

        has_valid_model = False
        used_fallback_type = None

        # Tenta modelos
        for mk in RUN_MODELS:
            model_entry = models_db.get((tk, tgt, mk))
            if model_entry is None:
                continue
            obj = model_entry.get('model')
            validation = model_entry.get('validation', {})
            calib = validation.get('calibration', {"slope": 1.0, "intercept": 0.0})
            quality_score = float(validation.get('quality_score', 1.0))
            alpha = float(np.clip(quality_score, 0.2, 1.0))
            fb_data = SMART_FALLBACK.get((tk, tgt), {})
            fallback_val = fb_data.get('last_pred', fb_data.get('rolling_mean', 0.0))

            try:
                if mk == "lgbm":
                    preds = obj.predict(X_ap)
                elif mk == "xgb":
                    preds = obj.predict(xgb.DMatrix(X_ap))

                preds = preds * calib.get('slope', 1.0) + calib.get('intercept', 0.0)

                # Blend com fallback quando qualidade é baixa
                if fb_data:
                    preds = alpha * preds + (1.0 - alpha) * fallback_val

                # Clipping por target
                if tgt == 'target_best_entry':
                    preds = np.clip(preds, CLIP_ENTRY[0], CLIP_ENTRY[1])
                else:
                    preds = np.clip(preds, CLIP_SALE[0], CLIP_SALE[1])

                for dt, pred, ytrue in zip(dates_ap, preds, y_true_ap):
                    apply_rows.append({
                        'Date': dt,
                        'ticker': tk,
                        'target': tgt,
                        'model': mk,
                        'pred_pct': float(pred),
                        'y_true_pct': float(ytrue),
                        'used_fallback': False,
                        'quality_score': quality_score,
                        'blend_alpha': alpha
                    })

                has_valid_model = True
            except Exception as e:
                continue

        # Smart fallback se necessário
        if not has_valid_model and USE_SMART_FALLBACK:
            fb_data = SMART_FALLBACK.get((tk, tgt), {})
            if fb_data:
                # Prioriza last_pred, senão rolling_mean
                fallback_val = fb_data.get('last_pred', fb_data.get('rolling_mean', 0.0))

                # Clipping no fallback também
                if tgt == 'target_best_entry':
                    fallback_val = np.clip(fallback_val, CLIP_ENTRY[0], CLIP_ENTRY[1])
                else:
                    fallback_val = np.clip(fallback_val, CLIP_SALE[0], CLIP_SALE[1])

                for dt, ytrue in zip(dates_ap, y_true_ap):
                    apply_rows.append({
                        'Date': dt,
                        'ticker': tk,
                        'target': tgt,
                        'model': f'smart_fallback_{used_fallback_type or "last"}',
                        'pred_pct': float(fallback_val),
                        'y_true_pct': float(ytrue),
                        'used_fallback': True,
                        'quality_score': 0.5,  # Fallback médio
                        'blend_alpha': 0.0
                    })

                used_fallback_type = 'last' if fb_data.get('last_pred') else 'rolling'
                has_valid_model = True  # Garante cobertura

APPLY_DF = pd.DataFrame(apply_rows).sort_values(['Date', 'ticker', 'target', 'model'])

fallback_count = APPLY_DF['used_fallback'].sum()
avg_quality = APPLY_DF['quality_score'].mean()
print(f"  ✓ Predições APPLY: {len(APPLY_DF)} linhas")
print(f"  ✓ Fallbacks: {fallback_count} ({fallback_count/len(APPLY_DF)*100:.1f}%)")
print(f"  📊 Qualidade média: {avg_quality:.3f}")

print("\n=== APPLY Preview (primeiras 30 linhas, com quality) ===")
preview = APPLY_DF[['Date', 'ticker', 'target', 'model', 'pred_pct', 'y_true_pct', 'used_fallback', 'quality_score']].head(30)
print(preview.to_string(index=False))

# ============================================================
# SALVAR OUTPUTS
# ============================================================
print("\n[7/7] Salvando outputs...")

OUT_DIR = os.path.join(MODEL_DIR, f"{MODEL_TAG}_outputs")
ensure_dir(OUT_DIR)

if not METRICS_DF.empty:
    path = os.path.join(OUT_DIR, "metrics_valid.csv")
    METRICS_DF.to_csv(path, index=False)
    print(f"  ✓ {path}")

if not APPLY_DF.empty:
    path = os.path.join(OUT_DIR, "apply_predictions_tidy.csv")
    APPLY_DF.to_csv(path, index=False)
    print(f"  ✓ {path}")

# Salva low var tickers e smart fallback
low_var_path = os.path.join(OUT_DIR, "low_variance_tickers.txt")
with open(low_var_path, 'w') as f:
    f.write('\n'.join(sorted(LOW_VAR_TICKERS)))
print(f"  ✓ {low_var_path}")

fallback_path = os.path.join(OUT_DIR, "smart_fallback_data.json")
with open(fallback_path, 'w') as f:
    json.dump(SMART_FALLBACK, f, indent=2)
print(f"  ✓ {fallback_path}")

print("\n✅ Processo concluído!")
print(f"📁 Outputs em: {OUT_DIR}")
print(f"📊 Modelos aceitos: {len(models_db)} (esperado ~80-90%)")
print(f"🎯 Clipping: Entry {CLIP_ENTRY}, Sale {CLIP_SALE}")
print(f"💡 Low var tickers salvos para análise")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
MODE: auto | Modelos: ['lgbm', 'xgb'] | Configs: 4
Filtros relaxados: R²>-1.0, RMSE<25-35%, MAE<20%
Low var tolerance: 0.0001, Smart fallback: True

[1/7] Carregando parquet e normalizando targets...
  ✓ Tickers válidos: 147
  ✓ Normalizando targets + preparando fallback smart...


Processando:   0%|          | 0/147 [00:00<?, ?it/s]

  ✓ Targets normalizados + 214 fallbacks smart
  ⚠️ Low variance tickers: 0 ([])

[2/7] Montando formato LONG...


Stacking:   0%|          | 0/147 [00:00<?, ?it/s]

  ✓ LONG shape: (969759, 316)

[3/7] Split temporal...
  ✓ Train: 5857 dias
  ✓ Valid: 90 dias
  ✓ Apply: 5 dias

[4/7] Treinando/carregando modelos por ticker (relaxado)...
  Usando paralelização (4 workers)...


Treinando:   0%|          | 0/588 [00:00<?, ?it/s]

  ✓ Modelos aceitos: 208/588
  ⚠️ Rejeitados: 380
  ✓ Low var aceitos: 0

[5/7] Compilando métricas VALID...

=== Top 20 melhores modelos (relaxados) ===
   ticker            target model      MAE     RMSE         R2
 AUDUSD=X target_best_entry  lgbm 0.007426 0.009096   0.305858
 B3SA3.SA target_best_entry  lgbm 0.018003 0.023461   0.687105
 B3SA3.SA target_best_entry   xgb 0.025750 0.031847   0.423454
 BBAS3.SA  target_best_sale  lgbm 0.019628 0.024333   0.632072
 BBAS3.SA  target_best_sale   xgb 0.021160 0.025901   0.583108
 BBDC3.SA target_best_entry   xgb 0.016704 0.021225   0.837694
 BBDC3.SA target_best_entry  lgbm 0.024560 0.029539   0.685627
 BBDC4.SA target_best_entry   xgb 0.042310 0.046891   0.420527
 BBDC4.SA  target_best_sale  lgbm 0.025990 0.030347   0.504414
 BBDC4.SA  target_best_sale   xgb 0.033855 0.036357   0.288673
 BEEF3.SA target_best_entry   xgb 0.014061 0.017713   0.310798
 BEEF3.SA target_best_entry  lgbm 0.017009 0.021175   0.015095
BRCR11.SA  target_best_sale

APPLY:   0%|          | 0/147 [00:00<?, ?it/s]

  ✓ Predições APPLY: 1675 linhas
  ✓ Fallbacks: 635 (37.9%)
  📊 Qualidade média: 0.810

=== APPLY Preview (primeiras 30 linhas, com quality) ===
      Date    ticker            target               model  pred_pct  y_true_pct  used_fallback  quality_score
2025-10-29  ABEV3.SA target_best_entry smart_fallback_last -0.015736   -0.004156           True            0.5
2025-10-29  ABEV3.SA  target_best_sale smart_fallback_last  0.050000    0.073982           True            0.5
2025-10-29 ALZR11.SA target_best_entry smart_fallback_last -0.002857   -0.012276           True            0.5
2025-10-29 ALZR11.SA  target_best_sale smart_fallback_last  0.050000    0.002833           True            0.5
2025-10-29  AUDUSD=X target_best_entry                lgbm -0.029378   -0.007790          False            1.0
2025-10-29  AUDUSD=X  target_best_sale smart_fallback_last  0.050000    0.004920           True            0.5
2025-10-29  AZUL4.SA target_best_entry smart_fallback_last -0.017544   -0.0090

TypeError: keys must be str, int, float, bool or None, not tuple