# Offline RL (fase SELL/HOLD) — preparação de s_t e D_t + Bandit baseline

Nesta seção:
- Preparamos estados s_t (apenas passado) a partir de `gold_tabular.csv` (Close & Volume — e CLV se houver High/Low).
- Calculamos D_t: pior queda futura em 1/3/5 dias com pesos w=(1.0, 0.6, 0.3) — usado só para recompensa/rótulo, nunca como feature.
- Baseline contextual: regressão logística (π(SELL|s)) com partição walk-forward e normalização apenas no treino.
- Simulação com quarentena Q=5 para métricas: recall de quedas grandes, precisão, perda média evitada, turnover e P&L líquido (custos).

In [6]:
# Preparar s_t e D_t; Baseline Bandit SELL/HOLD usando GOLD-RL recém-criado
import os
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_score, recall_score

# Configs principais
ALPHA = 1.0   # aversão à perda em r(HOLD) = -α·D_t
COST  = 0.0015  # custo de SELL (slippage+corretagem) em fração (ex: 15 bps)
Q     = 5     # quarentena em pregões
W     = np.array([1.0, 0.6, 0.3])  # pesos para janelas 1/3/5
AHEAD = [1,2,3,4,5]

# 1) Carregar gold_rl_tabular.csv (produzido no bloco anterior)
csv_path = Path(r"G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\00_data\03_final\gold_rl_tabular.csv")
if not csv_path.exists():
    raise FileNotFoundError(f"gold_rl_tabular.csv não encontrado: {csv_path}")

df = pd.read_csv(csv_path)

# Normalizar nomes mínimos
lower_map = {c.lower(): c for c in df.columns}
col_date   = next((lower_map[c] for c in ['date','data','session','trading_day'] if c in lower_map), None)
col_ticker = next((lower_map[c] for c in ['ticker','symbol','ativo'] if c in lower_map), None)

if col_date != 'date':
    df = df.rename(columns={col_date:'date'})
if col_ticker != 'ticker':
    df = df.rename(columns={col_ticker:'ticker'})

# Parse e sort
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(['ticker','date']).reset_index(drop=True)

# 2) D_t: pior retorno futuro observado em 1/3/5 dias à frente
for k in AHEAD:
    df[f'fwd{k}'] = df.groupby('ticker')['close'].pct_change(-k)

df['D1'] = df['fwd1']
df['D3'] = df[['fwd1','fwd2','fwd3']].min(axis=1)
df['D5'] = df[['fwd1','fwd2','fwd3','fwd4','fwd5']].min(axis=1)

df['D_t'] = W[0]*df['D1'] + W[1]*df['D3'] + W[2]*df['D5']

# 3) Walk-forward simples (70/15/15 por data)
all_dates = np.sort(df['date'].dropna().unique())
tr_end = all_dates[int(0.7*len(all_dates))]
va_end = all_dates[int(0.85*len(all_dates))]

train = df[df['date'] <= tr_end].copy()
val   = df[(df['date'] > tr_end) & (df['date'] <= va_end)].copy()
test  = df[df['date'] > va_end].copy()

# 4) Rotulagem do baseline e treino
# Critério heurístico: SELL se (1+α)*D_t > COST
for part in (train, val, test):
    part['sell_label'] = (part['D_t'] * (1.0 + ALPHA) > COST)

feature_cols = ['z1','z2','z3','z5','vol21','vol21_pct','rvol_pct','rvol_chg','clv','dist_peak20_sigma','pct_z2_le_m1','med_vol21']
# Garantir que as colunas existam (CLV pode ser NaN)
feature_cols = [c for c in feature_cols if c in df.columns]

Xtr, Xva, Xte = train[feature_cols], val[feature_cols], test[feature_cols]
ytr, yva, yte = train['sell_label'], val['sell_label'], test['sell_label']

pipe = Pipeline([
    ('scaler', StandardScaler(with_mean=False)),
    ('lr', LogisticRegression(max_iter=1000))
])

Xtr_, Xva_, Xte_ = Xtr.fillna(0.0), Xva.fillna(0.0), Xte.fillna(0.0)
pipe.fit(Xtr_, ytr)
va_pred, te_pred = pipe.predict(Xva_), pipe.predict(Xte_)

print("Baseline Bandit (LogReg) — Val: prec=%.3f rec=%.3f" % (precision_score(yva, va_pred, zero_division=0), recall_score(yva, va_pred, zero_division=0)))
print("Baseline Bandit (LogReg) — Test: prec=%.3f rec=%.3f" % (precision_score(yte, te_pred, zero_division=0), recall_score(yte, te_pred, zero_division=0)))

# 5) Simulação com quarentena Q
val['prob'] = pipe.predict_proba(Xva_)[:,1]
test['prob'] = pipe.predict_proba(Xte_)[:,1]

THR = 0.5

def simulate(df_part: pd.DataFrame, thr=THR):
    dfp = df_part.sort_values(['ticker','date']).copy()
    dfp['action'] = 'HOLD'
    dfp['cost'] = 0.0
    dfp['avoid'] = 0.0
    for tk, g in dfp.groupby('ticker'):
        q_until = pd.Timestamp.min
        idxs = g.index.tolist()
        for idx in idxs:
            d = dfp.at[idx, 'date']
            act = 'HOLD'
            cst = 0.0
            if d >= q_until and dfp.at[idx, 'prob'] >= thr:
                act = 'SELL'
                cst = COST
                q_until = d + pd.tseries.offsets.BDay(Q)
            Dt = dfp.at[idx, 'D_t']
            avoid = (-Dt) if (act=='SELL' and pd.notna(Dt)) else 0.0
            dfp.at[idx, 'action'] = act
            dfp.at[idx, 'cost'] = cst
            dfp.at[idx, 'avoid'] = avoid
    sold = (dfp['action']=='SELL')
    recall_big = float(np.mean(dfp.loc[dfp['sell_label'], 'action']=='SELL')) if dfp['sell_label'].any() else 0.0
    precision  = float(np.mean(dfp.loc[sold, 'sell_label'])) if sold.any() else 0.0
    total_cost = float(dfp['cost'].sum())
    total_avoid= float(dfp['avoid'].sum())
    turnover   = float(sold.mean())
    return {
        'recall_big': recall_big,
        'precision': precision,
        'turnover': turnover,
        'net_PnL_proxy': total_avoid - total_cost
    }

val_metrics = simulate(val)
test_metrics = simulate(test)
print("Sim/Val:", val_metrics)
print("Sim/Test:", test_metrics)

Baseline Bandit (LogReg) — Val: prec=0.600 rec=0.001
Baseline Bandit (LogReg) — Test: prec=0.500 rec=0.001
Sim/Val: {'recall_big': 0.0003572704537334762, 'precision': 0.6666666666666666, 'turnover': 0.00018158707100054475, 'net_PnL_proxy': -0.06482440203951295}
Sim/Test: {'recall_big': 0.0005583472920156337, 'precision': 0.6, 'turnover': 0.00030350855894136213, 'net_PnL_proxy': -0.7594226656277075}


# GOLD-RL a partir de Silver (Close & Volume)

Este bloco constrói um dataset tabular para RL (SELL/HOLD) diretamente de:
- `00_data/02_curado/silver_close.parquet`
- `00_data/02_curado/silver_volume.parquet`

Com máscara de datas B3 (via índice do BVSP quando disponível ou união de datas presentes), computando por ticker e dia apenas-features de passado:
- z-scores dos retornos (1/3/5d) usando janela 252d (mín. 60)
- distância ao pico 20d em σ
- vol 21d e seu percentil rolling (252d)
- RVOL (percentil rolling 252d e variação diária)
- CLV (se houver High/Low)

E dois sinais cross-section por dia:
- % de papéis com z2 ≤ −1
- mediana de vol21

Saídas: `00_data/03_final/gold_rl_tabular.parquet` e `.csv`.

In [4]:
# Construir GOLD-RL (tabular) de silver_close/volume com máscara B3 e features pedidas
from pathlib import Path
import numpy as np
import pandas as pd

CLOSE_PATH  = Path(r"G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\00_data\02_curado\silver_close.parquet")
VOLUME_PATH = Path(r"G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\00_data\02_curado\silver_volume.parquet")
OUT_DIR     = Path(r"G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\00_data\03_final")

if not CLOSE_PATH.exists() or not VOLUME_PATH.exists():
    raise FileNotFoundError("silver_close.parquet ou silver_volume.parquet não encontrados em 00_data/02_curado.")

close_wide  = pd.read_parquet(CLOSE_PATH)
volume_wide = pd.read_parquet(VOLUME_PATH)

# Normaliza colunas de data/index
if 'date' in close_wide.columns:
    close_wide = close_wide.set_index('date')
if 'date' in volume_wide.columns:
    volume_wide = volume_wide.set_index('date')

close_wide.index = pd.to_datetime(close_wide.index)
volume_wide.index = pd.to_datetime(volume_wide.index)

# Máscara de datas B3: interseção dos índices e remoção de finais de semana
b3_idx = close_wide.index.intersection(volume_wide.index)
b3_idx = pd.DatetimeIndex(b3_idx)
b3_idx = b3_idx[b3_idx.dayofweek < 5]

close_wide = close_wide.loc[b3_idx].sort_index()
volume_wide = volume_wide.loc[b3_idx].sort_index()

# Long format
close_long = close_wide.stack().rename('close').to_frame().reset_index()
close_long.columns = ['date','ticker','close']

volume_long = volume_wide.stack().rename('volume').to_frame().reset_index()
volume_long.columns = ['date','ticker','volume']

panel = close_long.merge(volume_long, on=['date','ticker'], how='inner').sort_values(['ticker','date']).reset_index(drop=True)

# Função de pct_change segura a divisão por zero e tipos

def pct_change_safe(s: pd.Series, periods: int = 1) -> pd.Series:
    s = pd.to_numeric(s, errors='coerce').astype(float)
    prev = s.shift(periods)
    with np.errstate(divide='ignore', invalid='ignore'):
        out = s / prev - 1.0
    # quando prev==0, define NaN para evitar ZeroDivisionError
    out = out.mask(prev == 0, np.nan)
    return out

# Features por ticker (passado)

def per_ticker_features(g: pd.DataFrame) -> pd.DataFrame:
    g = g.sort_values('date').copy()
    g['close'] = pd.to_numeric(g['close'], errors='coerce').astype(float)
    g['volume'] = pd.to_numeric(g['volume'], errors='coerce').astype(float)

    # retornos (seguros)
    g['ret1'] = pct_change_safe(g['close'], 1)
    g['ret2'] = pct_change_safe(g['close'], 2)
    g['ret3'] = pct_change_safe(g['close'], 3)
    g['ret5'] = pct_change_safe(g['close'], 5)

    # z-scores (janela 252d)
    for k in [1,2,3,5]:
        r = g[f'ret{k}']
        mu = r.rolling(252, min_periods=60).mean()
        sd = r.rolling(252, min_periods=60).std()
        g[f'z{k}'] = (r - mu) / sd.replace(0,np.nan)

    # vol 21d
    g['vol21'] = g['ret1'].rolling(21, min_periods=10).std()

    # percentil da vol21 rolling 252
    def last_pct(x):
        s = pd.Series(x)
        return (s.rank(pct=True).iloc[-1] if len(s) else np.nan)
    g['vol21_pct'] = g['vol21'].rolling(252, min_periods=60).apply(last_pct, raw=False)

    # RVOL percentil (volume) e variação diária
    v = g['volume']
    g['rvol_pct'] = v.rolling(252, min_periods=60).apply(last_pct, raw=False)
    g['rvol_chg'] = g['rvol_pct'].diff()

    # CLV precisa high/low (não temos nos silver informados). Deixamos NaN para manter esquema.
    g['clv'] = np.nan

    # distância ao pico 20d em σ (usando std21 e close)
    roll_max20 = g['close'].rolling(20, min_periods=10).max()
    std21 = g['vol21']
    denom = (std21 * g['close']).replace(0, np.nan)
    g['dist_peak20_sigma'] = (roll_max20 - g['close']) / denom

    return g

feat = panel.groupby('ticker', group_keys=False).apply(per_ticker_features)

# Cross-section sinais por dia (usa z2 real)
agg = feat.groupby('date').agg(
    pct_z2_le_m1 = ('z2', lambda s: float(np.mean(s <= -1.0))),
    med_vol21    = ('vol21', 'median'),
).reset_index()

feat = feat.merge(agg, on='date', how='left')

# Ordena e salva
feat = feat.sort_values(['ticker','date']).reset_index(drop=True)

OUT_DIR.mkdir(parents=True, exist_ok=True)
parq_path = OUT_DIR / 'gold_rl_tabular.parquet'
csv_path  = OUT_DIR / 'gold_rl_tabular.csv'

feat.to_parquet(parq_path, index=False)
feat.to_csv(csv_path, index=False)

print(f"Salvo: {parq_path} e {csv_path}")

  feat = panel.groupby('ticker', group_keys=False).apply(per_ticker_features)


Salvo: G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\00_data\03_final\gold_rl_tabular.parquet e G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\00_data\03_final\gold_rl_tabular.csv


## Fase 1 — Advantage regression (GBM) com compra por seleção e punição assimétrica

Implementa o alvo A_t = (1+α)D_t - cost + U*_t - φ·max(0, δ - D_t),
com compra determinística do melhor papel (U*_t = max_j U_t(j)), quarentena Q=5 na simulação,
e varredura de limiar por bucket de liquidez (tercis de ADTV).

In [8]:
# Advantage regression (GBM) — SELL vs HOLD com compra por seleção
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score
from sklearn.isotonic import IsotonicRegression

try:
    from lightgbm import LGBMRegressor
    _HAS_LGBM = True
except Exception:
    from sklearn.ensemble import GradientBoostingRegressor
    _HAS_LGBM = False

# Hiperparâmetros iniciais
ALPHA = 3.0
COST  = 0.0025   # 25 bps ida+volta
PHI   = 0.25
Q     = 5
W     = np.array([1.0, 0.6, 0.3])
AHEAD = [1,2,3,4,5]

csv_path = Path(r"G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\00_data\03_final\gold_rl_tabular.csv")
df = pd.read_csv(csv_path)
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(['ticker','date']).reset_index(drop=True)

# Log-retornos à frente seguros (tratando zeros como missing)
for k in AHEAD:
    fwd = df.groupby('ticker')['close'].shift(-k)
    ratio = fwd / df['close']
    ratio = ratio.mask((df['close']<=0) | (fwd<=0))
    df[f'logfwd{k}'] = np.log(ratio)

# D_t(i) = max(0, - min_{h in H} sum_{k=1..h} r_{t+k}) usando log-returns
cum1 = df['logfwd1']
cum3 = df[['logfwd1','logfwd2','logfwd3']].sum(axis=1)
cum5 = df[['logfwd1','logfwd2','logfwd3','logfwd4','logfwd5']].sum(axis=1)
min_cum = pd.concat([cum1, cum3, cum5], axis=1).min(axis=1)
df['D_t'] = np.maximum(0.0, -min_cum)

# U_t(j) = w1*r_{t+1} + w3*sum_{1..3} + w5*sum_{1..5}
U1 = cum1
U3 = cum3
U5 = cum5
U  = W[0]*U1 + W[1]*U3 + W[2]*U5

# Determina U*_t (melhor j) por data entre papéis elegíveis (aqui, todos)
best_U_by_date = U.groupby(df['date']).max()
df = df.join(best_U_by_date.rename('U_star'), on='date')

# δ por ticker: p65(D_t>0)
delta_by_ticker = (df[df['D_t']>0].groupby('ticker')['D_t'].quantile(0.65)).rename('delta')
df = df.join(delta_by_ticker, on='ticker')
# fallback se não houver amostras >0
df['delta'] = df['delta'].fillna(df['D_t'].quantile(0.65))

# Alvo advantage: A_t = (1+α)D_t - COST + U* - PHI*max(0, δ - D_t)
df['A_t'] = (1.0 + ALPHA)*df['D_t'] - COST + df['U_star'] - PHI * np.maximum(0.0, df['delta'] - df['D_t'])

# Features (somente passado)
base_feats = ['z1','z2','z3','z5','vol21','vol21_pct','rvol_pct','rvol_chg','clv','dist_peak20_sigma','pct_z2_le_m1','med_vol21']
feature_cols = [c for c in base_feats if c in df.columns]

# Split walk-forward (70/15/15)
all_dates = np.sort(df['date'].unique())
tr_end = all_dates[int(0.7*len(all_dates))]
va_end = all_dates[int(0.85*len(all_dates))]
train = df[df['date'] <= tr_end].copy()
val   = df[(df['date'] > tr_end) & (df['date'] <= va_end)].copy()
test  = df[df['date'] > va_end].copy()

# Drop NaNs no alvo somente no treino
train = train.dropna(subset=['A_t'])

Xtr, ytr = train[feature_cols].fillna(0.0), train['A_t']
Xva, yva = val[feature_cols].fillna(0.0), val['A_t']
Xte, yte = test[feature_cols].fillna(0.0), test['A_t']

# Modelo GBM
if _HAS_LGBM:
    model = LGBMRegressor(
        n_estimators=800,
        learning_rate=0.05,
        max_depth=-1,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='quantile',   # conservador
        alpha=0.3,
        random_state=42
    )
else:
    model = GradientBoostingRegressor(
        n_estimators=400,
        learning_rate=0.05,
        max_depth=3,
        loss='quantile',
        alpha=0.3,
        random_state=42
    )

model.fit(Xtr, ytr)
val['score'] = model.predict(Xva)
test['score'] = model.predict(Xte)

# Calibração isotônica (apenas em pares válidos)
try:
    mask = (~val['score'].isna()) & (~yva.isna())
    if mask.any():
        iso = IsotonicRegression(out_of_bounds='clip')
        iso.fit(val.loc[mask, 'score'], yva.loc[mask])
        val.loc[mask, 'score'] = 0.9 * iso.transform(val.loc[mask, 'score'])
        test['score'] = 0.9 * iso.transform(test['score'])
    else:
        val['score'] *= 0.9
        test['score'] *= 0.9
except Exception:
    val['score'] *= 0.9
    test['score'] *= 0.9

# ADTV ~ média de volume 21d por ticker
# criar coluna adtv21 já alinhada a df
adtv21 = df.groupby('ticker')['volume'].rolling(21, min_periods=10).mean().reset_index(level=0, drop=True)
df_with_adtv = df[['date','ticker']].copy()
df_with_adtv['adtv21'] = adtv21.values

val = val.merge(df_with_adtv, on=['date','ticker'], how='left')
test = test.merge(df_with_adtv, on=['date','ticker'], how='left')

# Buckets de liquidez
try:
    val['liq_bucket'] = pd.qcut(val['adtv21'], q=3, labels=['low','mid','high'])
    test['liq_bucket'] = pd.qcut(test['adtv21'], q=3, labels=['low','mid','high'])
except Exception:
    val['liq_bucket'] = 'all'
    test['liq_bucket'] = 'all'

# Simulador com quarentena e compra por seleção (usa U_star já calculado por data)
THRS = np.linspace(-0.5, 1.5, 21)  # varredura mais ampla no score


def simulate_advantage(dfp: pd.DataFrame, thr: float) -> dict:
    dfp = dfp.sort_values(['ticker','date']).copy()
    dfp['action'] = 'HOLD'
    dfp['cost'] = 0.0
    dfp['avoid'] = 0.0
    dfp['uplift'] = 0.0
    for tk, g in dfp.groupby('ticker'):
        q_until = pd.Timestamp.min
        for idx in g.index:
            d = dfp.at[idx, 'date']
            act = 'HOLD'
            cst = 0.0
            if d >= q_until and dfp.at[idx, 'score'] >= thr:
                act = 'SELL'
                cst = COST
                q_until = d + pd.tseries.offsets.BDay(Q)
            Dt = dfp.at[idx, 'D_t']
            Ustar = dfp.at[idx, 'U_star']
            # tratar NaNs
            Dt = 0.0 if pd.isna(Dt) else Dt
            Ustar = 0.0 if pd.isna(Ustar) else Ustar
            avoid = Dt if (act=='SELL') else 0.0
            uplift = (Ustar if act=='SELL' else 0.0)
            dfp.at[idx, 'action'] = act
            dfp.at[idx, 'cost'] = cst
            dfp.at[idx, 'avoid'] = avoid
            dfp.at[idx, 'uplift'] = uplift
    sold = (dfp['action']=='SELL')
    recall_big = float(np.mean(dfp.loc[dfp['D_t']>0, 'action']=='SELL')) if (dfp['D_t']>0).any() else 0.0
    precision  = float(np.mean(dfp.loc[sold, 'D_t']>0)) if sold.any() else 0.0
    total_cost = float(dfp['cost'].sum())
    total_avoid= float(dfp['avoid'].sum())
    total_upl  = float(dfp['uplift'].sum())
    turnover   = float(sold.mean())
    net_pnl    = total_avoid - total_cost + total_upl
    return {
        'thr': thr,
        'recall_big': recall_big,
        'precision': precision,
        'turnover': turnover,
        'net_PnL': net_pnl
    }

# Varredura por buckets
def sweep(dfp: pd.DataFrame):
    out = []
    for thr in THRS:
        out.append(simulate_advantage(dfp, thr))
    return pd.DataFrame(out)

val_res = val.groupby('liq_bucket', dropna=False).apply(sweep).reset_index(level=0).rename(columns={'level_0':'liq_bucket'})
print("Sweep/Val head:\n", val_res.groupby('liq_bucket').head(3))

# Escolha de τ por bucket maximizando net PnL com recall_big >= 0.6 e turnover em [1%,5%]
def pick_tau(df_res: pd.DataFrame):
    cand = df_res[(df_res['recall_big'] >= 0.6) & (df_res['turnover'].between(0.01, 0.05))]
    if len(cand)==0:
        cand = df_res.sort_values('net_PnL', ascending=False)
    return cand.iloc[0]['thr'] if len(cand)>0 else np.nan

tau_by_bucket = val_res.groupby('liq_bucket').apply(pick_tau)
print("τ por bucket:", tau_by_bucket.to_dict())

# Avaliação em Test usando τ por bucket

def eval_with_tau(dfp: pd.DataFrame, tau_map: dict):
    rows = []
    for b, grp in dfp.groupby('liq_bucket'):
        tau = tau_map.get(b, np.nan)
        if pd.isna(tau):
            continue
        rows.append(simulate_advantage(grp, tau))
    return pd.DataFrame(rows)

test_res = eval_with_tau(test, tau_by_bucket.to_dict())
print("Test per-bucket:")
print(test_res)
print("Test aggregate:", {
    'recall_big': float(test_res['recall_big'].mean()) if len(test_res) else 0.0,
    'precision': float(test_res['precision'].mean()) if len(test_res) else 0.0,
    'turnover': float(test_res['turnover'].mean()) if len(test_res) else 0.0,
    'net_PnL': float(test_res['net_PnL'].sum()) if len(test_res) else 0.0
})

  val_res = val.groupby('liq_bucket', dropna=False).apply(sweep).reset_index(level=0).rename(columns={'level_0':'liq_bucket'})
  val_res = val.groupby('liq_bucket', dropna=False).apply(sweep).reset_index(level=0).rename(columns={'level_0':'liq_bucket'})
  print("Sweep/Val head:\n", val_res.groupby('liq_bucket').head(3))
  tau_by_bucket = val_res.groupby('liq_bucket').apply(pick_tau)
  tau_by_bucket = val_res.groupby('liq_bucket').apply(pick_tau)
  for b, grp in dfp.groupby('liq_bucket'):


Sweep/Val head:
   liq_bucket  thr  recall_big  precision  turnover     net_PnL
0        low -0.5    0.211227   0.570694  0.211912  435.643420
1        low -0.4    0.211227   0.570694  0.211912  435.643420
2        low -0.3    0.211227   0.570694  0.211912  435.643420
0        mid -0.5    0.218776   0.668064  0.216633  450.894257
1        mid -0.4    0.218776   0.668064  0.216633  450.894257
2        mid -0.3    0.218776   0.668064  0.216633  450.894257
0       high -0.5    0.212556   0.643649  0.213002  442.297068
1       high -0.4    0.212556   0.643649  0.213002  442.297068
2       high -0.3    0.212556   0.643649  0.213002  442.297068
τ por bucket: {'low': 0.20000000000000007, 'mid': -0.5, 'high': -0.5}
Test per-bucket:
   thr  recall_big  precision  turnover     net_PnL
0  0.2    0.205024   0.566349  0.209980  352.043969
1 -0.5    0.221307   0.655172  0.216536  383.774710
2 -0.5    0.211654   0.636052  0.212127  369.952916
Test aggregate: {'recall_big': 0.2126613930862462, 'precis

In [2]:
# Otimização por ticker: τ_i por validação e avaliação em teste (autossuficiente)
import numpy as np
import pandas as pd
from pathlib import Path

# Constantes (cai de pé se não vierem da célula 6)
try:
    ALPHA
except NameError:
    ALPHA = 3.0
try:
    COST
except NameError:
    COST = 0.0025  # 25 bps
try:
    PHI
except NameError:
    PHI = 0.25
try:
    Q
except NameError:
    Q = 5
try:
    W
except NameError:
    W = np.array([1.0, 0.6, 0.3])
AHEAD = [1,2,3,4,5]

csv_path = Path(r"G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\00_data\03_final\gold_rl_tabular.csv")

# Requisitos para per-ticker: val/test com colunas ['date','ticker','D_t','U_star','score']

def have_val_test_env():
    return ('val' in globals()) and ('test' in globals()) and all(
        all(c in globals()[name].columns for c in ['date','ticker','D_t','U_star','score'])
        for name in ['val','test']
    )

# Se não existir 'val' e 'test' prontos, reconstrói mínimos + modelo rápido para obter 'score'
if not have_val_test_env():
    try:
        from lightgbm import LGBMRegressor
        _HAS_LGBM = True
    except Exception:
        from sklearn.ensemble import GradientBoostingRegressor
        _HAS_LGBM = False
    df = pd.read_csv(csv_path)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(['ticker','date']).reset_index(drop=True)
    # logfwd seguros
    for k in AHEAD:
        fwd = df.groupby('ticker')['close'].shift(-k)
        ratio = (fwd / df['close']).mask((df['close']<=0) | (fwd<=0))
        df[f'logfwd{k}'] = np.log(ratio)
    cum1 = df['logfwd1']
    cum3 = df[['logfwd1','logfwd2','logfwd3']].sum(axis=1)
    cum5 = df[['logfwd1','logfwd2','logfwd3','logfwd4','logfwd5']].sum(axis=1)
    min_cum = pd.concat([cum1, cum3, cum5], axis=1).min(axis=1)
    df['D_t'] = np.maximum(0.0, -min_cum)
    U = W[0]*cum1 + W[1]*cum3 + W[2]*cum5
    best_U_by_date = U.groupby(df['date']).max()
    df = df.join(best_U_by_date.rename('U_star'), on='date')
    # δ por ticker
    delta_by_ticker = (df[df['D_t']>0].groupby('ticker')['D_t'].quantile(0.65)).rename('delta')
    df = df.join(delta_by_ticker, on='ticker')
    df['delta'] = df['delta'].fillna(df['D_t'].quantile(0.65))
    # A_t
    df['A_t'] = (1.0 + ALPHA)*df['D_t'] - COST + df['U_star'] - PHI * np.maximum(0.0, df['delta'] - df['D_t'])
    # features
    base_feats = ['z1','z2','z3','z5','vol21','vol21_pct','rvol_pct','rvol_chg','clv','dist_peak20_sigma','pct_z2_le_m1','med_vol21']
    feature_cols = [c for c in base_feats if c in df.columns]
    # split
    all_dates = np.sort(df['date'].unique())
    tr_end = all_dates[int(0.7*len(all_dates))]
    va_end = all_dates[int(0.85*len(all_dates))]
    train = df[df['date'] <= tr_end].copy()
    val   = df[(df['date'] > tr_end) & (df['date'] <= va_end)].copy()
    test  = df[df['date'] > va_end].copy()
    # train model
    train = train.dropna(subset=['A_t'])
    Xtr, ytr = train[feature_cols].fillna(0.0), train['A_t']
    Xva = val[feature_cols].fillna(0.0)
    Xte = test[feature_cols].fillna(0.0)
    if _HAS_LGBM:
        model = LGBMRegressor(
            n_estimators=400,
            learning_rate=0.05,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            objective='quantile',
            alpha=0.3,
            random_state=42
        )
    else:
        model = GradientBoostingRegressor(
            n_estimators=300,
            learning_rate=0.05,
            max_depth=3,
            loss='quantile',
            alpha=0.3,
            random_state=42
        )
    model.fit(Xtr, ytr)
    val['score'] = model.predict(Xva)
    test['score'] = model.predict(Xte)

# Parâmetros da varredura e critérios
THRS = np.linspace(-0.5, 1.5, 41)
TARGET_RECALL = 0.6
TURNOVER_MIN, TURNOVER_MAX = 0.01, 0.05


def simulate_advantage(dfp: pd.DataFrame, thr: float) -> dict:
    dfp = dfp.sort_values(['ticker','date']).copy()
    dfp['action'] = 'HOLD'
    dfp['cost'] = 0.0
    dfp['avoid'] = 0.0
    dfp['uplift'] = 0.0
    for tk, g in dfp.groupby('ticker'):
        q_until = pd.Timestamp.min
        for idx in g.index:
            d = dfp.at[idx, 'date']
            act = 'HOLD'
            cst = 0.0
            if d >= q_until and dfp.at[idx, 'score'] >= thr:
                act = 'SELL'
                cst = COST
                q_until = d + pd.tseries.offsets.BDay(Q)
            Dt = dfp.at[idx, 'D_t']
            Ustar = dfp.at[idx, 'U_star']
            Dt = 0.0 if pd.isna(Dt) else Dt
            Ustar = 0.0 if pd.isna(Ustar) else Ustar
            avoid = Dt if (act=='SELL') else 0.0
            uplift = (Ustar if act=='SELL' else 0.0)
            dfp.at[idx, 'action'] = act
            dfp.at[idx, 'cost'] = cst
            dfp.at[idx, 'avoid'] = avoid
            dfp.at[idx, 'uplift'] = uplift
    sold = (dfp['action']=='SELL')
    recall_big = float(np.mean(dfp.loc[dfp['D_t']>0, 'action']=='SELL')) if (dfp['D_t']>0).any() else 0.0
    precision  = float(np.mean(dfp.loc[sold, 'D_t']>0)) if sold.any() else 0.0
    total_cost = float(dfp['cost'].sum())
    total_avoid= float(dfp['avoid'].sum())
    total_upl  = float(dfp['uplift'].sum())
    turnover   = float(sold.mean())
    net_pnl    = total_avoid - total_cost + total_upl
    return {
        'thr': thr,
        'recall_big': recall_big,
        'precision': precision,
        'turnover': turnover,
        'net_PnL': net_pnl
    }

# Escolha de τ por ticker

def pick_tau_per_ticker(g: pd.DataFrame) -> float:
    res = []
    for thr in THRS:
        res.append(simulate_advantage(g, thr))
    res = pd.DataFrame(res)
    cand = res[(res['recall_big'] >= TARGET_RECALL) & (res['turnover'].between(TURNOVER_MIN, TURNOVER_MAX))]
    if len(cand)==0:
        cand = res.sort_values('net_PnL', ascending=False)
    return float(cand.iloc[0]['thr']) if len(cand)>0 else np.nan

# τ_i na validação por ticker
val_tau_by_ticker = val.groupby('ticker').apply(pick_tau_per_ticker)
print("τ_i (Val) — primeiros 10:")
print(val_tau_by_ticker.head(10))

# Avaliação em Teste com τ_i
rows = []
for tk, g in test.groupby('ticker'):
    tau_i = val_tau_by_ticker.get(tk, np.nan)
    if pd.isna(tau_i) or len(g)==0:
        continue
    rows.append({**simulate_advantage(g, tau_i), 'ticker': tk, 'tau': tau_i})

test_per_ticker = pd.DataFrame(rows)
print("Test per-ticker — primeiros 10:")
print(test_per_ticker.head(10))

# Agregados e salvamento
agg = {
    'recall_big': float(test_per_ticker['recall_big'].mean()) if len(test_per_ticker) else 0.0,
    'precision': float(test_per_ticker['precision'].mean()) if len(test_per_ticker) else 0.0,
    'turnover': float(test_per_ticker['turnover'].mean()) if len(test_per_ticker) else 0.0,
    'net_PnL': float(test_per_ticker['net_PnL'].sum()) if len(test_per_ticker) else 0.0
}
print("Test aggregate (per-ticker τ):", agg)

# Salvar resultados
out_dir = Path(r"G:\Drives compartilhados\BOLSA_2026\a_bolsa2026_gemini\04_outputs")
out_dir.mkdir(parents=True, exist_ok=True)
val_tau_by_ticker.to_csv(out_dir / 'val_tau_by_ticker.csv')
test_per_ticker.to_csv(out_dir / 'test_per_ticker_metrics.csv', index=False)
print("Arquivos salvos em:", out_dir)

  val_tau_by_ticker = val.groupby('ticker').apply(pick_tau_per_ticker)


τ_i (Val) — primeiros 10:
ticker
ABEV3.SA   -0.5
B3SA3.SA   -0.5
BBAS3.SA   -0.5
CPLE6.SA   -0.5
CSNA3.SA   -0.5
ELET3.SA   -0.5
GGBR4.SA   -0.5
HAPV3.SA   -0.5
ITUB4.SA   -0.5
LREN3.SA   -0.5
dtype: float64
Test per-ticker — primeiros 10:
   thr  recall_big  precision  turnover    net_PnL    ticker  tau
0 -0.5    0.202279   0.663551  0.208577  34.231615  ABEV3.SA -0.5
1 -0.5    0.193642   0.626168  0.208171  35.635915  B3SA3.SA -0.5
2 -0.5    0.206061   0.635514  0.208171  33.905012  BBAS3.SA -0.5
3 -0.5    0.181529   0.532710  0.208984  32.540322  CPLE6.SA -0.5
4 -0.5    0.202817   0.672897  0.208577  38.934704  CSNA3.SA -0.5
5 -0.5    0.185759   0.560748  0.208577  33.362116  ELET3.SA -0.5
6 -0.5    0.222222   0.710280  0.208577  35.642827  GGBR4.SA -0.5
7 -0.5    0.191549   0.635514  0.208577  38.712888  HAPV3.SA -0.5
8 -0.5    0.201923   0.588785  0.208171  33.022387  ITUB4.SA -0.5
9 -0.5    0.214925   0.672897  0.208171  36.723370  LREN3.SA -0.5
Test aggregate (per-ticker τ): {'r