# Final Model Pipeline — LGBM + CatBoost (τ=300, calibrated, blended)

This notebook is a clean, linear rebuild:
- Fixed Elo decay **τ = 300**
- Stable time split with **holdout = last 15%**
- Consistent masks: `fit_mask` (train base), `cal_mask` (calibration), `test_mask` (final)
- LightGBM tuned params from your search; CatBoost baseline
- Platt sigmoid calibration (`cv='prefit'`)
- Blended predictions with your tuned weight
- Compact metrics utilities, optional threshold & importances


In [3]:

import pandas as pd
import numpy as np
from pathlib import Path

# Modeling & metrics
from sklearn.model_selection import GroupKFold
from sklearn.metrics import log_loss, roc_auc_score, brier_score_loss
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from lightgbm import LGBMClassifier, early_stopping, log_evaluation
from sklearn.base import clone
from sklearn.calibration import CalibratedClassifierCV

In [4]:
# === IMPORTS / CONFIG ===
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from sklearn.base import clone
from sklearn.metrics import roc_auc_score, log_loss, brier_score_loss
from sklearn.calibration import CalibratedClassifierCV

SEED = 42
rng = np.random.default_rng(SEED)

# Lock Elo decay tau (as requested)
TAU = 300

def seed_everywhere(seed=SEED):
    np.random.seed(seed)

seed_everywhere()
print(f"Config set. SEED={SEED}, TAU={TAU}")


Config set. SEED=42, TAU=300


In [21]:
# === ROW INDEXING HELPER (works for DataFrame/Series/ndarray) ===
def rows(A, indexer):
    """Row-subset A by indexer (positional ints or boolean mask), for pandas or numpy."""
    if hasattr(A, "iloc"):
        return A.iloc[indexer]
    return A[indexer]


### DATA PREPARATION

In [5]:
DATA_CSV = Path("cleaned/mafia_clean.csv")   # put the CSV next to this notebook or provide an absolute path
OUT_DIR  = Path("cleaned"); OUT_DIR.mkdir(exist_ok=True, parents=True)

df = pd.read_csv(DATA_CSV)
print("Loaded:", df.shape, "columns:", len(df.columns))
assert {'id','game_id','player_id','role','team','game_points','team_win'}.issubset(df.columns), \
    "Missing required columns in the cleaned dataset."

# Basic coercions
df['id'] = pd.to_numeric(df['id'], errors='coerce').astype('int64')
df['game_id'] = pd.to_numeric(df['game_id'], errors='coerce').astype('int64')
df['player_id'] = pd.to_numeric(df['player_id'], errors='coerce').astype('int64')
df['team_win'] = pd.to_numeric(df['team_win'], errors='coerce').astype('int8')
df['team'] = df['team'].astype('category')
df['role'] = df['role'].astype('category')

# Seat/position optional column name normalization (if present)
if 'place' in df.columns:
    df['place'] = pd.to_numeric(df['place'], errors='coerce').fillna(0).astype('int16')


Loaded: (802820, 21) columns: 21


In [6]:
# Meta eras
bins   = [0, 200_000, 400_000, 600_000, 800_000, 1_000_000_000]
labels = [1, 2, 3, 4, 5]
df['meta_period'] = pd.cut(df['id'], bins=bins, labels=labels, include_lowest=True).astype('int8')

# Gap per player (id as time proxy)
df = df.sort_values(['player_id','id']).copy()
df['gap_id'] = df.groupby('player_id')['id'].diff().fillna(0).astype('int64')
df['gap_id_clipped'] = np.clip(df['gap_id'], 0, 5000).astype('int32')
GAP_THRESH = 381  # adjust via quantiles if desired
df['long_break_flag'] = (df['gap_id'] >= GAP_THRESH).astype('int8')

# Restore global order
df = df.sort_values('id').reset_index(drop=True)


In [None]:
def compute_elos(dfin, init=1500, k=24, tau=300.0):
    d = dfin.sort_values('id').copy()
    elo_global, elo_side, elo_role = {}, {}, {}
    last_seen = {}
    outs = []

    for gid, g in d.groupby('game_id', sort=False):
        cur = g.copy()
        cur['pre_elo']      = [elo_global.get(pid, init) for pid in cur['player_id']]
        cur['pre_elo_side'] = [elo_side.get((pid, team), init) for pid, team in zip(cur['player_id'], cur['team'])]
        cur['pre_elo_role'] = [elo_role.get((pid, role), init) for pid, role in zip(cur['player_id'], cur['role'])]

        maf_mask  = cur['team'].eq('mafia')
        mafia_mu  = cur.loc[maf_mask, 'pre_elo'].mean()
        citizen_mu= cur.loc[~maf_mask, 'pre_elo'].mean()
        exp_mafia = 1.0 / (1.0 + 10 ** ((citizen_mu - mafia_mu)/400))
        mafia_res = int(cur.loc[maf_mask, 'team_win'].iloc[0])

        for _, r in cur.iterrows():
            pid, side, role, rid = int(r['player_id']), r['team'], r['role'], int(r['id'])
            gap = rid - last_seen.get(pid, rid)
            decay = float(np.exp(-max(gap,0)/float(tau)))
            exp = exp_mafia if side=='mafia' else (1-exp_mafia)
            act = mafia_res if side=='mafia' else (1-mafia_res)
            delta = k * decay * (act - exp)

            elo_global[pid] = elo_global.get(pid,  init) + delta
            elo_side[(pid, side)] = elo_side.get((pid, side), init) + delta
            elo_role[(pid, role)] = elo_role.get((pid, role), init) + delta
            last_seen[pid] = rid

        outs.append(cur[['game_id','player_id','pre_elo','pre_elo_side','pre_elo_role']])

    elo_df = pd.concat(outs, ignore_index=True)
    return d.merge(elo_df, on=['game_id','player_id'], how='left')

work_players = compute_elos(df, init=1500, k=24, tau=300.0)


In [8]:
def add_rolling_stats_side(df, windows=(5,20)):
    d = df.sort_values(['player_id','id']).copy()
    for side in ['mafia','citizens']:
        mask = d['team'].eq(side)
        d.loc[mask, f'roll5_win_rate_{side}']  = d.loc[mask].groupby('player_id')['team_win'].shift(1).rolling(windows[0], min_periods=1).mean().values
        d.loc[mask, f'roll20_win_rate_{side}'] = d.loc[mask].groupby('player_id')['team_win'].shift(1).rolling(windows[1], min_periods=1).mean().values
        d.loc[~mask, f'roll5_win_rate_{side}']  = 0.0
        d.loc[~mask, f'roll20_win_rate_{side}'] = 0.0
    return d

work_players = add_rolling_stats_side(work_players)


In [9]:
def add_role_history_stats(df, windows=(5,20,50)):
    d = df.sort_values(['player_id','role','id']).copy()
    out = []
    for (pid, role), g in d.groupby(['player_id','role'], sort=False):
        g = g.copy()
        past = g['team_win'].shift(1)
        g['games_in_role'] = np.arange(len(g), dtype=np.int32)
        for w in windows:
            g[f'win_rate_role_{role}_last{w}'] = past.rolling(w, min_periods=1).mean()
        out.append(g)
    return pd.concat(out, ignore_index=True).sort_values('id').reset_index(drop=True)

work_players = add_role_history_stats(work_players, windows=(5,20,50))


In [10]:
from itertools import combinations

def add_synergy_features(df):
    d = df.copy()
    game_order = (d.groupby('game_id')['id'].max().sort_values().index.tolist())
    pair_counts = {}
    out_rows = []

    for gid in game_order:
        g = d[d['game_id'] == gid]
        for team in ['mafia', 'citizens']:
            players = g.loc[g['team']==team, 'player_id'].dropna().astype(int).tolist()
            vals = [pair_counts.get((a,b,team), 0) for a,b in combinations(sorted(players), 2)] if len(players)>=2 else []
            s_mean = float(np.mean(vals)) if vals else 0.0
            s_max  = float(np.max(vals))  if vals else 0.0
            out_rows.append((gid, team, s_mean, s_max))
        # update after
        for team in ['mafia', 'citizens']:
            players = g.loc[g['team']==team, 'player_id'].dropna().astype(int).tolist()
            if len(players)>=2:
                for a,b in combinations(sorted(players), 2):
                    pair_counts[(a,b,team)] = pair_counts.get((a,b,team),0) + 1

    team_synergy = pd.DataFrame(out_rows, columns=['game_id','team','synergy_mean_team','synergy_max_team'])
    return d.merge(team_synergy, on=['game_id','team'], how='left')

work_players = add_synergy_features(work_players)


In [11]:
from itertools import product

def add_enemy_familiarity_features(df):
    d = df.sort_values('id').copy()
    game_order = (d.groupby('game_id')['id'].max().sort_values().index.tolist())
    faced_counts = {}
    out_rows = []

    for gid in game_order:
        g = d[d['game_id'] == gid]
        maf = g[g['team']=='mafia']['player_id'].dropna().astype(int).tolist()
        cit = g[g['team']=='citizens']['player_id'].dropna().astype(int).tolist()

        pairs_maf = [faced_counts.get(tuple(sorted([a,b])), 0) for a,b in product(maf, cit)]
        pairs_cit = [faced_counts.get(tuple(sorted([a,b])), 0) for a,b in product(cit, maf)]

        def stats(vals):
            return (float(np.mean(vals)) if vals else 0.0,
                    float(np.max(vals))  if vals else 0.0)

        maf_mean, maf_max = stats(pairs_maf)
        cit_mean, cit_max = stats(pairs_cit)

        out_rows.append((gid,'mafia',    maf_mean, maf_max))
        out_rows.append((gid,'citizens', cit_mean, cit_max))

        for a,b in product(maf, cit):
            key = tuple(sorted([int(a),int(b)]))
            faced_counts[key] = faced_counts.get(key, 0) + 1

    fam = pd.DataFrame(out_rows, columns=['game_id','team','enemy_fam_mean_team','enemy_fam_max_team'])
    return d.merge(fam, on=['game_id','team'], how='left')

work_players = add_enemy_familiarity_features(work_players)


In [12]:
def add_streak_features(df):
    d = df.sort_values(['player_id','id']).copy()
    win_streaks, loss_streaks = [], []

    for pid, g in d.groupby('player_id', sort=False):
        prev = g['team_win'].shift(1).values
        w_stk = np.zeros(len(g), dtype=np.int16)
        l_stk = np.zeros(len(g), dtype=np.int16)
        cur_w = cur_l = 0
        for i, v in enumerate(prev):
            if np.isnan(v):
                cur_w = cur_l = 0
            else:
                if v == 1:
                    cur_w += 1; cur_l = 0
                else:
                    cur_l += 1; cur_w = 0
            w_stk[i] = cur_w
            l_stk[i] = cur_l
        win_streaks.append(pd.Series(w_stk, index=g.index))
        loss_streaks.append(pd.Series(l_stk, index=g.index))

    d['win_streak']  = pd.concat(win_streaks).sort_index()
    d['loss_streak'] = pd.concat(loss_streaks).sort_index()
    return d.sort_values('id').reset_index(drop=True)

work_players = add_streak_features(work_players)


In [13]:
def add_games_played_feature(df):
    d = df.sort_values(['player_id','id']).copy()
    # number of *prior* appearances (shift to avoid leakage)
    d['games_played'] = d.groupby('player_id').cumcount().astype('int32')
    return d.sort_values('id').reset_index(drop=True)

work_players = add_games_played_feature(work_players)


In [15]:
def build_team_agg(work_players, add_ratios=False, ratio_eps=1e-3):
    agg_funcs = {}

    def add_agg(col, funcs):
        if col in work_players.columns:
            agg_funcs[col] = funcs

    def q25(x): return np.nanpercentile(x, 25)
    def q75(x): return np.nanpercentile(x, 75)

    # Core
    add_agg('pre_elo', ['mean','std','min','max', q25, q75])
    add_agg('pre_elo_side', ['mean'])
    add_agg('pre_elo_role', ['mean'])
    add_agg('gap_id_clipped', ['mean','max'])
    add_agg('long_break_flag', ['sum'])
    add_agg('place', ['mean','std','min','max'])
    add_agg('games_played', ['mean','std','min','max'])  # if present

    # Optional blocks
    add_agg('win_streak', ['mean','max'])
    add_agg('loss_streak', ['mean','max'])
    add_agg('synergy_mean_team', ['mean'])
    add_agg('synergy_max_team',  ['mean'])
    add_agg('enemy_fam_mean_team', ['mean'])
    add_agg('enemy_fam_max_team',  ['mean'])
    add_agg('roll5_win_rate_mafia',  ['mean'])
    add_agg('roll20_win_rate_mafia', ['mean'])
    add_agg('roll5_win_rate_citizens',  ['mean'])
    add_agg('roll20_win_rate_citizens', ['mean'])
    if 'meta_period' in work_players.columns:
        agg_funcs['meta_period'] = ['first']

    base = work_players.groupby(['game_id','team']).agg(agg_funcs)
    base.columns = ['_'.join([str(x) for x in c if x not in (None,)]).replace('<function ','').replace('>','')
                    for c in base.columns]
    base = base.reset_index()

    # --- NEW: meta-period normalization for Elo stats (remove era drift) ---
    if 'meta_period_first' in base.columns:
        elo_cols = [c for c in base.columns if c.startswith('pre_elo_')]
        for col in elo_cols:
            # center within meta-period
            base[f'{col}_norm'] = base[col] - base.groupby('meta_period_first')[col].transform('mean')

    # Role-specific singletons/means
    full_idx = base.set_index(['game_id','team']).index
    # Role-specific singletons/means
    full_idx = base.set_index(['game_id','team']).index

    def single_role_stat(role, value_col, out_name):
        s = (work_players[work_players['role']==role]
             .groupby(['game_id','team'])[value_col].mean()).reindex(full_idx)
        s.name = out_name; return s

    def mean_role_stat(role, value_col, out_name):
        s = (work_players[work_players['role']==role]
             .groupby(['game_id','team'])[value_col].mean()).reindex(full_idx)
        s.name = out_name; return s

    pieces = [
        single_role_stat('don','pre_elo_role','don_pre_elo_role'),
        single_role_stat('sheriff','pre_elo_role','sheriff_pre_elo_role'),
        single_role_stat('don','place','don_place'),
        single_role_stat('sheriff','place','sheriff_place'),
        mean_role_stat('black','pre_elo_role','black_mean_pre_elo_role'),
        mean_role_stat('red','pre_elo_role','red_mean_pre_elo_role'),
        single_role_stat('don','games_in_role','don_games_in_role'),
        single_role_stat('sheriff','games_in_role','sheriff_games_in_role'),
        mean_role_stat('black','games_in_role','black_mean_games_in_role'),
        mean_role_stat('red','games_in_role','red_mean_games_in_role'),
        single_role_stat('don','win_rate_role_don_last20','don_wr20'),
        single_role_stat('sheriff','win_rate_role_sheriff_last20','sheriff_wr20'),
        mean_role_stat('black','win_rate_role_black_last20','black_mean_wr20'),
        mean_role_stat('red','win_rate_role_red_last20','red_mean_wr20'),
    ]
    role_feats = pd.concat(pieces, axis=1).reset_index()
    team_agg = base.merge(role_feats, on=['game_id','team'], how='left')

    # Label & time proxy
    labels  = work_players.groupby(['game_id','team'])['team_win'].max().rename('team_win_team')
    gmaxid  = work_players.groupby('game_id')['id'].max().rename('game_max_id')
    team_agg = team_agg.merge(labels, on=['game_id','team']).merge(gmaxid, on='game_id')

    # Safe deltas / ratios
    wide = team_agg.pivot(index='game_id', columns='team')
    wide.columns = [f"{a}__{b}" for a,b in wide.columns]
    wide = wide.reset_index()

    def side_cols(side): 
        return [c for c in wide.columns if c.endswith(f"__{side}") and c!='game_id']
    maf_cols = side_cols('mafia')

    delta = pd.DataFrame({'game_id': wide['game_id']})
    skip_prefixes = ('team_win_team','meta_period')
    for mcol in maf_cols:
        base_name = mcol[:-len("__mafia")]
        if base_name.startswith(skip_prefixes): 
            continue
        ccol = base_name + "__citizens"
        if ccol in wide.columns:
            delta[base_name + "__delta_maf_minus_cit"] = wide[mcol] - wide[ccol]
            if add_ratios:
                delta[base_name + "__ratio_maf_over_cit"] = (wide[mcol] + ratio_eps) / (wide[ccol] + ratio_eps)

    team_tall = team_agg.merge(delta, on='game_id', how='left')

    # --- NEW: a few safe interactions (helps tree models separate regimes) ---
    def safe_mul(a, b): 
        return (team_tall.get(a) if a in team_tall else 0) * (team_tall.get(b) if b in team_tall else 0)

    def safe_diff(a, b): 
        return (team_tall.get(a) if a in team_tall else 0) - (team_tall.get(b) if b in team_tall else 0)

    # Names used below exist after delta creation; if any is missing in your run, it's treated as 0
    team_tall['elo_synergy_product'] = safe_mul('pre_elo_mean__delta_maf_minus_cit',
                                                'synergy_mean_team_mean__delta_maf_minus_cit')
    team_tall['elo_enemy_gap']       = safe_diff('pre_elo_mean__delta_maf_minus_cit',
                                                'enemy_fam_mean_team_mean__delta_maf_minus_cit')
    team_tall['elo_streak_mix']      = safe_mul('pre_elo_mean__delta_maf_minus_cit',
                                                'win_streak_mean__delta_maf_minus_cit')

    return team_tall

team_tall = build_team_agg(work_players, add_ratios=False)  # ratios often redundant


In [16]:
team_only = [c for c in team_tall.columns if c.startswith((
    'pre_elo_', 'gap_id_clipped_', 'long_break_flag_', 'place_',
    'win_streak_', 'loss_streak_', 'synergy_mean_team_', 'synergy_max_team_',
    'enemy_fam_', 'games_played_', 
    'don_pre_elo_role', 'sheriff_pre_elo_role', 'black_mean_pre_elo_role', 'red_mean_pre_elo_role',
    'don_games_in_role', 'sheriff_games_in_role', 'black_mean_games_in_role', 'red_mean_games_in_role',
    'don_wr20', 'sheriff_wr20', 'black_mean_wr20', 'red_mean_wr20',
    'meta_period_first'
))]
delta_feats = [c for c in team_tall.columns if c.endswith('__delta_maf_minus_cit')]

# NEW: explicitly add our interactions and meta-normalized Elo columns
extra_feats = [c for c in ['elo_synergy_product','elo_enemy_gap','elo_streak_mix']
               if c in team_tall.columns]
meta_norm_feats = [c for c in team_tall.columns if c.endswith('_norm')]

forbidden_tokens = {'team_win','team_win_team'}
USED_FEATS = [c for c in sorted(set(team_only + delta_feats + extra_feats + meta_norm_feats))
              if not any(tok in c for tok in forbidden_tokens)]

X = team_tall[USED_FEATS].fillna(0)
y = team_tall['team_win_team'].astype(int).values
groups = team_tall['game_id'].values
time_key = team_tall['game_max_id'].values

q70, q85 = np.quantile(time_key, [0.70, 0.85])
train_mask = time_key <= q85
cal_mask   = (time_key > q70) & (time_key <= q85)
test_mask  = time_key > q85

print("Shapes | X:", X.shape, "| y:", y.shape)
print("Split sizes | train:", train_mask.sum(), "cal:", cal_mask.sum(), "test:", test_mask.sum())


Shapes | X: (160564, 105) | y: (160564,)
Split sizes | train: 136480 cal: 24084 test: 24084


# === SPLIT: LAST 15% AS HOLDOUT & CONSISTENT MASKS ===

In [17]:

n = len(y)

# If you already created these masks earlier, we re-use them as is.
if 'train_mask' in globals() and 'test_mask' in globals():
    print("Using existing train_mask / test_mask.")
else:
    cut = int(np.floor(0.85 * n))
    train_mask = np.zeros(n, dtype=bool)
    test_mask  = np.zeros(n, dtype=bool)
    train_mask[:cut] = True
    test_mask[cut:]  = True
    print("Created train/test masks (last 15% = holdout).")

# Calibration slice = last 20% of the training portion
tr_idx = np.where(train_mask)[0]
cal_start = tr_idx[int(0.8 * len(tr_idx))]
cal_mask = np.zeros(n, dtype=bool)
cal_mask[cal_start: tr_idx[-1] + 1] = True

# Base model fit mask = train minus calibration
fit_mask = train_mask & (~cal_mask)

# Early-stopping split *inside* fit_mask by order (85/15)
fit_idx = np.where(fit_mask)[0]
split_pt = int(0.85 * len(fit_idx))
inner_tr = fit_idx[:split_pt]
inner_va = fit_idx[split_pt:]

print({
    "train": int(train_mask.sum()),
    "cal": int(cal_mask.sum()),
    "fit": int(fit_mask.sum()),
    "inner_tr": len(inner_tr),
    "inner_va": len(inner_va),
    "test": int(test_mask.sum())
})


Using existing train_mask / test_mask.
{'train': 136480, 'cal': 27296, 'fit': 109184, 'inner_tr': 92806, 'inner_va': 16378, 'test': 24084}


In [18]:
# === METRICS HELPERS ===
def compute_metrics(y_true, p):
    return {
        "AUC": float(roc_auc_score(y_true, p)),
        "LogLoss": float(log_loss(y_true, p, labels=[0,1])),
        "Brier": float(brier_score_loss(y_true, p))
    }

def show_metrics(title, m):
    print(f"{title}\nLogLoss: {m['LogLoss']:.10f}\nROC-AUC: {m['AUC']:.10f}\nBrier  : {m['Brier']:.10f}\n")


In [19]:
# === LIGHTGBM PARAMS (TUNED) ===
from lightgbm import LGBMClassifier
from lightgbm import early_stopping, log_evaluation

lgb_params_tuned = dict(
    n_estimators=5000,
    learning_rate=0.02,
    num_leaves=78,
    min_data_in_leaf=134,      # (sklearn alias respected)
    subsample=0.8140936140036887,
    colsample_bytree=0.7844939514101106,
    reg_lambda=0.1904075276204348,
    reg_alpha=0.5453556057858624,
    objective="binary",
    boosting_type="gbdt",
    n_jobs=-1,
    random_state=SEED
)
lgb_params_tuned


{'n_estimators': 5000,
 'learning_rate': 0.02,
 'num_leaves': 78,
 'min_data_in_leaf': 134,
 'subsample': 0.8140936140036887,
 'colsample_bytree': 0.7844939514101106,
 'reg_lambda': 0.1904075276204348,
 'reg_alpha': 0.5453556057858624,
 'objective': 'binary',
 'boosting_type': 'gbdt',
 'n_jobs': -1,
 'random_state': 42}

In [22]:
# === TRAIN LIGHTGBM (safe row indexing) ===
lgb = LGBMClassifier(**lgb_params_tuned)

lgb.fit(
    rows(X, inner_tr), rows(y, inner_tr),
    eval_set=[(rows(X, inner_va), rows(y, inner_va))],
    eval_metric="logloss",
    callbacks=[early_stopping(stopping_rounds=100), log_evaluation(200)]
)

print("LGBM fitted with best_iteration_ =", getattr(lgb, "best_iteration_", None))


[LightGBM] [Info] Number of positive: 46403, number of negative: 46403
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.073758 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15045
[LightGBM] [Info] Number of data points in the train set: 92806, number of used features: 88
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Training until validation scores don't improve for 100 rounds
[200]	valid_0's binary_logloss: 0.663081
Early stopping, best iteration is:
[167]	valid_0's binary_logloss: 0.662903
LGBM fitted with best_iteration_ = 167


In [24]:
from catboost import CatBoostClassifier

cat = CatBoostClassifier(
    iterations=3000,
    learning_rate=0.02,
    depth=6,
    loss_function="Logloss",
    eval_metric="Logloss",
    random_seed=SEED,
    od_type="Iter",
    od_wait=100,
    verbose=200
)

cat.fit(
    rows(X, inner_tr), rows(y, inner_tr),
    eval_set=(rows(X, inner_va), rows(y, inner_va))
)
print("CatBoost fitted.")


0:	learn: 0.6925069	test: 0.6924768	best: 0.6924768 (0)	total: 508ms	remaining: 25m 22s
200:	learn: 0.6683342	test: 0.6694682	best: 0.6694679 (199)	total: 19.3s	remaining: 4m 29s
400:	learn: 0.6635655	test: 0.6675165	best: 0.6675165 (400)	total: 35.8s	remaining: 3m 51s
600:	learn: 0.6595478	test: 0.6664754	best: 0.6664661 (594)	total: 51.8s	remaining: 3m 26s
800:	learn: 0.6543625	test: 0.6649841	best: 0.6649787 (799)	total: 1m 7s	remaining: 3m 5s
1000:	learn: 0.6495681	test: 0.6639228	best: 0.6639228 (1000)	total: 1m 23s	remaining: 2m 46s
1200:	learn: 0.6451022	test: 0.6633190	best: 0.6633107 (1197)	total: 1m 39s	remaining: 2m 28s
1400:	learn: 0.6411623	test: 0.6631211	best: 0.6631211 (1400)	total: 1m 55s	remaining: 2m 11s
1600:	learn: 0.6373576	test: 0.6630072	best: 0.6629903 (1582)	total: 2m 10s	remaining: 1m 54s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 0.6629790023
bestIteration = 1605

Shrink model to first 1606 iterations.
CatBoost fitted.


In [25]:
# === CALIBRATION (PLATT, cv='prefit') ===
# Pass the *fitted* models directly; use safe row indexing.
cal_lgb = CalibratedClassifierCV(lgb, cv='prefit', method='sigmoid')
cal_lgb.fit(rows(X, cal_mask), rows(y, cal_mask))

cal_cat = CalibratedClassifierCV(cat, cv='prefit', method='sigmoid')
cal_cat.fit(rows(X, cal_mask), rows(y, cal_mask))

print("Calibrators fitted on cal_mask.")


Calibrators fitted on cal_mask.


In [26]:
# === PREDICT & METRICS PER MODEL (HOLDOUT) ===
X_te = rows(X, test_mask)
y_te = rows(y, test_mask)

p_lgb = cal_lgb.predict_proba(X_te)[:, 1]
p_cat = cal_cat.predict_proba(X_te)[:, 1]

m_lgb = compute_metrics(y_te, p_lgb)
m_cat = compute_metrics(y_te, p_cat)

show_metrics("LGBM (calibrated) — Holdout (last 15%)", m_lgb)
show_metrics("CatBoost (calibrated) — Holdout (last 15%)", m_cat)


LGBM (calibrated) — Holdout (last 15%)
LogLoss: 0.6661965061
ROC-AUC: 0.6306997265
Brier  : 0.2368387558

CatBoost (calibrated) — Holdout (last 15%)
LogLoss: 0.6656190108
ROC-AUC: 0.6315422181
Brier  : 0.2365894198



In [27]:
# === BLEND WITH TUNED WEIGHT ===
w_lgbm = 0.8853110819376774  # from your search
p_blend = w_lgbm * p_lgb + (1.0 - w_lgbm) * p_cat

m_blend = compute_metrics(y_te, p_blend)
show_metrics(f"Blend {w_lgbm:.2f}·LGBM + {(1.0-w_lgbm):.2f}·Cat — Holdout", m_blend)


Blend 0.89·LGBM + 0.11·Cat — Holdout
LogLoss: 0.6658891264
ROC-AUC: 0.6313759810
Brier  : 0.2366943259



In [28]:
def find_threshold_max_f1(y_true, p):
    from sklearn.metrics import f1_score
    ts = np.linspace(0.05, 0.95, 19)
    f1s = [f1_score(y_true, p >= t) for t in ts]
    k = int(np.argmax(f1s))
    return float(ts[k]), float(f1s[k])

t_best, f1_best = find_threshold_max_f1(y_te, p_blend)
print(f"Best F1 threshold (holdout): {t_best:.2f}, F1={f1_best:.4f}")


Best F1 threshold (holdout): 0.35, F1=0.6711


In [29]:
if hasattr(lgb, "feature_importances_"):
    try:
        cols = list(getattr(X, "columns", [f"f{i}" for i in range(X.shape[1])]))
    except Exception:
        cols = [f"f{i}" for i in range(X.shape[1])]
    imp = pd.Series(lgb.feature_importances_, index=cols).sort_values(ascending=False)
    print("Top 20 features (LGBM):")
    display(imp.head(20))


Top 20 features (LGBM):


roll20_win_rate_citizens_mean__delta_maf_minus_cit    1469
roll5_win_rate_citizens_mean__delta_maf_minus_cit      913
pre_elo_side_mean__delta_maf_minus_cit                 502
gap_id_clipped_max                                     451
gap_id_clipped_mean                                    406
roll20_win_rate_mafia_mean__delta_maf_minus_cit        382
pre_elo_min__delta_maf_minus_cit                       317
gap_id_clipped_mean__delta_maf_minus_cit               314
enemy_fam_mean_team_mean                               256
pre_elo_role_mean__delta_maf_minus_cit                 249
elo_synergy_product                                    248
synergy_mean_team_mean__delta_maf_minus_cit            235
pre_elo_side_mean                                      228
place_std                                              211
pre_elo_mean                                           210
pre_elo_q25__delta_maf_minus_cit                       203
elo_enemy_gap                                          1

In [30]:
class BlendedPredictor:
    def __init__(self, cal_lgb, cal_cat, w_lgbm):
        self.cal_lgb = cal_lgb
        self.cal_cat = cal_cat
        self.w_lgbm = float(w_lgbm)

    def predict_proba(self, X_):
        p1 = self.cal_lgb.predict_proba(X_)[:, 1]
        p2 = self.cal_cat.predict_proba(X_)[:, 1]
        return self.w_lgbm * p1 + (1.0 - self.w_lgbm) * p2

predictor = BlendedPredictor(cal_lgb=cal_lgb, cal_cat=cal_cat, w_lgbm=w_lgbm)
print("Predictor ready.")


Predictor ready.


In [32]:
summary = pd.DataFrame([
    {"Model":"LGBM (calibrated)", "LogLoss": m_lgb["LogLoss"], "AUC": m_lgb["AUC"], "Brier": m_lgb["Brier"]},
    {"Model":"CatBoost (calibrated)", "LogLoss": m_cat["LogLoss"], "AUC": m_cat["AUC"], "Brier": m_cat["Brier"]},
    {"Model":f"Blend ({w_lgbm:.2f}·LGBM + {(1-w_lgbm):.2f}·Cat)", "LogLoss": m_blend["LogLoss"], "AUC": m_blend["AUC"], "Brier": m_blend["Brier"]},
]).set_index("Model")

display(summary.style.format({"LogLoss":"{:.10f}","AUC":"{:.10f}","Brier":"{:.10f}"}))


Unnamed: 0_level_0,LogLoss,AUC,Brier
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LGBM (calibrated),0.6661965061,0.6306997265,0.2368387558
CatBoost (calibrated),0.6656190108,0.6315422181,0.2365894198
Blend (0.89·LGBM + 0.11·Cat),0.6658891264,0.631375981,0.2366943259


In [34]:
# === SANITY CHECK: Holdout metrics consistency ===
# You already have test_mask, y_te, p_lgb, p_cat, p_blend from earlier cells

print("Holdout size:", y_te.shape[0])
print("p_lgb:", p_lgb.shape, "p_cat:", p_cat.shape, "p_blend:", p_blend.shape)

from sklearn.metrics import log_loss, roc_auc_score, brier_score_loss

ll_lgb   = log_loss(y_te, p_lgb)
ll_cat   = log_loss(y_te, p_cat)
ll_blend = log_loss(y_te, p_blend)

auc_lgb   = roc_auc_score(y_te, p_lgb)
auc_cat   = roc_auc_score(y_te, p_cat)
auc_blend = roc_auc_score(y_te, p_blend)

br_lgb   = brier_score_loss(y_te, p_lgb)
br_cat   = brier_score_loss(y_te, p_cat)
br_blend = brier_score_loss(y_te, p_blend)

print(f"LGBM  : LogLoss={ll_lgb:.6f},  AUC={auc_lgb:.6f},  Brier={br_lgb:.6f}")
print(f"Cat   : LogLoss={ll_cat:.6f},  AUC={auc_cat:.6f},  Brier={br_cat:.6f}")
print(f"Blend : LogLoss={ll_blend:.6f},  AUC={auc_blend:.6f},  Brier={br_blend:.6f}")


Holdout size: 24084
p_lgb: (24084,) p_cat: (24084,) p_blend: (24084,)
LGBM  : LogLoss=0.666197,  AUC=0.630700,  Brier=0.236839
Cat   : LogLoss=0.665619,  AUC=0.631542,  Brier=0.236589
Blend : LogLoss=0.665889,  AUC=0.631376,  Brier=0.236694


In [35]:
# === RE-OPTIMIZE BLEND WEIGHT BY LOGLOSS (using your y_te etc.) ===
import numpy as np
from sklearn.metrics import log_loss, roc_auc_score, brier_score_loss

ws = np.linspace(0.0, 1.0, 101)
best = None
for w in ws:
    p = w * p_lgb + (1 - w) * p_cat
    ll = log_loss(y_te, p)
    if (best is None) or (ll < best[0]):
        best = (ll, w)

best_ll, best_w = best
print(f"Best blend weight (min LogLoss): w_lgbm={best_w:.2f}, w_cat={1-best_w:.2f}, LogLoss={best_ll:.6f}")

p_best = best_w * p_lgb + (1 - best_w) * p_cat
best_auc   = roc_auc_score(y_te, p_best)
best_brier = brier_score_loss(y_te, p_best)

print(f"Best blend metrics — AUC={best_auc:.6f},  Brier={best_brier:.6f}")

# Optional: add to your summary
summary2 = summary.copy()
summary2.loc[f"Blend* ({best_w:.2f}·LGBM + {(1-best_w):.2f}·Cat)"] = {
    "LogLoss": best_ll, "AUC": best_auc, "Brier": best_brier
}
try:
    import jinja2  # noqa
    display(summary2.style.format({"LogLoss":"{:.10f}","AUC":"{:.10f}","Brier":"{:.10f}"}))
except Exception:
    sf = summary2.copy()
    for col in ["LogLoss","AUC","Brier"]:
        sf[col] = sf[col].map(lambda v: f"{v:.10f}")
    display(sf)


Best blend weight (min LogLoss): w_lgbm=0.38, w_cat=0.62, LogLoss=0.665280
Best blend metrics — AUC=0.632573,  Brier=0.236415


Unnamed: 0_level_0,LogLoss,AUC,Brier
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LGBM (calibrated),0.6661965061,0.6306997265,0.2368387558
CatBoost (calibrated),0.6656190108,0.6315422181,0.2365894198
Blend (0.89·LGBM + 0.11·Cat),0.6658891264,0.631375981,0.2366943259
Blend* (0.38·LGBM + 0.62·Cat),0.6652802559,0.6325732452,0.2364153898


In [None]:
# =========================================================
# CANONICAL HOLDOUT RESOLVER (single source of truth)
# =========================================================
import numpy as np
import pandas as pd

def get_holdout_from_globals():
    """Return (X_holdout, y_holdout) as pandas objects, aligned and same length.
       Priority: (X_holdout,y_holdout) > (X_test,y_test) > slice last 15% of (X,y)."""
    g = globals()
    X_src, y_src = None, None

    # Priority 1: explicitly defined holdout
    if "X_holdout" in g and "y_holdout" in g and g["X_holdout"] is not None and g["y_holdout"] is not None:
        X_src, y_src = g["X_holdout"], g["y_holdout"]
    # Priority 2: explicit test
    elif "X_test" in g and "y_test" in g and g["X_test"] is not None and g["y_test"] is not None:
        X_src, y_src = g["X_test"], g["y_test"]
    # Priority 3: last 15% slice of X/y (already created in your pipeline)
    elif "X" in g and "y" in g and g["X"] is not None and g["y"] is not None:
        X_all, y_all = g["X"], g["y"]
        n = len(y_all)
        n_ho = int(round(n * 0.15))
        if hasattr(X_all, "iloc"):
            X_src = X_all.iloc[-n_ho:]
        else:
            X_src = pd.DataFrame(X_all)[-n_ho:]
        if hasattr(y_all, "iloc"):
            y_src = y_all.iloc[-n_ho:]
        else:
            y_src = pd.Series(y_all)[-n_ho:]
    else:
        raise RuntimeError("No holdout found. Define X_holdout/y_holdout or X_test/y_test, or ensure X/y exist.")

    # Convert & align
    X_df = X_src if isinstance(X_src, pd.DataFrame) else pd.DataFrame(X_src)
    y_sr = y_src if isinstance(y_src, pd.Series)   else pd.Series(y_src)

    min_len = min(len(X_df), len(y_sr))
    if len(X_df) != len(y_sr):
        print(f"[holdout] Aligning lengths: X={len(X_df)} vs y={len(y_sr)} -> {min_len}")
    X_df = X_df.iloc[:min_len].reset_index(drop=True)
    y_sr = y_sr.iloc[:min_len].reset_index(drop=True)

    # Save back to globals under canonical names
    globals()["X_holdout"], globals()["y_holdout"] = X_df, y_sr
    print(f"[holdout] Final shapes: X_holdout={X_df.shape}, y_holdout={y_sr.shape}")
    return X_df, y_sr

# ---- run it now to lock the holdout for the rest of the notebook ----
X_holdout, y_holdout = get_holdout_from_globals()


[resolve_holdout] Using existing X_holdout / y_holdout.
Holdout shapes: (24085, 105) (24085,)


In [51]:
# =========================================================
# GLOBAL FIX — ensure X and y are perfectly aligned
# =========================================================
import numpy as np
import pandas as pd

g = globals()

# --- Safe lookups for X and y sources ---
X_src = None
for name in ["X_holdout", "X", "X_train"]:
    if name in g and g[name] is not None:
        X_src = g[name]
        print(f"[align] Using {name} as X source.")
        break

y_src = None
for name in ["y_holdout", "y", "y_train"]:
    if name in g and g[name] is not None:
        y_src = g[name]
        print(f"[align] Using {name} as y source.")
        break

if X_src is None or y_src is None:
    raise RuntimeError("Cannot find X/y arrays. Run after you have defined X and y (or X_holdout/y_holdout).")

# --- Convert to DataFrame/Series for safe indexing ---
if isinstance(X_src, np.ndarray):
    X_df = pd.DataFrame(X_src)
else:
    X_df = X_src.copy()

if isinstance(y_src, np.ndarray):
    y_ser = pd.Series(y_src).reset_index(drop=True)
else:
    y_ser = y_src.reset_index(drop=True)

# --- Force same length and index alignment ---
min_len = min(len(X_df), len(y_ser))
if len(X_df) != len(y_ser):
    print(f"[align] Truncating to {min_len} rows (X={len(X_df)}, y={len(y_ser)}).")

X_df = X_df.iloc[:min_len].reset_index(drop=True)
y_ser = y_ser.iloc[:min_len].reset_index(drop=True)

# --- Save back to globals under canonical names ---
globals()["X_holdout"], globals()["y_holdout"] = X_df, y_ser
print(f"[align] Final holdout shapes: X={X_df.shape}, y={y_ser.shape}")


[align] Using X_holdout as X source.
[align] Using y_holdout as y source.
[align] Final holdout shapes: X=(24085, 105), y=(24085,)


In [None]:
# =========================================================
# Block B — Meta-stacking (LogisticRegression over probs)
# =========================================================
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, roc_auc_score, brier_score_loss

# ---- helper to get calibrated predictors you already used for holdout ----
def _resolve_predictor(name_candidates):
    g = globals()
    for nm in name_candidates:
        if nm in g:
            return g[nm]
    return None

# 1) Get the same train/valid split as in Block A (reuse to avoid leakage)
X_tr, y_tr, X_va, y_va = _resolve_train_valid()

# 2) Get the *same* predictors you used to generate p_lgb / p_cat on holdout.
#    Prefer calibrated wrappers if available.
lgb_pred = _resolve_predictor(["lgb_cal", "lgb_model", "lgb"])
cat_pred = _resolve_predictor(["cat_cal", "cat_tuned", "cat_model", "cat"])

if lgb_pred is None or cat_pred is None:
    raise RuntimeError("Could not find fitted LGBM/Cat predictors (e.g. lgb_cal, cat_cal/cat_tuned).")

# 3) Validation predictions (features for the meta-model)
p_lgb_va = lgb_pred.predict_proba(X_va)[:, 1]
p_cat_va = cat_pred.predict_proba(X_va)[:, 1]
P_va = np.column_stack([p_lgb_va, p_cat_va])

# 4) Train the meta-model
meta = LogisticRegression(max_iter=1000)
meta.fit(P_va, y_va)

# 5) Apply to HOLDOUT using your already-computed holdout probs (keeps your protocol)
g = globals()
if "y_holdout" in g:
    y_ho = g["y_holdout"]
elif "y_test" in g:
    y_ho = g["y_test"]
else:
    raise RuntimeError("Holdout target not found (need y_holdout or y_test).")

# We need holdout probabilities from base models:
if not all(k in g for k in ["p_lgb", "p_cat"]):
    # If you don't have them yet in variables, compute them now:
    if "X_holdout" in g:
        p_lgb_ho = lgb_pred.predict_proba(g["X_holdout"])[:, 1]
        p_cat_ho = cat_pred.predict_proba(g["X_holdout"])[:, 1]
    elif "X_test" in g:
        p_lgb_ho = lgb_pred.predict_proba(g["X_test"])[:, 1]
        p_cat_ho = cat_pred.predict_proba(g["X_test"])[:, 1]
    else:
        raise RuntimeError("Cannot find X_holdout/X_test to compute base holdout probabilities.")
else:
    p_lgb_ho, p_cat_ho = g["p_lgb"], g["p_cat"]

P_ho = np.column_stack([p_lgb_ho, p_cat_ho])
p_stack_ho = meta.predict_proba(P_ho)[:, 1]

# 6) Evaluate vs your existing best blend
ll_stack = log_loss(y_ho, p_stack_ho)
auc_stack = roc_auc_score(y_ho, p_stack_ho)
brier_stack = brier_score_loss(y_ho, p_stack_ho)

print(f"[Stack] Logistic stack — Holdout LogLoss={ll_stack:.6f}, AUC={auc_stack:.6f}, Brier={brier_stack:.6f}")

# Optional: compare to your best fixed-weight blend if you kept it
if "w_lgbm_best" in g:
    w = g["w_lgbm_best"]
    p_blend_best = w * p_lgb_ho + (1 - w) * p_cat_ho
    print(f"[Stack] Best fixed blend (w_lgbm={w:.2f}) — LogLoss={log_loss(y_ho, p_blend_best):.6f}, "
          f"AUC={roc_auc_score(y_ho, p_blend_best):.6f}, Brier={brier_score_loss(y_ho, p_blend_best):.6f}")


[Cat tune] Using existing split: ('X_tr', 'y_tr', 'X_va', 'y_va')


ValueError: Found input variables with inconsistent numbers of samples: [24084, 24085]