# NBA Playoffs Simulator — Model Calibration

Notebook 03. Acá entreno el XGBoost, valido que generalice bien con Leave-One-Season-Out, y verifico que las probabilidades estén calibradas. Para la simulación Monte Carlo lo que importa no es tanto la accuracy sino que cuando el modelo diga "65%" realmente signifique 65%.

También hago feature selection porque con ~150 series de entrenamiento, meter muchos features es receta para overfitting.

In [None]:
!pip install xgboost --quiet

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import pickle

from xgboost import XGBClassifier
from sklearn.model_selection import LeaveOneGroupOut, StratifiedKFold
from sklearn.metrics import accuracy_score, brier_score_loss, log_loss, roc_auc_score
from sklearn.calibration import calibration_curve, CalibratedClassifierCV
from sklearn.feature_selection import mutual_info_classif

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
plt.style.use('dark_background')

In [None]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

PROJECT_DIR = '/content/drive/MyDrive/nba-playoffs-simulator'
DATA_DIR = f'{PROJECT_DIR}/data'

# Training matchups and the 2026 team profiles.
df_training = pd.read_csv(f'{DATA_DIR}/training_matchups.csv')
df_profiles = pd.read_csv(f'{DATA_DIR}/team_profiles_2026.csv')

# Candidate feature names are stored one per line; blank lines are skipped.
with open(f'{DATA_DIR}/feature_columns.txt', 'r') as features_file:
    ALL_FEATURE_COLS = [
        name for name in (raw.strip() for raw in features_file) if name
    ]

# Season labels '2015-16' through '2024-25'.
HISTORICAL_SEASONS = [
    f'{start}-{(start + 1) % 100:02d}' for start in range(2015, 2025)
]

print(f'Training: {df_training.shape}')
print(f'Profiles: {df_profiles.shape}')
print(f'Features ({len(ALL_FEATURE_COLS)}): {ALL_FEATURE_COLS}')

## Feature selection

Con ~150 series, usar 14 features es demasiado — el modelo memoriza ruido. Regla práctica: con N muestras no uses más de N/30 a N/20 features. Con N ≈ 150 eso me da entre 5 y 8.

Voy a rankear por correlacion + mutual information y probar distintas cantidades para encontrar el punto optimo.

In [None]:
# Keep only the listed features that are actually present in the training table,
# preserving the order from ALL_FEATURE_COLS.
training_columns = set(df_training.columns)
available_features = [name for name in ALL_FEATURE_COLS if name in training_columns]

# Missing feature values are replaced by 0; target and CV groups are copied out.
X_all = df_training[available_features].fillna(0)
y = df_training['team_a_won'].copy()
groups = df_training['season'].copy()

n_rows, n_cols = X_all.shape
print(f'{n_rows} series x {n_cols} features')
print(f'Balance: {y.mean():.1%} favorito gana')

In [None]:
# Rank features by |correlation| with the target plus mutual information.
# A constant column has an undefined (NaN) correlation; score it as 0 so it
# ranks last instead of propagating NaN into combined_score, where the
# int(...) call used for the text bar below would raise.
correlations = X_all.corrwith(y).abs().fillna(0).sort_values(ascending=False)

mi_scores = mutual_info_classif(X_all, y, random_state=42)
mi_series = pd.Series(mi_scores, index=available_features).sort_values(ascending=False)

# Both Series are indexed by feature name, so the constructor aligns them.
ranking = pd.DataFrame({
    'correlation': correlations,
    'mutual_info': mi_series
})

# Scale each criterion to [0, 1] by its maximum before averaging them.
for col in ['correlation', 'mutual_info']:
    col_max = ranking[col].max()
    # Guard against 0/0 when every feature scores 0 on this criterion.
    ranking[f'{col}_norm'] = (ranking[col] / col_max) if col_max > 0 else 0.0
ranking['combined_score'] = (
    0.5 * ranking['correlation_norm'] + 0.5 * ranking['mutual_info_norm']
)
ranking = ranking.sort_values('combined_score', ascending=False)

# Text bar chart: 30 characters correspond to a combined score of 1.0.
for feat, row in ranking.iterrows():
    bar = '█' * int(row['combined_score'] * 30)
    print(f'{feat:<24} Corr: {row["correlation"]:.3f}  '
          f'MI: {row["mutual_info"]:.3f}  '
          f'Score: {row["combined_score"]:.3f}  {bar}')

In [None]:
# Try the top-k ranked features for k = 2..10 and score each subset with
# Leave-One-Season-Out cross-validation.

logo = LeaveOneGroupOut()
baseline_acc = y.mean()

print(f'Baseline (siempre favorito): {baseline_acc:.3f}\n')

ranked_features = ranking.index.tolist()
results_by_n = []

# Hyperparameters shared by every fold model in this search.
search_params = dict(
    n_estimators=50, max_depth=2, learning_rate=0.05,
    subsample=0.7, colsample_bytree=0.8,
    reg_alpha=2.0, reg_lambda=3.0, min_child_weight=5,
    gamma=0.5, objective='binary:logistic',
    eval_metric='logloss', random_state=42,
    use_label_encoder=False,
)

largest_k = min(len(ranked_features), 10)
for n_feat in range(2, largest_k + 1):
    selected = ranked_features[:n_feat]
    X_sel = X_all[selected]

    # Out-of-fold hard predictions and class-1 probabilities.
    oof_preds = np.zeros(len(y))
    oof_probs = np.zeros(len(y))

    for train_idx, test_idx in logo.split(X_sel, y, groups):
        held_out = X_sel.iloc[test_idx]
        temp_model = XGBClassifier(**search_params)
        temp_model.fit(X_sel.iloc[train_idx], y.iloc[train_idx])
        oof_preds[test_idx] = temp_model.predict(held_out)
        oof_probs[test_idx] = temp_model.predict_proba(held_out)[:, 1]

    acc = accuracy_score(y, oof_preds)
    brier = brier_score_loss(y, oof_probs)
    auc = roc_auc_score(y, oof_probs)
    delta = acc - baseline_acc

    results_by_n.append({
        'n_features': n_feat,
        'features': selected,
        'accuracy': acc,
        'brier': brier,
        'auc': auc,
        'vs_baseline': delta
    })

    # '+' flags configurations whose accuracy is strictly above the baseline.
    marker = '+' if acc > baseline_acc else ' '
    print(f'  {marker} {n_feat} features -> Acc: {acc:.3f} ({delta:+.3f})  '
          f'Brier: {brier:.4f}  AUC: {auc:.3f}')

df_feat_search = pd.DataFrame(results_by_n)

In [None]:
# Three side-by-side panels (accuracy, Brier score, ROC AUC) as a function of
# how many top-ranked features were used.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
ax1, ax2, ax3 = axes

n_values = df_feat_search['n_features']
line_kw = dict(linewidth=2, markersize=8)
title_kw = dict(fontsize=13, fontweight='bold')

# Accuracy, with the always-pick-the-favorite baseline as a reference line.
ax1.plot(n_values, df_feat_search['accuracy'], 'o-', color='#64B5F6', **line_kw)
ax1.axhline(y=baseline_acc, color='#FF5252', linestyle='--',
            linewidth=2, label=f'Baseline ({baseline_acc:.0%})')
ax1.set_ylabel('Accuracy (LOSO)', fontsize=12)
ax1.set_title('Accuracy vs Features', **title_kw)
ax1.legend(fontsize=10)

# Brier score (lower is better).
ax2.plot(n_values, df_feat_search['brier'], 's-', color='#FFB74D', **line_kw)
ax2.set_ylabel('Brier Score (menor = mejor)', fontsize=12)
ax2.set_title('Calibracion vs Features', **title_kw)

# ROC AUC, with the 0.5 random-guess level as a reference line.
ax3.plot(n_values, df_feat_search['auc'], 'D-', color='#81C784', **line_kw)
ax3.axhline(y=0.5, color='#FF5252', linestyle='--',
            linewidth=1, alpha=0.5, label='Random (0.5)')
ax3.set_ylabel('ROC AUC', fontsize=12)
ax3.set_title('Discriminacion vs Features', **title_kw)
ax3.legend(fontsize=10)

# All panels share the same x-axis label.
for panel in (ax1, ax2, ax3):
    panel.set_xlabel('Numero de features', fontsize=12)

plt.suptitle('Cuantos features necesita el modelo?',
             fontsize=15, fontweight='bold', y=1.03)
plt.tight_layout()
plt.savefig('feature_selection.png', dpi=150, bbox_inches='tight',
            facecolor='black')
plt.show()

In [None]:
# Pick the configuration with the lowest Brier score among those whose accuracy
# is at least the baseline. If none qualifies, fall back to the lowest Brier
# overall, since calibration is what the Monte Carlo simulation relies on.

beats_baseline = df_feat_search[df_feat_search['accuracy'] >= baseline_acc]
has_candidates = len(beats_baseline) > 0

pool = beats_baseline if has_candidates else df_feat_search
best_row = pool.loc[pool['brier'].idxmin()]

if has_candidates:
    print('Encontre configs que superan el baseline.\n')
else:
    print('Ninguna config supera el baseline en accuracy pura.')
    print('Pero para Monte Carlo lo que importa es la calibracion (Brier),\n'
          'no la prediccion binaria.\n')

FEATURE_COLS = best_row['features']
N_BEST = int(best_row['n_features'])

summary_lines = [
    f'Mejor config: {N_BEST} features',
    f'  Accuracy: {best_row["accuracy"]:.3f} (baseline: {baseline_acc:.3f})',
    f'  Brier:    {best_row["brier"]:.4f}',
    f'  AUC:      {best_row["auc"]:.3f}',
    '\nFeatures seleccionados:',
]
for line in summary_lines:
    print(line)

position = 0
for feat in FEATURE_COLS:
    position += 1
    print(f'  {position}. {feat}')

## Entrenar XGBoost

Hiperparametros bastante conservadores para un dataset chico: arboles superficiales (max_depth=2), learning rate bajo (0.05), regularizacion fuerte (alpha=2, lambda=3). La idea es que no memorice sino que aprenda patrones generales.

In [None]:
# Final model: fit on every season using only the selected features.
X = X_all[FEATURE_COLS].copy()

final_params = {
    'n_estimators': 50,
    'max_depth': 2,
    'learning_rate': 0.05,
    'subsample': 0.7,
    'colsample_bytree': 0.8,
    'reg_alpha': 2.0,
    'reg_lambda': 3.0,
    'min_child_weight': 5,
    'gamma': 0.5,
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'random_state': 42,
    'use_label_encoder': False,
}
model = XGBClassifier(**final_params)
model.fit(X, y)

# In-sample metrics, printed for reference only: they are computed on the same
# rows the model was fit on.
y_pred_train = model.predict(X)
y_prob_train = model.predict_proba(X)[:, 1]

train_accuracy = accuracy_score(y, y_pred_train)
train_brier = brier_score_loss(y, y_prob_train)
train_auc = roc_auc_score(y, y_prob_train)

print('Training (referencia):')
print(f'  Accuracy:    {train_accuracy:.3f}')
print(f'  Brier Score: {train_brier:.4f}')
print(f'  ROC AUC:     {train_auc:.3f}')

In [None]:
# Feature importances of the final model, sorted ascending so the most
# important feature ends up as the last (top) horizontal bar.
importance = (
    pd.DataFrame({'feature': FEATURE_COLS, 'importance': model.feature_importances_})
    .sort_values('importance', ascending=True)
)
n_bars = len(importance)

fig, ax = plt.subplots(figsize=(10, max(5, len(FEATURE_COLS) * 0.6)))
colors = plt.cm.YlOrRd(np.linspace(0.3, 1, n_bars))
ax.barh(importance['feature'], importance['importance'], color=colors)
ax.set_xlabel('Importancia', fontsize=12)
ax.set_title('Que features importan mas para ganar una serie de playoffs?',
             fontsize=14, fontweight='bold', pad=15)

# Point an arrow at the bar of the most important feature.
top_feat = importance.iloc[-1]
top_value = top_feat['importance']
arrow_style = dict(arrowstyle='->', color='#FFD700', lw=1.5)
ax.annotate('El mas predictivo',
            xy=(top_value, top_feat['feature']),
            xytext=(top_value * 0.6, n_bars - 1.5),
            fontsize=11, color='#FFD700', fontweight='bold',
            arrowprops=arrow_style)

plt.tight_layout()
plt.savefig('feature_importance.png', dpi=150, bbox_inches='tight',
            facecolor='black')
plt.show()

# Text version, most important first; 50 characters correspond to 1.0.
descending = importance.iloc[::-1]
for feat_name, weight in zip(descending['feature'], descending['importance']):
    bar = '█' * int(weight * 50)
    print(f'{feat_name:<24} {weight:.3f}  {bar}')

## Validacion LOSO

Leave-One-Season-Out: entreno con 9 temporadas, predigo la que queda afuera. Repito para cada una. Es la forma mas honesta de validar porque simula el escenario real de predecir playoffs futuros.

In [None]:
# Leave-One-Season-Out validation of the selected feature set: every season is
# held out once and predicted by a model fit on the remaining seasons.
logo = LeaveOneGroupOut()

# Out-of-fold hard predictions and class-1 probabilities, filled fold by fold.
oof_predictions = np.zeros(len(y))
oof_probabilities = np.zeros(len(y))
season_results = []

for train_idx, test_idx in logo.split(X, y, groups):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
    # Every row of a fold shares one season label, so the first one names it.
    test_season = groups.iloc[test_idx].values[0]

    temp_model = XGBClassifier(
        n_estimators=50, max_depth=2, learning_rate=0.05,
        subsample=0.7, colsample_bytree=0.8,
        reg_alpha=2.0, reg_lambda=3.0, min_child_weight=5,
        gamma=0.5, objective='binary:logistic',
        eval_metric='logloss', random_state=42,
        use_label_encoder=False
    )
    temp_model.fit(X_train, y_train)

    preds = temp_model.predict(X_test)
    probs = temp_model.predict_proba(X_test)[:, 1]

    oof_predictions[test_idx] = preds
    oof_probabilities[test_idx] = probs

    acc = accuracy_score(y_test, preds)
    n_series = len(y_test)
    correct = int((preds == y_test.values).sum())

    season_results.append({
        'season': test_season,
        'n_series': n_series,
        'correct': correct,
        'accuracy': acc
    })

    print(f'{test_season}: {correct}/{n_series} ({acc:.0%})')

# Pooled metrics over all out-of-fold predictions.
oof_acc = accuracy_score(y, oof_predictions)
oof_brier = brier_score_loss(y, oof_probabilities)
oof_auc = roc_auc_score(y, oof_probabilities)

print(f'\nGlobal (out-of-fold):')
print(f'  Accuracy:    {oof_acc:.3f}  (baseline: {baseline_acc:.3f})')
print(f'  Brier Score: {oof_brier:.4f}')
print(f'  ROC AUC:     {oof_auc:.3f}')

# Grade the Brier score with the same thresholds the calibration cell prints
# (< 0.25 acceptable, < 0.20 good, < 0.15 very good) rather than always calling
# it good. 0.25 is what a constant 50% prediction scores.
if oof_brier < 0.15:
    brier_verdict = 'muy bueno'
elif oof_brier < 0.20:
    brier_verdict = 'bueno'
elif oof_brier < 0.25:
    brier_verdict = 'aceptable'
else:
    brier_verdict = 'pobre (no mejora a predecir siempre 50%)'

if oof_acc < baseline_acc:
    diff = baseline_acc - oof_acc
    print(f'\nQueda {diff:.3f} debajo del baseline en accuracy, pero para Monte Carlo')
    print(f'importa mas la calibracion. Un Brier de {oof_brier:.4f} es {brier_verdict}.')

In [None]:
# Per-season accuracy bars from the Leave-One-Season-Out run.
df_season_results = pd.DataFrame(season_results)
season_acc = df_season_results['accuracy']

fig, ax = plt.subplots(figsize=(12, 6))

# Green when the season matches or beats the baseline, blue otherwise.
colors_bars = np.where(season_acc >= baseline_acc, '#00E676', '#64B5F6').tolist()

bars = ax.bar(df_season_results['season'], season_acc,
              color=colors_bars, edgecolor='white', linewidth=0.5)

# Reference lines: the always-favorite baseline and the pooled model accuracy.
reference_lines = [
    (baseline_acc, '#FF5252', f'Baseline: siempre favorito ({baseline_acc:.0%})'),
    (oof_acc, '#FFD700', f'Modelo promedio ({oof_acc:.0%})'),
]
for level, line_color, line_label in reference_lines:
    ax.axhline(y=level, color=line_color, linestyle='--', linewidth=2,
               label=line_label)

# Write "correct/total" just above each bar.
for bar, hits, total in zip(bars, df_season_results['correct'],
                            df_season_results['n_series']):
    x_center = bar.get_x() + bar.get_width()/2
    ax.text(x_center, bar.get_height() + 0.02,
            f'{hits}/{total}',
            ha='center', va='bottom', fontsize=9, fontweight='bold')

ax.set_ylim(0, 1.15)
ax.set_ylabel('Accuracy', fontsize=12)
ax.set_title('Validacion Leave-One-Season-Out',
             fontsize=13, fontweight='bold', pad=15)
ax.legend(fontsize=10, loc='upper left')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('validation_by_season.png', dpi=150, bbox_inches='tight',
            facecolor='black')
plt.show()

## Calibracion de probabilidades

Para Monte Carlo necesito que las probabilidades sean confiables. Si el modelo dice 60% para OKC, tiene que significar que en situaciones similares OKC gana ~60% de las veces. Un modelo que siempre dice "favorito gana al 100%" tiene buena accuracy pero probabilidades inutiles para simular.

El Brier Score mide exactamente eso.

In [None]:
# Reliability curve of the out-of-fold probabilities over 5 equal-width bins.
n_bins = 5
prob_true, prob_pred = calibration_curve(
    y, oof_probabilities, n_bins=n_bins, strategy='uniform'
)

fig, axes = plt.subplots(1, 2, figsize=(16, 7))
ax1, ax2 = axes

# Left panel: predicted probability vs observed win frequency.
ax1.plot([0, 1], [0, 1], 'w--', linewidth=1, alpha=0.5, label='Calibracion perfecta')
ax1.plot(prob_pred, prob_true, 's-', color='#64B5F6', linewidth=2,
         markersize=10, label='XGBoost')

# Shaded band of +/-0.15 around the diagonal.
x_line = np.linspace(0, 1, 100)
tolerance = 0.15
ax1.fill_between(x_line, x_line - tolerance, x_line + tolerance,
                 alpha=0.1, color='#00E676', label='Zona aceptable (+/-15%)')

ax1.set_xlabel('Probabilidad predicha', fontsize=12)
ax1.set_ylabel('Frecuencia real de victoria', fontsize=12)
ax1.set_title('Calibration Plot', fontsize=14, fontweight='bold')
ax1.legend(fontsize=10)
ax1.set_xlim(0, 1)
ax1.set_ylim(0, 1)

# Right panel: predicted-probability histograms split by actual outcome.
outcome_groups = [
    (1, 'Favorito gano', '#00E676'),
    (0, 'Favorito perdio', '#FF5252'),
]
for outcome, outcome_label, outcome_color in outcome_groups:
    ax2.hist(oof_probabilities[y == outcome], bins=12, alpha=0.7,
             label=outcome_label, color=outcome_color)
ax2.set_xlabel('Probabilidad predicha', fontsize=12)
ax2.set_ylabel('Frecuencia', fontsize=12)
ax2.set_title('Distribucion de probabilidades', fontsize=14, fontweight='bold')
ax2.legend(fontsize=10)

plt.tight_layout()
plt.savefig('calibration_plot.png', dpi=150, bbox_inches='tight',
            facecolor='black')
plt.show()

lowest_prob = oof_probabilities.min()
highest_prob = oof_probabilities.max()
print(f'Brier Score: {oof_brier:.4f}')
print('  < 0.25: aceptable | < 0.20: bueno | < 0.15: muy bueno')
print(f'\nRango de probabilidades: {lowest_prob:.3f} - {highest_prob:.3f}')
print(f'Std: {oof_probabilities.std():.3f}')

## Backtest historico

Simulo los playoffs de las ultimas 3 temporadas para ver si el modelo produce rankings razonables. El criterio: que el campeon real aparezca en el top 5.

In [None]:
# Funciones de simulacion (las mismas que uso en NB04)

def simulate_series(prob_a_wins, n_games=7, rng=None):
    """Play out one best-of-``n_games`` series game by game.

    Each game is a single draw from ``rng``. Team A's per-game win chance is
    ``prob_a_wins`` shifted by +0.03 in games 1, 2, 5 and 7 (Team A at home in
    the 2-2-1-1-1 format) and by -0.03 in the others, then clipped to
    [0.05, 0.95].

    Args:
        prob_a_wins: Base probability that Team A wins a game.
        n_games: Maximum series length; the first side to reach
            ``n_games // 2 + 1`` wins takes the series.
        rng: Object exposing ``random()``; a fresh NumPy generator is created
            when omitted.

    Returns:
        True if Team A wins the series, False otherwise.
    """
    generator = np.random.default_rng() if rng is None else rng

    wins_needed = n_games // 2 + 1
    a_hosts = (1, 2, 5, 7)
    home_edge = 0.03

    # tally[0] counts Team A's wins, tally[1] Team B's.
    tally = [0, 0]
    game_no = 0
    while max(tally) < wins_needed:
        game_no += 1
        shift = home_edge if game_no in a_hosts else -home_edge
        p_game = np.clip(prob_a_wins + shift, 0.05, 0.95)
        winner_slot = 0 if generator.random() < p_game else 1
        tally[winner_slot] += 1

    return tally[0] >= wins_needed


def get_matchup_probability(team_a_stats, team_b_stats, feature_cols, model):
    """Return the model's probability that Team A wins the series.

    Builds a one-row feature frame from the two teams' stats and feeds it to
    ``model.predict_proba``.

    Args:
        team_a_stats: pandas Series of Team A's stats, indexed by stat name.
        team_b_stats: pandas Series of Team B's stats, indexed by stat name.
        feature_cols: Feature names in the column order the model expects.
            A name ending in ``_diff`` is computed from the stat named by the
            part before that suffix.
        model: Fitted classifier exposing ``predict_proba``.

    Returns:
        The class-1 probability from ``predict_proba`` for this matchup.
    """
    suffix = '_diff'
    # For these stats the difference is taken as B - A rather than A - B.
    flipped_stats = ('DEF_RATING', 'TM_TOV_PCT')

    row = {}
    for feat in feature_cols:
        # Strip only a trailing '_diff'. str.replace would also remove an
        # inner occurrence and make the stat lookup silently fall back to 0.
        base_feat = feat[:-len(suffix)] if feat.endswith(suffix) else feat
        if base_feat in team_a_stats.index and base_feat in team_b_stats.index:
            val_a = team_a_stats[base_feat]
            val_b = team_b_stats[base_feat]
            if pd.notna(val_a) and pd.notna(val_b):
                if base_feat in flipped_stats:
                    row[feat] = val_b - val_a
                else:
                    row[feat] = val_a - val_b
            else:
                # A missing stat on either side is treated as no difference.
                row[feat] = 0
        elif feat == 'seed_diff':
            # Seed comes from 'SEED', then 'PlayoffRank', then defaults to 4.
            seed_a = team_a_stats.get('SEED', team_a_stats.get('PlayoffRank', 4))
            seed_b = team_b_stats.get('SEED', team_b_stats.get('PlayoffRank', 4))
            row[feat] = seed_b - seed_a
        else:
            # Unknown feature: no information, so use a zero difference.
            row[feat] = 0

    X_matchup = pd.DataFrame([row])[feature_cols]
    prob = model.predict_proba(X_matchup)[0][1]
    return prob


def simulate_playoffs(teams_east, teams_west, feature_cols, model,
                      n_simulations=10000, seed=42):
    """Run the full playoff bracket ``n_simulations`` times.

    Each conference's teams are taken by row position and paired in the first
    round as positions (0, 7), (3, 4), (2, 5), (1, 6). From the second round
    on, the team with the lower seed number is passed as Team A. In the Finals,
    the team with the higher 'NET_RATING' is Team A (East on ties).

    Args:
        teams_east: DataFrame of the East teams; rows are read positionally and
            must provide 'TEAM_ID' and 'TEAM_NAME'.
        teams_west: Same as ``teams_east`` for the West.
        feature_cols: Feature names forwarded to ``get_matchup_probability``.
        model: Fitted classifier forwarded to ``get_matchup_probability``.
        n_simulations: Number of brackets to simulate.
        seed: Seed for the NumPy random generator.

    Returns:
        A tuple ``(df_results, finals_matchups)``: a DataFrame indexed by team
        name with round counts and ``*_pct`` columns, sorted by championships,
        and a list of ``(east_team_name, west_team_name)`` Finals pairings.
    """
    rng = np.random.default_rng(seed)
    first_round_pairs = [(0, 7), (3, 4), (2, 5), (1, 6)]
    stages = ['champion', 'finals', 'conf_finals', 'conf_semis']
    prob_cache = {}

    def cached_prob(side_a, side_b):
        # Series probabilities are direction-specific, so the key is ordered.
        key = (side_a['TEAM_ID'], side_b['TEAM_ID'])
        if key not in prob_cache:
            prob_cache[key] = get_matchup_probability(
                side_a, side_b, feature_cols, model
            )
        return prob_cache[key]

    def play(side_a, side_b):
        # Simulate one series with side_a as Team A and return the winner.
        a_took_it = simulate_series(cached_prob(side_a, side_b), rng=rng)
        return side_a if a_took_it else side_b

    def seed_of(team):
        return team.get('SEED', team.get('PlayoffRank', 4))

    def play_by_seed(first, second):
        # The team with the lower (or equal) seed number plays as Team A.
        if seed_of(first) <= seed_of(second):
            return play(first, second)
        return play(second, first)

    east_list = [teams_east.iloc[pos] for pos in range(len(teams_east))]
    west_list = [teams_west.iloc[pos] for pos in range(len(teams_west))]

    results = {
        team['TEAM_NAME']: dict.fromkeys(stages, 0)
        for team in east_list + west_list
    }

    finals_matchups = []

    for _ in range(n_simulations):
        conf_winners = {}

        for conf_name, teams in (('East', east_list), ('West', west_list)):
            # First round: fixed pairings by position in the seeded list.
            r1_winners = [play(teams[hi], teams[lo]) for hi, lo in first_round_pairs]
            for survivor in r1_winners:
                results[survivor['TEAM_NAME']]['conf_semis'] += 1

            # Conference semifinals: adjacent first-round winners meet.
            r2_winners = [
                play_by_seed(r1_winners[slot], r1_winners[slot + 1])
                for slot in (0, 2)
            ]
            for survivor in r2_winners:
                results[survivor['TEAM_NAME']]['conf_finals'] += 1

            # Conference finals.
            conf_winners[conf_name] = play_by_seed(r2_winners[0], r2_winners[1])

        east_champ = conf_winners['East']
        west_champ = conf_winners['West']

        for finalist in (east_champ, west_champ):
            results[finalist['TEAM_NAME']]['finals'] += 1

        finals_matchups.append((east_champ['TEAM_NAME'], west_champ['TEAM_NAME']))

        # Finals: the higher NET_RATING side plays as Team A.
        if east_champ.get('NET_RATING', 0) >= west_champ.get('NET_RATING', 0):
            champion = play(east_champ, west_champ)
        else:
            champion = play(west_champ, east_champ)

        results[champion['TEAM_NAME']]['champion'] += 1

    df_results = pd.DataFrame(results).T
    for stage in stages:
        df_results[f'{stage}_pct'] = (df_results[stage] / n_simulations * 100).round(2)

    df_results = df_results.sort_values('champion', ascending=False)
    return df_results, finals_matchups

In [None]:
BACKTEST_SEASONS = ['2022-23', '2023-24', '2024-25']
KNOWN_CHAMPIONS = {
    '2022-23': 'Denver Nuggets',
    '2023-24': 'Boston Celtics',
    '2024-25': None
}

# The training table identifies teams by abbreviation, while the entries above
# and the backtest matcher (`champion in team`, against team names) use full
# names. Without this translation the 2024-25 champion could never be matched.
# The Clippers entry is the bare nickname because sources disagree between
# 'LA Clippers' and 'Los Angeles Clippers'; the substring match covers both.
TEAM_NAME_BY_ABBR = {
    'ATL': 'Atlanta Hawks', 'BOS': 'Boston Celtics', 'BKN': 'Brooklyn Nets',
    'CHA': 'Charlotte Hornets', 'CHI': 'Chicago Bulls',
    'CLE': 'Cleveland Cavaliers', 'DAL': 'Dallas Mavericks',
    'DEN': 'Denver Nuggets', 'DET': 'Detroit Pistons',
    'GSW': 'Golden State Warriors', 'HOU': 'Houston Rockets',
    'IND': 'Indiana Pacers', 'LAC': 'Clippers', 'LAL': 'Los Angeles Lakers',
    'MEM': 'Memphis Grizzlies', 'MIA': 'Miami Heat', 'MIL': 'Milwaukee Bucks',
    'MIN': 'Minnesota Timberwolves', 'NOP': 'New Orleans Pelicans',
    'NYK': 'New York Knicks', 'OKC': 'Oklahoma City Thunder',
    'ORL': 'Orlando Magic', 'PHI': 'Philadelphia 76ers', 'PHX': 'Phoenix Suns',
    'POR': 'Portland Trail Blazers', 'SAC': 'Sacramento Kings',
    'SAS': 'San Antonio Spurs', 'TOR': 'Toronto Raptors', 'UTA': 'Utah Jazz',
    'WAS': 'Washington Wizards',
}

# Look up the 2024-25 champion in the training data (round 4 = Finals row).
finals_2025 = df_training[
    (df_training['season'] == '2024-25') & (df_training['round'] == 4)
]
if not finals_2025.empty:
    row = finals_2025.iloc[0]
    champ_abbr = row['team_a_abbr'] if row['team_a_won'] == 1 else row['team_b_abbr']
    # Unknown values (e.g. already a full name) are kept as they are.
    KNOWN_CHAMPIONS['2024-25'] = TEAM_NAME_BY_ABBR.get(champ_abbr, champ_abbr)

print('Campeones para backtest:')
for s, c in KNOWN_CHAMPIONS.items():
    print(f'  {s}: {c or "(no encontrado)"}')

In [None]:
df_hist_stats = pd.read_csv(f'{DATA_DIR}/historical_team_stats.csv')
df_hist_standings = pd.read_csv(f'{DATA_DIR}/historical_standings.csv')

# For each backtest season: refit without that season, rebuild the 8-team
# brackets from that season's stats and standings, simulate, and report where
# the real champion ranks.
# NOTE(review): FEATURE_COLS was selected using every season, including the
# ones held out here, so this backtest is not fully out-of-sample.
for season in BACKTEST_SEASONS:
    print(f'--- {season} ---')

    # Train without the season being backtested. Missing values are filled
    # with 0, matching how X_all was built and how get_matchup_probability
    # encodes missing stats at prediction time.
    mask_train = df_training['season'] != season
    X_bt = df_training.loc[mask_train, FEATURE_COLS].fillna(0)
    y_bt = df_training.loc[mask_train, 'team_a_won']

    bt_model = XGBClassifier(
        n_estimators=50, max_depth=2, learning_rate=0.05,
        subsample=0.7, colsample_bytree=0.8,
        reg_alpha=2.0, reg_lambda=3.0, min_child_weight=5,
        gamma=0.5, objective='binary:logistic',
        eval_metric='logloss', random_state=42,
        use_label_encoder=False
    )
    bt_model.fit(X_bt, y_bt)

    season_stats = df_hist_stats[df_hist_stats['SEASON'] == season].copy()
    season_standings = df_hist_standings[df_hist_standings['SEASON'] == season].copy()

    if season_stats.empty:
        print(f'  Sin stats para {season}\n')
        continue

    # Attach conference and playoff rank from the standings when available.
    if 'TeamID' in season_standings.columns:
        stand_merge = season_standings[['TeamID', 'Conference', 'PlayoffRank']].rename(
            columns={'TeamID': 'TEAM_ID'}
        )
        season_stats = season_stats.merge(stand_merge, on='TEAM_ID', how='left')

    if 'Conference' not in season_stats.columns or 'PlayoffRank' not in season_stats.columns:
        print(f'  Sin Conference/Seed para {season}\n')
        continue

    # Top 8 per conference by rank; nsmallest returns them ordered by seed,
    # which simulate_playoffs relies on for its positional pairings.
    season_stats['SEED'] = season_stats['PlayoffRank'].astype(float)
    east = season_stats[season_stats['Conference'] == 'East'].nsmallest(8, 'SEED')
    west = season_stats[season_stats['Conference'] == 'West'].nsmallest(8, 'SEED')

    if len(east) < 8 or len(west) < 8:
        print(f'  Equipos insuficientes\n')
        continue

    bt_results, _ = simulate_playoffs(
        east, west, FEATURE_COLS, bt_model,
        n_simulations=5000, seed=42
    )

    top5 = bt_results.head(5)
    champion = KNOWN_CHAMPIONS.get(season, '?')

    print(f'  Top 5:')
    for rank, (team, row) in enumerate(top5.iterrows(), 1):
        # Substring match of the known champion against the team name.
        marker = ' <-- campeon' if champion and champion in team else ''
        print(f'    {rank}. {team:<28} {row["champion_pct"]:>6.1f}%{marker}')

    # Report the champion's overall position in the simulated ranking.
    if champion:
        for pos, (team, _) in enumerate(bt_results.iterrows(), 1):
            if champion in team:
                print(f'  {champion} quedo en posicion #{pos}')
                break
    print()

## Guardar modelo y config

In [None]:
import shutil
import json

# Persist the fitted model, the selected feature list, the validation metrics
# and the generated plots under the project folder.
MODELS_DIR = f'{PROJECT_DIR}/models'
OUTPUTS_DIR = f'{PROJECT_DIR}/outputs'
os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(OUTPUTS_DIR, exist_ok=True)

# Model.
# NOTE(review): a pickle is only safe to load from a trusted location and can
# break across library versions; kept because the closing notes say the next
# notebook loads this .pkl.
model_path = f'{MODELS_DIR}/xgb_playoff_model.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(model, f)
print(f'Modelo: {model_path}')

# Selected features, one per line.
features_path = f'{MODELS_DIR}/feature_columns.txt'
with open(features_path, 'w') as f:
    f.write('\n'.join(FEATURE_COLS))
print(f'Features ({len(FEATURE_COLS)}): {features_path}')

# Validation metrics. model_params mirrors the hyperparameters the model was
# built with, including colsample_bytree, which was previously left out.
metrics = {
    'oof_accuracy': round(oof_acc, 4),
    'oof_brier_score': round(oof_brier, 4),
    'oof_roc_auc': round(oof_auc, 4),
    'baseline_accuracy': round(baseline_acc, 4),
    'n_training_samples': int(len(y)),
    'n_features': len(FEATURE_COLS),
    'features_used': list(FEATURE_COLS),
    'model_params': {
        'n_estimators': 50, 'max_depth': 2, 'learning_rate': 0.05,
        'subsample': 0.7, 'colsample_bytree': 0.8,
        'reg_alpha': 2.0, 'reg_lambda': 3.0,
        'min_child_weight': 5, 'gamma': 0.5
    }
}
# default=str stringifies any value json cannot encode natively.
with open(f'{MODELS_DIR}/validation_metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2, default=str)
print(f'Metricas: validation_metrics.json')

# Copy whichever plots exist in the working directory to outputs/.
for img in ['feature_importance.png', 'validation_by_season.png',
            'calibration_plot.png', 'feature_selection.png']:
    if os.path.exists(img):
        shutil.copy(img, f'{OUTPUTS_DIR}/{img}')
        print(f'{img} -> outputs/')

print(f'\nTodo en: {PROJECT_DIR}')

---

Modelo listo. Lo que queda en Drive:
- `models/xgb_playoff_model.pkl` — el modelo entrenado
- `models/feature_columns.txt` — los features que usa
- `models/validation_metrics.json` — metricas de validacion
- `outputs/` — graficos de feature selection, feature importance, validacion, calibracion

El NB04 carga el modelo y corre la simulacion Monte Carlo con los 16 equipos actuales.