# Modelo Dixon-Coles con xG

Implementacion del modelo **Dixon-Coles (1997)** modificado para usar **xG** en lugar de goles reales.

## Por que xG en vez de goles?

Los goles reales tienen mucho ruido:
- Un equipo puede meter 3 de 3 tiros un dia
- Y 0 de 15 tiros otro dia

El **xG** es un proxy de "lo que deberia haber pasado" segun la calidad de las ocasiones. Hace que las estimaciones de ataque/defensa sean mas **estables y predictivas**.

## El truco: entrenar con xG, predecir goles

- **Entrenamiento**: Usamos xG continuo (ej: 1.73, 2.15)
- **Prediccion**: El modelo predice probabilidades de goles discretos (0, 1, 2, 3...)

Asi aprovechamos la estabilidad del xG sin romper la logica de Poisson.

**Referencia**: Dixon, M. J., & Coles, S. G. (1997). Modelling association football scores and inefficiencies in the football betting market.

In [None]:
# === Imports ===
# Paquete trading-deportivo (pip install -e ../trading-deportivo)
from trading_deportivo import (
    # Data
    fetch_all_shots, fetch_match_shots, get_league_match_ids,
    shots_to_df, build_match_xg_matrix, build_matches_with_dates, clear_cache,
    # Model
    fit_dixon_coles_xg, predict_match, predict_matchday, export_predictions,
    kelly_fraction, save_model, load_model, list_models,
    # Odds
    fetch_ps3838_odds, send_telegram_alert,
    # Betting
    log_bet, log_bets_from_predictions, update_result, show_roi_stats, show_pending_bets,
    # Backtest
    evaluate_model, temporal_validation, backtest_vs_market,
)
from trading_deportivo.team_mappings import (
    normalize_team_name, normalize_ps3838_name, validate_mapping,
    get_football_data_code, get_ps3838_map, get_ps3838_league_id,
)
from trading_deportivo.config import SUPPORTED_LEAGUES

# Libs standard
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import poisson
from scipy.optimize import curve_fit
import os, ssl, io, warnings, urllib.request
from datetime import datetime, timedelta

In [None]:
# Run configuration
# Supported league keys: "EPL", "La_Liga", "Bundesliga", "Serie_A", "Ligue_1"
LEAGUE = "La_Liga"
SEASON = ["2024", "2025"]

# Settings for the optional in-house xG model (distance binning + exponential fit)
PITCH_LENGTH_M = 100.0  # metres covered by the normalised X axis
PITCH_WIDTH_M = 65.0  # metres covered by the normalised Y axis
GOAL_CENTER_Y = 0.5  # normalised Y coordinate of the goal centre
BIN_WIDTH = 1.0  # width of each distance bin
ALPHA = 1.0  # additive smoothing applied to the per-bin goal rate
LOW_N = 10  # minimum shots for a bin to enter the curve fit
D_MIN = 2.0  # smallest bin midpoint kept for the fit
D_MAX = 33.0  # largest bin midpoint kept for the fit

## 1. Datos y Calculo de xG

In [None]:
# Download data (results are cached automatically)
USE_CACHE = True  # set to False to force a fresh download

# Fetch the match list for the configured league and seasons
match_ids, raw_matches = get_league_match_ids(LEAGUE, SEASON, use_cache=USE_CACHE)
print(f"Partidos encontrados: {len(match_ids)}")

# Fetch the shots for those matches
all_shots = fetch_all_shots(match_ids, LEAGUE, SEASON, use_cache=USE_CACHE)

# Convert to a DataFrame and drop penalties / direct free kicks.
# .copy() makes the filtered frame independent of the unfiltered one, so the
# column assignments performed on df_shots afterwards do not trigger pandas'
# SettingWithCopyWarning.
df_shots = shots_to_df(all_shots)
df_shots = df_shots[~df_shots["situation"].isin(["Penalty", "DirectFreekick"])].copy()
print(f"Tiros (open play): {len(df_shots)}")

# To clear the cache: clear_cache("La_Liga")

In [None]:
# OPTION: choose the xG source
# True = use Understat's xG (more sophisticated: includes angle, shot type, situation)
# False = compute xG with a simple exponential model (distance only)
USE_UNDERSTAT_XG = True

if USE_UNDERSTAT_XG:
    # Use Understat's xG directly
    df_shots["xg"] = df_shots["xg_understat"]
    print("Usando xG de Understat (incluye angulo, tipo de tiro, situacion de juego)")
else:
    # Compute xG with the exponential model (distance only)
    # Standardise coordinates (every shot attacks towards X=1)
    mask = df_shots["X"] < 0.5
    df_shots.loc[mask, "X"] = 1 - df_shots.loc[mask, "X"]

    # Distances in metres: along the pitch, across the pitch, and straight-line
    df_shots["dist_long_m"] = (1.0 - df_shots["X"]) * PITCH_LENGTH_M
    df_shots["dist_lateral_m"] = np.abs(df_shots["Y"] - GOAL_CENTER_Y) * PITCH_WIDTH_M
    df_shots["dist_euclid_m"] = np.sqrt(df_shots["dist_long_m"]**2 + df_shots["dist_lateral_m"]**2)

    # Bin shots by distance (left-closed bins of BIN_WIDTH) and fit an exponential
    bins = np.arange(0.0, np.ceil(df_shots["dist_euclid_m"].max()) + BIN_WIDTH, BIN_WIDTH)
    df_shots["dist_bin"] = pd.cut(df_shots["dist_euclid_m"], bins=bins, right=False)
    bin_stats = (
        df_shots.groupby("dist_bin", observed=True)
        .agg(shots=("is_goal", "size"), goals=("is_goal", "sum"),
             d_min=("dist_euclid_m", "min"), d_max=("dist_euclid_m", "max"))
        .dropna(subset=["d_min", "d_max"]).reset_index(drop=True)
    )
    # Bin midpoint is taken from the observed min/max distance inside the bin
    bin_stats["d_mid"] = 0.5 * (bin_stats["d_min"] + bin_stats["d_max"])
    # Smoothed goal rate: (goals + ALPHA) / (shots + 2 * ALPHA)
    bin_stats["p_goal"] = (bin_stats["goals"] + ALPHA) / (bin_stats["shots"] + 2 * ALPHA)

    # Only bins with enough shots and a midpoint inside [D_MIN, D_MAX] enter the fit
    fit_bins = bin_stats[(bin_stats["shots"] >= LOW_N) & 
                         (bin_stats["d_mid"] >= D_MIN) & 
                         (bin_stats["d_mid"] <= D_MAX)].copy()

    def exp_decay_xg(d, k, a, b):
        """Exponential decay in distance d: a * exp(-d / k) + b."""
        return np.exp(-d / k) * a + b

    # sigma = 1/sqrt(shots) gives bins with more shots a larger weight in the fit
    popt, _ = curve_fit(exp_decay_xg, fit_bins["d_mid"].values, fit_bins["p_goal"].values,
                        p0=[5.0, 0.9, 0.03], sigma=1/np.sqrt(fit_bins["shots"].values), maxfev=5000)
    k_opt, a_opt, b_opt = popt

    # Clip so every shot keeps a probability strictly between 0 and 1
    df_shots["xg"] = np.clip(exp_decay_xg(df_shots["dist_euclid_m"], k_opt, a_opt, b_opt), 1e-9, 1 - 1e-9)
    print(f"Usando xG calculado: xG = e^(-d/{k_opt:.2f}) * {a_opt:.4f} + {b_opt:.4f}")

# Map the "h"/"a" flag to "home"/"away"
df_shots["venue"] = df_shots["h_a"].map({"h": "home", "a": "away"})

# Compare computed xG against Understat's (only when the computed model was used)
if "xg_understat" in df_shots.columns and not USE_UNDERSTAT_XG:
    corr = df_shots["xg"].corr(df_shots["xg_understat"])
    print(f"\nCorrelacion xG calculado vs Understat: {corr:.3f}")

print(f"\nxG aplicado a {len(df_shots)} tiros")
print(f"xG promedio por tiro: {df_shots['xg'].mean():.3f}")
print(f"xG total temporada: {df_shots['xg'].sum():.1f}")

In [None]:
# Build the per-match xG table from the shot-level data
match_xg = build_match_xg_matrix(df_shots, raw_matches)
print(f"Partidos con xG: {len(match_xg)}")

home_xg_mean = match_xg["home_xg"].mean()
print(f"\nPromedio xG local: {home_xg_mean:.2f}")
away_xg_mean = match_xg["away_xg"].mean()
print(f"Promedio xG visitante: {away_xg_mean:.2f}")

# The date range is only reported when the table carries a datetime column
if "datetime" in match_xg.columns:
    xg_first_day = match_xg["datetime"].min().date()
    xg_last_day = match_xg["datetime"].max().date()
    print(f"Rango fechas: {xg_first_day} a {xg_last_day}")
match_xg.head(10)

## 2. Matematicas del Modelo Dixon-Coles con xG

### Modelo Clasico (con goles)

El modelo original usa goles discretos y Poisson PMF:

$$P(X=x, Y=y) = \tau(x,y,\lambda,\mu,\rho) \cdot \frac{\lambda^x e^{-\lambda}}{x!} \cdot \frac{\mu^y e^{-\mu}}{y!}$$

### Modelo Modificado (con xG)

En lugar de maximizar la probabilidad de goles observados, **minimizamos el error cuadratico** entre:
- $\lambda_{ij}$ (goles esperados segun el modelo)
- $xG_{ij}$ (xG observado en el partido)

Esto es equivalente a maximizar una **likelihood Gaussiana** con varianza constante, cuya log-likelihood es (salvo constantes):

$$\log \mathcal{L} \propto -\sum_{partidos} \left[ (\lambda_{ij} - xG_{home})^2 + (\mu_{ij} - xG_{away})^2 \right]$$

Donde:
- $\lambda_{ij} = \alpha_i \cdot \beta_j \cdot \gamma$ (local)
- $\mu_{ij} = \alpha_j \cdot \beta_i$ (visitante)

### Prediccion

Para predecir, usamos los parametros estimados para calcular $\lambda$ y $\mu$, y luego aplicamos **Poisson discreto** para obtener probabilidades de 0, 1, 2, 3... goles.

## 3. Entrenar / Cargar Modelo

In [None]:
# Fit the model (raw_matches is passed so that rho can be estimated)
model = fit_dixon_coles_xg(match_xg, raw_matches=raw_matches, reg=0.001)

banner = "=" * 50
print(f"\n{banner}")

# (prefix, key in the fitted-model dict, format spec, suffix) for each summary line
fit_summary_rows = (
    ("Convergencia: ", "converged", "", ""),
    ("Mensaje: ", "message", "", ""),
    ("MSE (error cuadratico medio): ", "mse", ".4f", ""),
    ("Ventaja local (gamma): ", "gamma", ".3f", ""),
    ("Correlacion (rho): ", "rho", ".4f", " (estimado)"),
)
for prefix, key, spec, suffix in fit_summary_rows:
    print(f"{prefix}{model[key]:{spec}}{suffix}")

In [None]:
# Guardar modelo entrenado
# save_model(model, league=LEAGUE)

# Cargar modelo guardado (alternativa a entrenar)
# model = load_model(latest=True, league=LEAGUE)

# Ver modelos disponibles
# list_models(LEAGUE)

In [None]:
# Parameter table ranked by attack strength (rank starts at 1)
params_df = (
    model["params_df"]
    .copy()
    .sort_values("alpha_attack", ascending=False)
    .reset_index(drop=True)
)
params_df.index += 1

rounded_cols = ["alpha_attack", "beta_defense"]
params_df[rounded_cols] = params_df[rounded_cols].round(3)

print("Parametros del modelo (ordenados por ataque):")
print("alpha > 1 = ataque fuerte, beta < 1 = defensa fuerte")
params_df

## 3.5 Diagnosticos del Modelo

### Test de Overdispersion

Poisson asume que varianza = media. Si varianza >> media, el modelo subestima la incertidumbre y podria necesitar Negative Binomial.

In [None]:
# Overdispersion check: compare the variance of goals against their mean
def test_overdispersion(raw_matches, teams):
    """Print variance/mean diagnostics for observed goals and return the overall ratio.

    A Poisson variable has variance equal to its mean, so a ratio well above 1
    signals overdispersion (ratio > 1.5 suggests considering Negative Binomial).
    Variances are population variances (``np.var`` with its default ``ddof``).

    Args:
        raw_matches: Iterable of match dicts. Only entries with a truthy
            ``"isResult"`` whose ``m["h"]["title"]`` and ``m["a"]["title"]`` are
            both in ``teams`` are used; goals are read from ``m["goals"]["h"]``
            and ``m["goals"]["a"]`` (a missing key counts as 0).
        teams: Iterable of team names to include.

    Returns:
        Variance/mean ratio over home and away goals pooled together, or NaN
        when no match qualifies or when the mean is zero.
    """
    known_teams = set(teams)

    home_goals = []
    away_goals = []
    for m in raw_matches:
        if not m.get("isResult"):
            continue
        home_team = m.get("h", {}).get("title")
        away_team = m.get("a", {}).get("title")
        if home_team not in known_teams or away_team not in known_teams:
            continue
        home_goals.append(int(m.get("goals", {}).get("h", 0)))
        away_goals.append(int(m.get("goals", {}).get("a", 0)))

    all_goals = home_goals + away_goals

    print("TEST DE OVERDISPERSION")
    print("=" * 50)

    # Without data the statistics are undefined; say so instead of printing a
    # misleading verdict based on NaN.
    if not all_goals:
        print("\nNo hay partidos jugados para evaluar")
        return float("nan")

    def _report(title, goals):
        """Print mean, variance and their ratio for one sample; return the ratio."""
        mean = np.mean(goals)
        var = np.var(goals)
        ratio = var / mean if mean > 0 else float("nan")
        print(f"\n{title}:")
        print(f"  Media: {mean:.3f}")
        print(f"  Varianza: {var:.3f}")
        print(f"  Ratio (var/mean): {ratio:.3f}")
        return ratio

    _report("Goles locales", home_goals)
    _report("Goles visitantes", away_goals)
    ratio = _report("Todos los goles", all_goals)

    if ratio != ratio:  # NaN: the mean is zero, so the ratio is undefined
        print("\nNo se puede calcular el ratio (media de goles = 0)")
    elif ratio < 1.2:
        print(f"\n✓ Poisson es adecuado (ratio {ratio:.2f} < 1.2)")
    elif ratio < 1.5:
        print(f"\n~ Poisson es aceptable pero con leve overdispersion (ratio {ratio:.2f})")
    else:
        print(f"\n⚠ Considerar Negative Binomial (ratio {ratio:.2f} > 1.5)")

    return ratio


# Despite its name this is a diagnostic helper, not a unit test: opt out of
# pytest collection in case the function is ever imported into a test module.
test_overdispersion.__test__ = False

# Run the overdispersion check restricted to the teams known to the fitted model
overdispersion_ratio = test_overdispersion(raw_matches, model["teams"])

## 4. Prediccion

Aunque entrenamos con xG continuo, la prediccion usa **Poisson discreto** para calcular probabilidades de goles enteros (0, 1, 2, 3...).

In [None]:
# Prediction example
# NOTE(review): these look like Bundesliga clubs while the configuration cell
# sets LEAGUE = "La_Liga" — confirm the model was fitted on the matching league.
HOME = "Borussia Dortmund"
AWAY = "Mainz 05"

pred = predict_match(HOME, AWAY, model)

print(f"\n{HOME} vs {AWAY}")
print("=" * 40)
print(f"Goles esperados {HOME}: {pred['lambda_home']:.2f}")
print(f"Goles esperados {AWAY}: {pred['mu_away']:.2f}")
print(f"\nProbabilidades 1X2:")
print(f"  1 ({HOME}): {pred['p_home']*100:.1f}%")
print(f"  X (Empate):       {pred['p_draw']*100:.1f}%")
print(f"  2 ({AWAY}): {pred['p_away']*100:.1f}%")
print(f"\nOver/Under:")
print(f"  O1.5: {pred['p_over_15']*100:.1f}%  |  O2.5: {pred['p_over_25']*100:.1f}%  |  O3.5: {pred['p_over_35']*100:.1f}%")
print(f"\nBTTS: {pred['p_btts_yes']*100:.1f}%")
print(f"Marcador mas probable: {pred['most_likely_score']}")

# Show the heatmap
#plot_score_heatmap(pred)

In [None]:
def plot_score_heatmap(pred, max_show=6):
    """Draw a heatmap of scoreline probabilities for one predicted match.

    Usage:
        pred = predict_match("Borussia Dortmund", "Mainz 05", model)
        plot_score_heatmap(pred)

    Args:
        pred: Mapping with ``"score_matrix"`` (2-D array; rows are plotted as
            home goals and columns as away goals), ``"home_team"`` and
            ``"away_team"``.
        max_show: Maximum number of goal counts shown per side.
    """
    matrix = pred["score_matrix"][:max_show, :max_show]
    # The score matrix may be smaller than max_show; size ticks and labels from
    # the actual slice so indexing never runs past its edge.
    n_rows, n_cols = matrix.shape

    fig, ax = plt.subplots(figsize=(8, 6))
    im = ax.imshow(matrix, cmap="YlOrRd")

    ax.set_xticks(range(n_cols))
    ax.set_yticks(range(n_rows))
    ax.set_xlabel(f"Goles {pred['away_team']}")
    ax.set_ylabel(f"Goles {pred['home_team']}")
    ax.set_title(f"{pred['home_team']} vs {pred['away_team']}\nProbabilidad de cada marcador")

    for i in range(n_rows):
        for j in range(n_cols):
            cell = matrix[i, j]
            # White text on the darker (higher-probability) cells for contrast
            ax.text(j, i, f"{cell*100:.1f}%",
                    ha="center", va="center",
                    color="white" if cell > 0.08 else "black",
                    fontsize=9)

    plt.colorbar(im, label="Probabilidad")
    plt.tight_layout()
    plt.show()

# Para usar: plot_score_heatmap(pred)

## 5. Visualizaciones

In [None]:
# Scatter plot: attack strength vs defensive strength (1/beta) per team
params_df = model["params_df"].copy()

fig, ax = plt.subplots(figsize=(10, 8))

attack_strength = params_df["alpha_attack"]
ax.scatter(attack_strength, 1/params_df["beta_defense"], s=100, alpha=0.7)

# Label every point with its team name, slightly offset from the marker
for team_label, alpha_val, beta_val in zip(
    params_df["team"], params_df["alpha_attack"], params_df["beta_defense"]
):
    ax.annotate(
        team_label,
        (alpha_val, 1/beta_val),
        xytext=(5, 5),
        textcoords="offset points",
        fontsize=8,
    )

# Reference lines at 1 on both axes
reference_style = dict(color="gray", linestyle="--", alpha=0.5)
ax.axvline(x=1, label="Ataque promedio", **reference_style)
ax.axhline(y=1, label="Defensa promedio", **reference_style)

ax.set_xlabel("Fortaleza Ofensiva (alpha)")
ax.set_ylabel("Fortaleza Defensiva (1/beta)")
ax.set_title("Mapa de Equipos: Ataque vs Defensa\n(Arriba-derecha = mejor equipo)")
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Top-5 rankings by attack (largest alpha) and defence (smallest beta)
params_df = model["params_df"].copy()

top_attack = (
    params_df.nlargest(5, "alpha_attack")[["team", "alpha_attack"]]
    .rename(columns={"team": "Equipo", "alpha_attack": "Ataque (alpha)"})
)

top_defense = (
    params_df.nsmallest(5, "beta_defense")[["team", "beta_defense"]]
    .rename(columns={"team": "Equipo", "beta_defense": "Defensa (beta)"})
)

print("TOP 5 ATAQUES (mayor alpha = mas goles esperados)")
print(top_attack.to_string(index=False))
print("\nTOP 5 DEFENSAS (menor beta = menos goles recibidos)")
print(top_defense.to_string(index=False))

In [None]:
def predecir_partido(local: str, visitante: str):
    """Predict one fixture with the notebook-level ``model``, print a summary and plot it.

    Usage:
        predecir_partido("Borussia Dortmund", "Mainz 05")

    Args:
        local: Home team name.
        visitante: Away team name.

    If ``predict_match`` raises ``ValueError`` the error is printed together
    with the list of teams available in ``model["teams"]`` and nothing is
    plotted. Errors raised while printing or plotting are not intercepted.
    """
    # Only the prediction call is guarded: a ValueError coming from the printing
    # or plotting code below must not be reported as a team-name problem.
    try:
        pred = predict_match(local, visitante, model)
    except ValueError as e:
        print(f"Error: {e}")
        print(f"\nEquipos disponibles:")
        for t in sorted(model["teams"]):
            print(f"  - {t}")
        return

    print(f"\n{'='*50}")
    print(f"{local} vs {visitante}")
    print(f"{'='*50}")
    print(f"\nGoles esperados (xG-based):")
    print(f"  {local}: {pred['lambda_home']:.2f}")
    print(f"  {visitante}: {pred['mu_away']:.2f}")
    print(f"\nProbabilidades 1X2:")
    print(f"  1 (Local):   {pred['p_home']*100:5.1f}%")
    print(f"  X (Empate):  {pred['p_draw']*100:5.1f}%")
    print(f"  2 (Visita): {pred['p_away']*100:5.1f}%")

    # Most likely scoreline = position of the largest cell of the score matrix
    matrix = pred["score_matrix"]
    max_idx = np.unravel_index(matrix.argmax(), matrix.shape)
    print(f"\nMarcador mas probable: {max_idx[0]}-{max_idx[1]} ({matrix[max_idx]*100:.1f}%)")

    plot_score_heatmap(pred)

# Numbered, alphabetically sorted list of the teams known to the model
print("Equipos disponibles para prediccion:")
for rank, team_name in enumerate(sorted(model["teams"]), start=1):
    print(f"  {rank:2d}. {team_name}")

In [None]:
# Interactive usage example
predecir_partido("Borussia Dortmund", "Mainz 05")

## 6. Validacion del Modelo

Metricas de calibracion para evaluar la calidad de las predicciones probabilisticas:

- **Log Loss**: Penaliza fuertemente las predicciones confiadas pero incorrectas
- **Brier Score**: Error cuadratico medio de las probabilidades (menor = mejor)
- **Calibracion**: Las probabilidades predichas deben coincidir con las frecuencias observadas

In [None]:
# Evaluate the model with calibration metrics
validation = evaluate_model(model, raw_matches)

In [None]:
def plot_calibration(validation_data, n_bins=10):
    """Plot predicted probability against observed frequency for each 1X2 outcome.

    Points on the dashed diagonal indicate perfect calibration. One panel is
    drawn per outcome; the marker size scales with the number of matches in
    the bin, and bins with fewer than 3 matches are hidden.

    Args:
        validation_data: ``None`` or a mapping whose ``"data"`` entry is a
            DataFrame with columns ``p_away``, ``p_draw``, ``p_home`` and
            ``outcome`` (compared against 0 = away win, 1 = draw, 2 = home win).
        n_bins: Number of equal-width probability bins per panel.
    """
    if validation_data is None:
        print("No hay datos de validacion")
        return

    df = validation_data["data"]

    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    # (probability column, panel title, outcome code)
    panels = [
        ("p_away", "Victoria Visitante", 0),
        ("p_draw", "Empate", 1),
        ("p_home", "Victoria Local", 2),
    ]

    for ax, (prob_col, label, oidx) in zip(axes, panels):
        # Keep the bin ids in a standalone Series rather than writing a "bin"
        # column into the caller's DataFrame.
        bin_ids = pd.cut(df[prob_col], bins=n_bins, labels=False).rename("bin")

        calibration = df.groupby(bin_ids).agg(
            mean_pred=(prob_col, "mean"),
            freq_obs=("outcome", lambda x, oidx=oidx: (x == oidx).mean()),
            count=("outcome", "size"),
        ).dropna()

        # Only show bins with enough samples
        calibration = calibration[calibration["count"] >= 3]

        if len(calibration) > 0:
            ax.scatter(calibration["mean_pred"], calibration["freq_obs"],
                       s=calibration["count"]*5, alpha=0.7)
            ax.plot([0, 1], [0, 1], "k--", alpha=0.5, label="Calibracion perfecta")
            ax.set_xlabel("Probabilidad predicha")
            ax.set_ylabel("Frecuencia observada")
            ax.set_title(label)
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.legend()
            ax.grid(True, alpha=0.3)

    plt.suptitle("Calibracion del Modelo\n(tamano del punto = numero de partidos)", y=1.02)
    plt.tight_layout()
    plt.show()

# Plot only when evaluate_model returned a truthy result
if validation:
    plot_calibration(validation)

## 7. Simulacion Monte Carlo y Value Betting

La simulacion Monte Carlo genera miles de partidos virtuales para:
- Obtener distribuciones de probabilidad mas robustas
- Calcular intervalos de confianza
- Identificar apuestas con valor esperado positivo (value bets)

In [None]:
def simulate_match_monte_carlo(home_team, away_team, model, n_sim=10000):
    """Simulate one fixture ``n_sim`` times with independent Poisson goal counts.

    The home rate is ``alphas[home] * betas[away] * gamma`` and the away rate is
    ``alphas[away] * betas[home]``, read from the fitted ``model``.

    Returns:
        dict with the team names, both rates, the simulated 1X2 probabilities,
        mean and standard deviation of total goals, the over 1.5 / over 2.5 and
        both-teams-to-score frequencies, and the raw simulated goal arrays.
    """
    position = {name: k for k, name in enumerate(model["teams"])}
    h = position[home_team]
    a = position[away_team]

    # Expected goals for each side
    alphas, betas = model["alphas"], model["betas"]
    lambda_home = alphas[h] * betas[a] * model["gamma"]
    mu_away = alphas[a] * betas[h]

    # Draw the simulated scorelines (home first, then away)
    home_goals = np.random.poisson(lambda_home, size=n_sim)
    away_goals = np.random.poisson(mu_away, size=n_sim)
    total_goals = home_goals + away_goals

    home_won = home_goals > away_goals
    drawn = home_goals == away_goals
    away_won = home_goals < away_goals
    both_scored = (home_goals > 0) & (away_goals > 0)

    result = {
        "home_team": home_team,
        "away_team": away_team,
        "lambda_home": lambda_home,
        "mu_away": mu_away,
    }
    result["p_home"] = np.sum(home_won) / n_sim
    result["p_draw"] = np.sum(drawn) / n_sim
    result["p_away"] = np.sum(away_won) / n_sim
    result["mean_total_goals"] = np.mean(total_goals)
    result["std_total_goals"] = np.std(total_goals)
    result["p_over_2_5"] = np.mean(total_goals > 2.5)
    result["p_over_1_5"] = np.mean(total_goals > 1.5)
    result["p_btts"] = np.mean(both_scored)
    result["home_goals_sim"] = home_goals
    result["away_goals_sim"] = away_goals
    return result


def find_value_bets(home_team, away_team, model, odds_home, odds_draw, odds_away):
    """Compare model probabilities with bookmaker odds for the 1X2 market.

    For each outcome the table shows the model probability, the probability
    implied by the odds (``1 / odds``), their difference ("Value"), the
    expected value per unit staked and the stake returned by
    ``kelly_fraction``. The table and a one-line summary are printed.

    Args:
        home_team: Home team name.
        away_team: Away team name.
        model: Fitted model passed to ``predict_match``.
        odds_home, odds_draw, odds_away: Bookmaker decimal odds.

    Returns:
        DataFrame with one formatted row per outcome.
    """
    pred = predict_match(home_team, away_team, model)

    outcomes = [
        ("1 (Local)", pred["p_home"], odds_home),
        ("X (Empate)", pred["p_draw"], odds_draw),
        ("2 (Visita)", pred["p_away"], odds_away),
    ]

    bets = []
    n_value_bets = 0
    for name, prob, odds in outcomes:
        implied = 1 / odds
        value = prob - implied
        ev = prob * (odds - 1) - (1 - prob)  # Expected value per unit
        kelly = kelly_fraction(prob, odds)

        # Count on the raw number: re-parsing the rounded display string would
        # miss a small positive edge that is shown as "+0.0%".
        if value > 0:
            n_value_bets += 1

        bets.append({
            "Apuesta": name,
            "Prob Modelo": f"{prob:.1%}",
            "Prob Implicita": f"{implied:.1%}",
            "Value": f"{value:+.1%}",
            "EV (por unidad)": f"{ev:+.3f}",
            "Kelly (25%)": f"{kelly:.1%}" if kelly > 0 else "-",
        })

    df = pd.DataFrame(bets)

    print(f"\n{home_team} vs {away_team}")
    print("=" * 60)
    print(df.to_string(index=False))

    # Summary
    if n_value_bets:
        print(f"\n✓ Value bets encontradas: {n_value_bets}")
    else:
        print(f"\n✗ No hay value bets con estas cuotas")

    return df


# Monte Carlo simulation example
HOME = "Borussia Dortmund"
AWAY = "Mainz 05"

print(f"SIMULACION MONTE CARLO: {HOME} vs {AWAY}")
print("=" * 50)

# 50000 simulated matches; the result dict is reused by the plotting cell below
sim = simulate_match_monte_carlo(HOME, AWAY, model, n_sim=50000)

print(f"\nGoles esperados:")
print(f"  {HOME}: {sim['lambda_home']:.2f}")
print(f"  {AWAY}: {sim['mu_away']:.2f}")

print(f"\nProbabilidades 1X2 (simuladas):")
print(f"  1 (Local):  {sim['p_home']:.1%}")
print(f"  X (Empate): {sim['p_draw']:.1%}")
print(f"  2 (Visita): {sim['p_away']:.1%}")

print(f"\nMercados adicionales:")
print(f"  Over 2.5: {sim['p_over_2_5']:.1%}")
print(f"  Over 1.5: {sim['p_over_1_5']:.1%}")
print(f"  BTTS (Ambos marcan): {sim['p_btts']:.1%}")
print(f"  Media goles totales: {sim['mean_total_goals']:.2f} +/- {sim['std_total_goals']:.2f}")

In [None]:
# Value-bet analysis example
# Example odds (replace with real odds from a bookmaker)
print("\nANALISIS DE VALUE BETS")
print("(Ejemplo con cuotas ficticias - usar cuotas reales para aplicar)")

example_odds = {
    "odds_home": 3.28,  # home win
    "odds_draw": 2.23,  # draw
    "odds_away": 3.56,  # away win
}
find_value_bets("Borussia Dortmund", "Mainz 05", model, **example_odds)

In [None]:
# Visualise the distribution of simulated goals (Monte Carlo)
def plot_goals_distribution(sim):
    """Plot histograms of simulated home, away and total goals.

    Args:
        sim: Result dict of ``simulate_match_monte_carlo``; reads
            ``home_goals_sim``, ``away_goals_sim``, ``lambda_home``,
            ``mu_away``, ``mean_total_goals``, ``home_team`` and ``away_team``.
    """
    home_sim = sim["home_goals_sim"]
    away_sim = sim["away_goals_sim"]
    # Report the real sample size in the title instead of a fixed number
    n_sim = len(home_sim)

    fig, axes = plt.subplots(1, 3, figsize=(14, 4))

    # Home goals
    axes[0].hist(home_sim, bins=range(0, 10), density=True,
                 alpha=0.7, color="blue", edgecolor="black")
    axes[0].axvline(sim["lambda_home"], color="red", linestyle="--",
                    label=f"Lambda = {sim['lambda_home']:.2f}")
    axes[0].set_xlabel("Goles")
    axes[0].set_ylabel("Probabilidad")
    axes[0].set_title(f"Goles {sim['home_team']}")
    axes[0].legend()

    # Away goals
    axes[1].hist(away_sim, bins=range(0, 10), density=True,
                 alpha=0.7, color="orange", edgecolor="black")
    axes[1].axvline(sim["mu_away"], color="red", linestyle="--",
                    label=f"Mu = {sim['mu_away']:.2f}")
    axes[1].set_xlabel("Goles")
    axes[1].set_title(f"Goles {sim['away_team']}")
    axes[1].legend()

    # Total goals, with the 2.5 line and the simulated mean marked
    total = home_sim + away_sim
    axes[2].hist(total, bins=range(0, 12), density=True,
                 alpha=0.7, color="green", edgecolor="black")
    axes[2].axvline(2.5, color="red", linestyle="--", label="Linea 2.5")
    axes[2].axvline(sim["mean_total_goals"], color="purple", linestyle="--",
                    label=f"Media = {sim['mean_total_goals']:.2f}")
    axes[2].set_xlabel("Goles")
    axes[2].set_title("Total de Goles")
    axes[2].legend()

    plt.suptitle(f"Distribucion de Goles: {sim['home_team']} vs {sim['away_team']}\n(Monte Carlo, {n_sim} simulaciones)")
    plt.tight_layout()
    plt.show()

# Plot the simulation stored in `sim` by the Monte Carlo example cell
plot_goals_distribution(sim)

## 11. Validación Temporal Out-of-Sample (Opcional)

⚠️ **Esta sección tarda ~14 minutos** porque re-entrena el modelo múltiples veces con ventanas deslizantes.

Sáltala si solo quieres generar predicciones. Ejecútala cuando quieras validar el rendimiento histórico del modelo.

---

Esta es la prueba definitiva: ¿el modelo puede predecir partidos que **nunca vio**?

### Metodología
1. Ordenamos partidos por fecha
2. Entrenamos SOLO con partidos antiguos
3. Predecimos partidos futuros (que el modelo nunca vio)
4. Comparamos predicciones vs resultados reales

Esto simula el uso real del modelo.

In [None]:
# Build the dated match dataset
df_matches = build_matches_with_dates(raw_matches, df_shots)

print(f"Partidos con fecha y xG: {len(df_matches)}")
matches_first_day = df_matches["datetime"].min().date()
matches_last_day = df_matches["datetime"].max().date()
print(f"Rango de fechas: {matches_first_day} a {matches_last_day}")
print(f"\nPrimeros 5 partidos:")
df_matches.head()

In [None]:
# ⚠️ SKIP FLAG - set to False to run the validation (takes ~14 min)
SKIP_TEMPORAL_VALIDATION = True

if SKIP_TEMPORAL_VALIDATION:
    print("⏭️ Validación temporal SALTADA (SKIP_TEMPORAL_VALIDATION = True)")
    print("   Cambia a False en la línea 2 para ejecutar (~14 min)")
    val_results = None
else:
    val_results = temporal_validation(df_matches, raw_matches, train_ratio=0.7, reg=0.001)

In [None]:
# Rolling (expanding-window) validation, more robust than a single split
def rolling_validation(df_matches, min_train_matches=100, step_matches=10, reg=0.001):
    """Out-of-sample validation that re-fits the model as matches accumulate.

    At each step:
    1. Fit on every match before the test window (the first ``start_test``
       rows once sorted by ``datetime``).
    2. Predict the next ``step_matches`` matches.
    3. Advance the window by ``step_matches`` and repeat.

    Test matches involving a team absent from the training rows are skipped.
    Printed output of ``fit_dixon_coles_xg`` is suppressed during fitting.

    Note: this function reads the notebook-level ``raw_matches`` list to hand
    the raw records of the training matches to ``fit_dixon_coles_xg``.

    Args:
        df_matches: DataFrame with columns ``match_id``, ``datetime``,
            ``home_team``, ``away_team``, ``home_xg``, ``away_xg``,
            ``home_goals`` and ``away_goals``.
        min_train_matches: Number of earliest matches used for the first fit.
        step_matches: Size of each test window and of each window advance.
        reg: Regularisation strength forwarded to ``fit_dixon_coles_xg``.

    Returns:
        ``None`` when no prediction could be produced; otherwise a dict with
        ``log_loss``, ``brier``, ``accuracy``, ``n_predictions``, ``n_windows``
        and ``predictions`` (one row per predicted match; ``outcome`` is coded
        0 = away win, 1 = draw, 2 = home win).
    """
    import contextlib
    import io

    df = df_matches.sort_values("datetime").reset_index(drop=True)

    print("=" * 60)
    print("VALIDACIÓN CON VENTANA DESLIZANTE")
    print("=" * 60)
    print(f"Partidos totales: {len(df)}")
    print(f"Mínimo para entrenar: {min_train_matches}")
    print(f"Paso de evaluación: {step_matches} partidos")

    all_predictions = []
    n_windows = 0
    n_failed = 0

    # Start once there are enough matches to train on. range() guarantees
    # start_test < len(df), so every test slice holds at least one match.
    for start_test in range(min_train_matches, len(df), step_matches):
        train_df = df.iloc[:start_test]
        test_df = df.iloc[start_test:start_test + step_matches]

        # Only predict matches whose two teams were seen during training
        train_teams = set(train_df["home_team"]) | set(train_df["away_team"])
        test_df = test_df[
            test_df["home_team"].isin(train_teams)
            & test_df["away_team"].isin(train_teams)
        ]
        if len(test_df) == 0:
            continue

        train_xg = train_df[["match_id", "home_team", "away_team", "home_xg", "away_xg"]]
        # NOTE(review): the comparison below assumes the match_id column holds
        # strings (raw ids are converted with str()) — confirm against the data.
        train_match_ids = set(train_df["match_id"])
        train_raw = [m for m in raw_matches if str(m.get("id")) in train_match_ids]

        # Fit silently; the context manager restores stdout even if fitting fails
        with contextlib.redirect_stdout(io.StringIO()):
            model_window = fit_dixon_coles_xg(train_xg, raw_matches=train_raw, reg=reg)

        # Evaluate on the test window
        for _, row in test_df.iterrows():
            ht, at = row["home_team"], row["away_team"]
            hg, ag = row["home_goals"], row["away_goals"]

            if hg > ag:
                outcome = 2
            elif hg < ag:
                outcome = 0
            else:
                outcome = 1

            # Best effort: a match that cannot be predicted is skipped, but it
            # is counted and reported instead of being dropped silently.
            try:
                pred = predict_match(ht, at, model_window)
            except Exception:
                n_failed += 1
                continue

            all_predictions.append({
                "window": n_windows,
                "date": row["datetime"],
                "home": ht,
                "away": at,
                "p_home": pred["p_home"],
                "p_draw": pred["p_draw"],
                "p_away": pred["p_away"],
                "outcome": outcome,
                "train_size": len(train_df),
            })

        n_windows += 1

    if n_failed:
        print(f"\nPredicciones omitidas por error: {n_failed}")

    if not all_predictions:
        print("No hay suficientes datos para validación rolling")
        return None

    pred_df = pd.DataFrame(all_predictions)
    n = len(pred_df)

    # Global metrics. Probability columns are ordered to match the outcome
    # codes (0 = away, 1 = draw, 2 = home), clipped away from 0/1 and
    # renormalised so each row sums to 1.
    eps = 1e-10
    probs = pred_df[["p_away", "p_draw", "p_home"]].to_numpy(dtype=float)
    probs = np.clip(probs, eps, 1 - eps)
    probs = probs / probs.sum(axis=1, keepdims=True)
    outcomes = pred_df["outcome"].to_numpy(dtype=int)

    log_loss = -np.log(probs[np.arange(n), outcomes]).mean()
    one_hot = np.eye(3)[outcomes]
    brier = ((probs - one_hot) ** 2).sum(axis=1).mean()
    accuracy = (probs.argmax(axis=1) == outcomes).mean()

    # Benchmarks: uniform 1/3 probability on each outcome
    benchmark_ll = -np.log(1/3)
    benchmark_brier = 2/3
    benchmark_acc = 1/3

    print(f"\nVentanas evaluadas: {n_windows}")
    print(f"Predicciones totales: {n}")
    print(f"\n{'='*60}")
    print("RESULTADOS ROLLING (Out-of-Sample)")
    print(f"{'='*60}")
    print(f"\n{'Métrica':<20} {'Modelo':<15} {'Benchmark':<15} {'Diferencia':<15}")
    print("-" * 60)
    print(f"{'Log Loss':<20} {log_loss:<15.4f} {benchmark_ll:<15.4f} {log_loss - benchmark_ll:<+15.4f}")
    print(f"{'Brier Score':<20} {brier:<15.4f} {benchmark_brier:<15.4f} {brier - benchmark_brier:<+15.4f}")
    print(f"{'Accuracy':<20} {accuracy*100:<14.1f}% {benchmark_acc*100:<14.1f}% {(accuracy-benchmark_acc)*100:<+14.1f}%")

    # Final interpretation
    print(f"\n{'='*60}")
    print("VEREDICTO FINAL")
    print(f"{'='*60}")

    if log_loss < benchmark_ll:
        edge = (benchmark_ll - log_loss) / benchmark_ll * 100
        print(f"\n✓ EL MODELO TIENE CAPACIDAD PREDICTIVA REAL")
        print(f"  Edge sobre azar: {edge:.1f}%")
        print(f"  Log Loss OOS: {log_loss:.4f} (mejor que {benchmark_ll:.4f})")

        if edge > 5:
            print(f"\n  → Potencial para apuestas (edge > 5%)")
            print(f"    PERO: aún necesitas comparar vs odds del mercado")
        else:
            print(f"\n  → Edge pequeño ({edge:.1f}%), probablemente insuficiente")
            print(f"    Las comisiones (~5%) pueden eliminarlo")
    else:
        print(f"\n✗ EL MODELO NO SUPERA AL AZAR")
        print(f"  Log Loss OOS: {log_loss:.4f} (peor que {benchmark_ll:.4f})")
        print(f"\n  → No recomendado para apuestas")
        print(f"  → Necesita mejoras (más datos, features, etc.)")

    return {
        "log_loss": log_loss,
        "brier": brier,
        "accuracy": accuracy,
        "n_predictions": n,
        "n_windows": n_windows,
        "predictions": pred_df,
    }

# Run the rolling validation unless the skip flag is set
rolling_results = (
    None
    if SKIP_TEMPORAL_VALIDATION
    else rolling_validation(df_matches, min_train_matches=100, step_matches=10)
)

In [None]:
# Plot rolling performance (only when the validation actually ran)
if rolling_results is not None:
    # Predictions in chronological order, re-indexed from 0
    pred_df = rolling_results["predictions"].sort_values("date").reset_index(drop=True)

    # Running mean of the log loss after each prediction
    eps = 1e-10
    random_ll = -np.log(1/3)  # log loss of a uniform 1/3 forecast
    cumulative_ll = []
    running_ll = 0.0

    for i, row in pred_df.iterrows():
        probs = np.clip(
            np.array([row["p_away"], row["p_draw"], row["p_home"]]),
            eps,
            1 - eps,
        )
        probs = probs / probs.sum()
        running_ll -= np.log(probs[row["outcome"]])
        cumulative_ll.append(running_ll / (i + 1))

    pred_df["cumulative_ll"] = cumulative_ll

    # Chart
    fig, ax = plt.subplots(figsize=(12, 5))

    ax.plot(pred_df.index, pred_df["cumulative_ll"], 'b-', lw=2, label="Modelo")
    ax.axhline(random_ll, color='red', ls='--', lw=2, label=f"Benchmark (azar): {random_ll:.4f}")

    # Shade the stretches where the model beats the benchmark
    ax.fill_between(
        pred_df.index,
        pred_df["cumulative_ll"],
        random_ll,
        where=pred_df["cumulative_ll"] < random_ll,
        color='green',
        alpha=0.3,
        label="Edge positivo",
    )

    ax.set_xlabel("Número de predicción", fontsize=12)
    ax.set_ylabel("Log Loss acumulado", fontsize=12)
    ax.set_title("Evolución del Log Loss Out-of-Sample (Rolling Validation)", fontsize=14)
    ax.legend(loc="upper right")
    ax.grid(True, alpha=0.3)

    # Annotate the final value
    final_ll = cumulative_ll[-1]
    n_points = len(cumulative_ll)
    ax.annotate(
        f"Final: {final_ll:.4f}",
        xy=(n_points-1, final_ll),
        xytext=(n_points*0.8, final_ll + 0.02),
        fontsize=10,
        arrowprops=dict(arrowstyle="->", color="blue"),
    )

    plt.tight_layout()
    plt.show()
else:
    print("⏭️ Visualización saltada (rolling_results es None)")
    print("   Ejecuta la validación temporal para ver este gráfico")

## 8. Comparación vs Odds del Mercado (El Test Definitivo)

Superar al azar no es suficiente. El verdadero test es superar a las **odds de cierre del mercado**.

Las casas de apuestas tienen equipos de analistas y modelos sofisticados. Si tu modelo no supera sus odds, no hay edge real.

Usamos [football-data.co.uk](https://www.football-data.co.uk/) que proporciona odds históricas gratuitas de múltiples casas (Bet365, Pinnacle, etc.).

In [None]:
# Descargar odds historicas de football-data.co.uk
def download_odds_data(seasons=("2526", "2425"), league_code=None, *, timeout=30, verify_ssl=False):
    """Download historical odds CSVs from football-data.co.uk.

    One CSV is requested per season; seasons that fail to download are
    reported on stdout and skipped, so a partial result is still returned.

    Args:
        seasons: Iterable of season codes substituted into the download URL
            (e.g. "2526").
        league_code: Division code substituted into the download URL. When
            None it is resolved with ``get_football_data_code(LEAGUE)``.
        timeout: Seconds to wait on each HTTP request before it is abandoned
            (the failure is reported like any other download error).
        verify_ssl: When False (the default, kept for backward
            compatibility) TLS certificate and hostname checks are disabled.
            Pass True to use the default verifying context.

    Returns:
        A DataFrame restricted to the result/odds columns that are present
        in the downloaded files plus a ``season`` column, with ``Date``
        parsed day-first and rows with unparseable dates dropped; or None
        when no season could be downloaded.
    """
    if league_code is None:
        league_code = get_football_data_code(LEAGUE)

    base_url = "https://www.football-data.co.uk/mmz4281/{season}/{league}.csv"
    ssl_context = ssl.create_default_context()
    if not verify_ssl:
        # SECURITY NOTE: certificate verification is switched off here, so the
        # downloaded data is not authenticated. Prefer verify_ssl=True when
        # the local certificate store allows it.
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
    all_data = []

    for season in seasons:
        url = base_url.format(season=season, league=league_code)
        print(f"Descargando: {url}")
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
                # The timeout keeps a stalled connection from blocking forever.
                with urllib.request.urlopen(req, context=ssl_context, timeout=timeout) as response:
                    csv_data = response.read().decode("latin-1")
            df = pd.read_csv(io.StringIO(csv_data))
            df["season"] = season
            all_data.append(df)
            print(f"  OK {len(df)} partidos")
        except urllib.error.HTTPError as e:
            print(f"  X HTTP Error {e.code}: {e.reason}")
        except Exception as e:
            # Best effort: report the failure and carry on with the next season.
            print(f"  X Error: {e}")

    if not all_data:
        return None

    df = pd.concat(all_data, ignore_index=True)
    cols_to_keep = ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "FTR",
                    "B365H", "B365D", "B365A", "PSH", "PSD", "PSA", "season"]
    # Not every file carries every bookmaker column; keep only what exists.
    cols_available = [c for c in cols_to_keep if c in df.columns]
    df = df[cols_available].copy()
    df["Date"] = pd.to_datetime(df["Date"], dayfirst=True, errors="coerce")
    df = df.dropna(subset=["Date"])
    return df

# Show which football-data.co.uk code the active league maps to, then fetch the odds
print(f"Liga: {LEAGUE} -> {get_football_data_code(LEAGUE)}")
odds_df = download_odds_data(seasons=["2526", "2425"])

if odds_df is None:
    print("No se pudieron descargar las odds.")
else:
    # Summarise how many matches were loaded and the date span they cover
    print(f"Total partidos: {len(odds_df)}")
    match_dates = odds_df["Date"]
    print(f"Rango: {match_dates.min().date()} a {match_dates.max().date()}")

In [None]:
# Run the backtest only when odds were actually downloaded
if odds_df is None or len(odds_df) == 0:
    backtest_results = None
    print("No hay datos de odds disponibles para backtest")
else:
    backtest_kwargs = dict(
        min_edge=0.03,
        use_pinnacle=True,
        league=LEAGUE,
        normalize_team_name=normalize_team_name,
    )
    backtest_results = backtest_vs_market(df_matches, odds_df, model, **backtest_kwargs)

In [None]:
# Backtest visualisation: four diagnostic panels followed by a text summary.
if backtest_results is not None and "value_bets" in backtest_results:
    value_bets = backtest_results["value_bets"]

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # 1. Cumulative P&L curve, shaded green above zero and red below it
    ax1 = axes[0, 0]
    ax1.plot(range(len(value_bets)), value_bets["cumulative_pnl"].values, lw=2, color="blue")
    ax1.axhline(0, color="red", ls="--", alpha=0.5)
    ax1.fill_between(range(len(value_bets)), 0, value_bets["cumulative_pnl"].values,
                     where=value_bets["cumulative_pnl"].values >= 0, color="green", alpha=0.3)
    ax1.fill_between(range(len(value_bets)), 0, value_bets["cumulative_pnl"].values,
                     where=value_bets["cumulative_pnl"].values < 0, color="red", alpha=0.3)
    ax1.set_xlabel("Número de apuesta")
    ax1.set_ylabel("P&L acumulado (unidades)")
    ax1.set_title(f"Evolución del Bankroll (ROI: {backtest_results['roi']:+.1f}%)")
    ax1.grid(True, alpha=0.3)

    # 2. Histogram of the edge (in percent) over all backtest rows
    ax2 = axes[0, 1]
    df_results = backtest_results["df_results"]
    ax2.hist(df_results["edge"] * 100, bins=50, edgecolor="black", alpha=0.7)
    ax2.axvline(0, color="red", ls="--", label="Edge = 0")
    ax2.axvline(3, color="green", ls="--", label="Min edge (3%)")
    ax2.set_xlabel("Edge vs mercado (%)")
    ax2.set_ylabel("Frecuencia")
    ax2.set_title("Distribución del Edge")
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 3. Win rate per edge bucket (work on a copy so value_bets stays untouched)
    ax3 = axes[1, 0]
    value_bets_copy = value_bets.copy()
    value_bets_copy["edge_bucket"] = pd.cut(value_bets_copy["edge"] * 100,
                                             bins=[3, 5, 7, 10, 15, 100],
                                             labels=["3-5%", "5-7%", "7-10%", "10-15%", "15%+"])
    win_rate_by_edge = value_bets_copy.groupby("edge_bucket", observed=True)["won"].mean() * 100
    if win_rate_by_edge.empty:
        # pandas' bar plot cannot draw an empty Series; show a placeholder
        # instead of aborting the whole figure.
        ax3.text(0.5, 0.5, "Sin value bets", ha="center", va="center", transform=ax3.transAxes)
    else:
        win_rate_by_edge.plot(kind="bar", ax=ax3, color="steelblue", edgecolor="black")
    ax3.axhline(33.3, color="red", ls="--", label="Benchmark (33.3%)")
    ax3.set_xlabel("Bucket de Edge")
    ax3.set_ylabel("Win Rate (%)")
    ax3.set_title("Win Rate por Nivel de Edge")
    ax3.legend()
    ax3.tick_params(axis='x', rotation=0)
    ax3.grid(True, alpha=0.3, axis="y")

    # 4. P&L aggregated per calendar month, green when >= 0 and red otherwise
    ax4 = axes[1, 1]
    value_bets_copy["month"] = pd.to_datetime(value_bets_copy["date"]).dt.to_period("M")
    monthly_pnl = value_bets_copy.groupby("month")["pnl"].sum()
    colors = ["green" if x >= 0 else "red" for x in monthly_pnl.values]
    if monthly_pnl.empty:
        # Same empty-data guard as for the win-rate panel.
        ax4.text(0.5, 0.5, "Sin value bets", ha="center", va="center", transform=ax4.transAxes)
    else:
        monthly_pnl.plot(kind="bar", ax=ax4, color=colors, edgecolor="black")
    ax4.axhline(0, color="black", lw=0.5)
    ax4.set_xlabel("Mes")
    ax4.set_ylabel("P&L (unidades)")
    ax4.set_title("P&L Mensual")
    ax4.tick_params(axis='x', rotation=45)
    ax4.grid(True, alpha=0.3, axis="y")

    plt.tight_layout()
    plt.show()

    # Final summary
    print("\n" + "=" * 70)
    print("RESUMEN FINAL: ¿ES RENTABLE EL MODELO?")
    print("=" * 70)

    model_ll = backtest_results.get("model_ll")
    market_ll = backtest_results.get("market_ll")
    roi = backtest_results["roi"]

    # The log-loss entries are optional; test for presence explicitly rather
    # than by truthiness so a legitimate 0.0 is not mistaken for "missing".
    has_log_loss = model_ll is not None and market_ll is not None

    print("\n1. LOG LOSS (capacidad predictiva):")
    if has_log_loss:
        print(f"   Modelo: {model_ll:.4f}")
        print(f"   Mercado: {market_ll:.4f}")
        if model_ll < market_ll:
            print("   → Modelo es MÁS preciso que el mercado ✓")
        else:
            print("   → Mercado es más preciso que el modelo ✗")

    print("\n2. ROI (rentabilidad):")
    print(f"   ROI total: {roi:+.2f}%")
    if roi > 0:
        print("   → Modelo es RENTABLE ✓")
    else:
        print("   → Modelo NO es rentable ✗")

    # Verdict combines both criteria: lower log loss than the market and ROI > 0
    print("\n3. CONCLUSIÓN:")
    if has_log_loss and model_ll < market_ll and roi > 0:
        print("   ✓✓ El modelo SUPERA al mercado y es RENTABLE")
        print("   → Considerar uso real con gestión de bankroll adecuada")
    elif roi > 0:
        print("   ✓ El modelo es rentable pero el mercado es más preciso")
        print("   → Posible suerte/varianza, necesita más datos")
    elif has_log_loss and model_ll < market_ll:
        print("   ~ El modelo es más preciso pero no rentable aún")
        print("   → Ajustar threshold de edge o mejorar modelo")
    else:
        print("   ✗ El modelo no supera al mercado ni es rentable")
        print("   → Necesita mejoras significativas")

## 9. Predicciones para la Próxima Jornada

Ahora sí, lo que viniste a buscar: **predicciones para la próxima jornada**.

El modelo ya está entrenado con todos los datos disponibles. Solo necesitas:
1. Definir los partidos de la jornada
2. (Opcional) Añadir las odds para encontrar value bets

In [None]:
# ============================================================
# STEP 1: FETCH FIXTURES AND ODDS FROM PS3838 (PINNACLE)
# ============================================================
from trading_deportivo.config import PS3838_USERNAME

print("=" * 70)
print("PASO 1: OBTENER PARTIDOS Y ODDS (PS3838 / Pinnacle)")
print("=" * 70)

if PS3838_USERNAME:
    print(f"\n[1] Descargando fixtures y odds de PS3838 para {LEAGUE}...")
    matches_api, odds_api, status = fetch_ps3838_odds(LEAGUE)

    if not matches_api:
        print(f"    FALLO: {status}")
        print(f"\n    No hay odds disponibles. PS3838 es la unica fuente.")
        proxima_jornada = []
        odds_api = {}
    else:
        # Split the fixtures into those whose two teams the model knows
        # and those with at least one unknown team
        proxima_jornada = []
        unknown = []
        for h, a in matches_api:
            if h in model["teams"] and a in model["teams"]:
                proxima_jornada.append((h, a))
            else:
                unknown.append((h, a))

        # Keep odds only for the retained fixtures (empty dict when absent)
        match_keys = [f"{h} vs {a}" for h, a in proxima_jornada]
        odds_api = {k: odds_api.get(k, {}) for k in match_keys}

        print(f"    OK - {len(proxima_jornada)} partidos con odds")
        if unknown:
            print(f"    AVISO: {len(unknown)} partidos ignorados (equipos no en modelo):")
            for h, a in unknown:
                missing = [team for team in (h, a) if team not in model["teams"]]
                print(f"      {h} vs {a} (falta: {', '.join(missing)})")

        # Use the first fixture's odds to report which markets are available
        sample = next(iter(odds_api.values()), {})
        n_totals = sum(1 for k in sample if k.startswith("over_"))
        n_spreads = sum(1 for k in sample if k.startswith("ah_home_"))
        has_ml = "home" in sample
        print(f"    Mercados: ML={'Si' if has_ml else 'No'}, Totals={n_totals} lineas, AH={n_spreads} lineas")
else:
    print("\n  ERROR: Credenciales PS3838 no configuradas en .env")
    print("  Necesitas PS3838_USERNAME y PS3838_PASSWORD")
    proxima_jornada = []
    odds_api = {}

# List the fixtures that will be predicted
if proxima_jornada:
    print(f"\n{'='*70}")
    print("PARTIDOS A PREDECIR:")
    print("=" * 70)
    for i, (h, a) in enumerate(proxima_jornada, 1):
        odds = odds_api.get(f"{h} vs {a}", {})
        # Moneyline prices are shown only when a home price is present
        ml_str = (
            f"  |  1={odds.get('home','?')} X={odds.get('draw','?')} 2={odds.get('away','?')}"
            if odds.get("home")
            else ""
        )
        n_ou = sum(1 for k in odds if k.startswith("over_"))
        n_ah = sum(1 for k in odds if k.startswith("ah_home_"))
        # The market counts are shown only when at least one O/U line exists
        if n_ou:
            markets_str = f"  |  O/U:{n_ou} AH:{n_ah}"
        else:
            markets_str = ""
        print(f"  {i:2d}. {h} vs {a}{ml_str}{markets_str}")

In [None]:
# ============================================================
# STEP 2: GENERATE PREDICTIONS
# ============================================================
# Consumes the fixtures and odds produced by Step 1

if not proxima_jornada:
    df_predictions = None
    print("No hay partidos para predecir")
    print("Ejecuta primero el Paso 1")
else:
    # Pass None instead of an empty odds mapping
    df_predictions = predict_matchday(
        proxima_jornada,
        model,
        odds=odds_api or None,
        min_edge=0.03,
    )

In [None]:
# ============================================================
# STEP 3: EXPORT PREDICTIONS TO CSV
# ============================================================
# Covers both "Step 2 never ran" (name undefined) and "Step 2 produced nothing"
if globals().get("df_predictions") is None:
    print("No hay predicciones para exportar")
    print("  Ejecuta primero Paso 1 y Paso 2")
else:
    export_predictions(df_predictions, LEAGUE)

## 10. ROI Tracking

Sistema para registrar apuestas, actualizar resultados y calcular el ROI real del modelo.

In [None]:
# ============================================================
# ROI TRACKING - Usage
# ============================================================
#
# Log a bet manually:
#   log_bet("Borussia Dortmund vs Mainz 05", "1X2", "1", 0.55, 2.10)
#
# Log bets from predictions (all value bets):
#   log_bets_from_predictions(df_predictions, min_edge=0.03)
#
# Update a result:
#   update_result("Borussia Dortmund vs Mainz 05", "1X2", "1")  # Home win
#   update_result("Borussia Dortmund vs Mainz 05", "1X2", "X")  # Draw
#
# Show statistics:
#   show_roi_stats()
#
# Show pending bets:
#   show_pending_bets()