In [2]:
# ===========================
# SISTEMA DE PREDICCI√ìN DE CORNERS - OPTIMIZADO PARA APUESTAS (VERSI√ìN COMPLETA)
# ===========================

import numpy as np
import pandas as pd
import joblib
from scipy.stats import poisson

# ===========================
# 1. CARGAR MODELO Y SCALER
# ===========================

print("\n" + "=" * 80)
print("üéØ SISTEMA DE PREDICCI√ìN DE CORNERS PARA APUESTAS (VERSI√ìN AVANZADA)")
print("=" * 80)

xgb_model = joblib.load('models/xgboost_corners_optimized_v2_6_leagues.pkl')  # ‚úÖ CAMBIO: versi√≥n v2
scaler = joblib.load('models/scaler_corners_xgb_v2_6_leagues.pkl')

print("‚úÖ Modelo cargado correctamente")
print(f"   Features esperadas: {len(scaler.feature_names_in_)}")

# ===========================
# 2. FUNCIONES AUXILIARES (IGUAL)
# ===========================

def calcular_probabilidades_poisson(lambda_pred, rango_inferior=5, rango_superior=5):
    """Calcula probabilidades usando distribuci√≥n de Poisson"""
    
    valor_central = int(round(lambda_pred))
    valores_analizar = range(
        max(0, valor_central - rango_inferior),
        valor_central + rango_superior + 1
    )
    
    probabilidades_exactas = {}
    for k in valores_analizar:
        prob = poisson.pmf(k, lambda_pred) * 100
        probabilidades_exactas[k] = prob
    
    # ‚úÖ CORRECCI√ìN: MISMAS L√çNEAS PARA OVER Y UNDER
    lines = [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]
    
    probabilidades_over = {}
    for linea in lines:
        prob_over = (1 - poisson.cdf(linea, lambda_pred)) * 100
        probabilidades_over[linea] = prob_over
    
    probabilidades_under = {}
    for linea in lines:  # ‚úÖ CAMBIO: usar la misma lista
        prob_under = poisson.cdf(linea, lambda_pred) * 100
        probabilidades_under[linea] = prob_under
    
    return {
        'exactas': probabilidades_exactas,
        'over': probabilidades_over,
        'under': probabilidades_under
    }

def clasificar_confianza(prob):
    """Clasifica la confianza seg√∫n probabilidad"""
    if prob >= 66:
        return "ALTA ‚úÖ"
    elif prob >= 55:
        return "MEDIA ‚ö†Ô∏è"
    else:
        return "BAJA ‚ùå"

# ===========================
# 3. ‚úÖ NUEVA FUNCI√ìN GET_AVERAGE CON M√âTRICAS AVANZADAS
# ===========================

def get_average(df, is_team=False, lst_avg=None):
    """Calcula promedios de estad√≠sticas (VERSI√ìN COMPLETA)"""
    
    if len(df) == 0:
        if is_team:
            # ‚úÖ Retornar 23 valores (m√©tricas avanzadas)
            return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        return (0, 0, 0, 0, 0, 0, 0, 0, 0)
    
    if is_team:
        # ===========================
        # ESTAD√çSTICAS B√ÅSICAS (NORMALIZADAS)
        # ===========================
        avg_cross = (df['Performance_Crs'].sum() / len(df)) - lst_avg[3]
        avg_att_3rd = (df['Touches_Att 3rd'].sum() / len(df)) - lst_avg[4]
        avg_sca = (df['SCA Types_SCA'].sum() / len(df)) - lst_avg[2]
        avg_xg = (df['Expected_xG'].sum() / len(df)) - lst_avg[1]
        
        # ‚úÖ VARIANZA DE CORNERS
        var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
        avg_ck = (df['Pass Types_CK'].sum() / len(df)) - lst_avg[8]
        
        avg_poss = (df['Poss'].sum() / len(df)) - 50
        avg_gf = (df['GF'].sum() / len(df)) - lst_avg[5]
        avg_ga = (df['GA'].sum() / len(df)) - lst_avg[6]
        
        # ===========================
        # M√âTRICAS OFENSIVAS AVANZADAS
        # ===========================
        total_sh = df['Standard_Sh'].sum()
        sh_accuracy = (df['Standard_SoT'].sum() / total_sh) if total_sh > 0 else 0
        xg_shot = (df['Expected_xG'].sum() / total_sh) if total_sh > 0 else 0
        
        total_touches = df['Touches_Touches'].sum()
        attacking_presence = (df['Touches_Att 3rd'].sum() / total_touches) if total_touches > 0 else 0
        
        total_poss = df['Poss'].sum()
        possession_shot = (total_sh / total_poss) if total_poss > 0 else 0
        
        # ===========================
        # M√âTRICAS DE CREACI√ìN
        # ===========================
        total_passes = df['Total_Att'].sum()
        progressive_pass_ratio = (df['PrgP'].sum() / total_passes) if total_passes > 0 else 0
        final_third_involvement = (df['1/3'].sum() / total_passes) if total_passes > 0 else 0
        
        total_sca = df['SCA Types_SCA'].sum()
        assist_sca = (df['Ast'].sum() / total_sca) if total_sca > 0 else 0
        creative_efficiency = (total_sca / total_poss) if total_poss > 0 else 0
        
        # ===========================
        # M√âTRICAS DEFENSIVAS
        # ===========================
        total_tackles = df['Tackles_Tkl'].sum()
        high_press_intensity = (df['Tackles_Att 3rd'].sum() / total_tackles) if total_tackles > 0 else 0
        interception_tackle = (df['Int'].sum() / total_tackles) if total_tackles > 0 else 0
        
        total_defensive_actions = total_tackles + df['Int'].sum()
        clearance_ratio = (df['Clr'].sum() / total_defensive_actions) if total_defensive_actions > 0 else 0
        
        # ===========================
        # M√âTRICAS DE POSESI√ìN
        # ===========================
        total_carries = df['Carries_Carries'].sum()
        progressive_carry_ratio = (df['Carries_PrgC'].sum() / total_carries) if total_carries > 0 else 0
        
        total_prog_passes = df['PrgP'].sum()
        carry_pass_balance = (df['Carries_PrgC'].sum() / total_prog_passes) if total_prog_passes > 0 else 0
        
        # ===========================
        # √çNDICES COMPUESTOS
        # ===========================
        avg_gf_raw = df['GF'].mean()
        avg_xg_raw = df['Expected_xG'].mean()
        avg_sot = df['Standard_SoT'].mean()
        avg_sh = df['Standard_Sh'].mean()
        offensive_index = (avg_gf_raw + avg_xg_raw) * (avg_sot / avg_sh) if avg_sh > 0 else 0
        
        avg_prgp = df['PrgP'].mean()
        avg_prgc = df['Carries_PrgC'].mean()
        avg_poss_raw = df['Poss'].mean()
        transition_index = ((avg_prgp + avg_prgc) / avg_poss_raw) if avg_poss_raw > 0 else 0
        
        # ‚úÖ RETORNAR 23 VALORES
        return (
            avg_ck,           # 0
            var_ck,           # 1 - ‚úÖ NUEVO
            avg_xg,           # 2
            avg_sca,          # 3
            avg_cross,        # 4
            avg_poss,         # 5
            avg_att_3rd,      # 6
            avg_gf,           # 7
            avg_ga,           # 8
            sh_accuracy,      # 9
            xg_shot,          # 10
            attacking_presence,  # 11
            possession_shot,  # 12
            progressive_pass_ratio,  # 13
            final_third_involvement,  # 14
            assist_sca,       # 15
            creative_efficiency,  # 16
            high_press_intensity,  # 17
            interception_tackle,  # 18
            clearance_ratio,  # 19
            progressive_carry_ratio,  # 20
            carry_pass_balance,  # 21
            offensive_index,  # 22
            transition_index  # 23
        )
    
    # ===========================
    # PROMEDIOS DE LIGA (is_team=False)
    # ===========================
    avg_cross = df['Performance_Crs'].mean()
    avg_att_3rd = df['Touches_Att 3rd'].mean()
    avg_sca = df['SCA Types_SCA'].mean()
    avg_xg = df['Expected_xG'].mean()
    var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
    avg_ck = df['Pass Types_CK'].mean()
    avg_gf = df['GF'].mean()
    avg_ga = df['GA'].mean()
    avg_sh = df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0
    
    return (
        var_ck,      # 0
        avg_xg,      # 1
        avg_sca,     # 2
        avg_cross,   # 3
        avg_att_3rd, # 4
        avg_gf,      # 5
        avg_ga,      # 6
        avg_sh,      # 7
        avg_ck       # 8
    )

# ===========================
# 4. ‚úÖ NUEVA FUNCI√ìN GET_TEAM_PPP
# ===========================

def get_points_from_result(result):
    """Convierte resultado (W/D/L) a puntos"""
    if result == 'W':
        return 3
    elif result == 'D':
        return 1
    else:
        return 0

def get_team_ppp(df, team, season, round_num, league=None):
    """Calcula puntos por partido (PPP) de un equipo"""
    team_matches = df[
        (df['team'] == team) & 
        (df['season'] == season) & 
        (df['round'] < round_num)
    ]
    
    if league is not None:
        team_matches = team_matches[team_matches['league'] == league]
    
    if len(team_matches) == 0:
        return 0.0
    
    total_points = team_matches['result'].apply(get_points_from_result).sum()
    ppp = total_points / len(team_matches)
    
    return ppp

def get_ppp_difference(df, local, away, season, round_num, league=None):
    """Calcula diferencia de PPP entre local y visitante"""
    local_ppp = get_team_ppp(df, local, season, round_num, league)
    away_ppp = get_team_ppp(df, away, season, round_num, league)
    return local_ppp - away_ppp


# ===========================
# 5. ‚úÖ FUNCI√ìN PRINCIPAL DE PREDICCI√ìN (ACTUALIZADA)
# ===========================

def predecir_corners(local, visitante, jornada, temporada="2526", league_code="ESP"):
    """
    Predice corners totales con an√°lisis completo para apuestas
    
    Args:
        local: Equipo local
        visitante: Equipo visitante
        jornada: N√∫mero de jornada
        temporada: Temporada (formato "2526")
        league_code: C√≥digo de liga ("ESP", "GER", "FRA", "ITA", "NED")
    """
    
    print(f"\n{'='*80}")
    print(f"üèüÔ∏è  {local} vs {visitante}")
    print(f"üìÖ Temporada {temporada} | Jornada {jornada} | Liga: {league_code}")
    print(f"{'='*80}")
    
    if jornada < 5:
        return {
            "error": "‚ùå Se necesitan al menos 5 jornadas previas",
            "prediccion": None
        }
    
    try:
        # ===========================
        # EXTRAER FEATURES (igual que antes)
        # ===========================
        
        lst_avg = get_average(
            df_database[
                (df_database['season'] == temporada) & 
                (df_database['round'] < jornada) &
                (df_database['league'] == league_code)
            ],
            is_team=False
        )
        
        (team1_home, team1_away, team1_opp_home, team1_opp_away,
         team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
            df_database, temporada, jornada, local, visitante, league=league_code
        )
        
        index = lst_years.index(temporada)
        result = lst_years[:index+1]
        team1_h2h, team2_h2h = get_head_2_head(
            df_database, local, visitante, seasons=result, league=league_code
        )
        
        local_ppp = get_team_ppp(df_database, local, temporada, jornada, league=league_code)
        away_ppp = get_team_ppp(df_database, visitante, temporada, jornada, league=league_code)
        ppp_diff = local_ppp - away_ppp
        
        # ===========================
        # CONSTRUIR DICCIONARIO DE FEATURES (igual que antes)
        # ===========================
        
        def create_line(df, is_form=True, is_team=False, use_advanced=True):
            if is_form:
                df = df[-6:]
            if use_advanced:
                return get_average(df, is_team, lst_avg)
            else:
                result = get_average(df, is_team, lst_avg)
                return result[:9]
        
        dic_features = {}
        
        dic_features['ppp_local'] = (local_ppp,)
        dic_features['ppp_away'] = (away_ppp,)
        dic_features['ppp_difference'] = (ppp_diff,)
        
        dic_features['lst_team1_home_form'] = create_line(team1_home, True, True, use_advanced=True)
        dic_features['lst_team1_home_general'] = create_line(team1_home, False, True, use_advanced=True)
        dic_features['lst_team1_away_form'] = create_line(team1_away, True, True, use_advanced=True)
        dic_features['lst_team1_away_general'] = create_line(team1_away, False, True, use_advanced=True)
        
        dic_features['lst_team2_home_form'] = create_line(team2_home, True, True, use_advanced=True)
        dic_features['lst_team2_home_general'] = create_line(team2_home, False, True, use_advanced=True)
        dic_features['lst_team2_away_form'] = create_line(team2_away, True, True, use_advanced=True)
        dic_features['lst_team2_away_general'] = create_line(team2_away, False, True, use_advanced=True)
        
        dic_features['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
        dic_features['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)
        
        dic_features['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
        dic_features['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)
        
        league_dummies = {
            'league_ESP': 1 if league_code == 'ESP' else 0,
            'league_GER': 1 if league_code == 'GER' else 0,
            'league_FRA': 1 if league_code == 'FRA' else 0,
            'league_ITA': 1 if league_code == 'ITA' else 0,
            'league_NED': 1 if league_code == 'NED' else 0
        }
        
        for key, value in league_dummies.items():
            dic_features[key] = (value,)
        
        # ===========================
        # CONSTRUIR VECTOR DE FEATURES
        # ===========================
        
        lst_base_advanced = [
            "avg_ck", "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
            "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
            "high_press_intensity", "interception_tackle", "clearance_ratio",
            "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
        ]
        
        lst_base_original = [
            "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga", "avg_ck"
        ]
        
        lst_features_values = []
        lst_features_names = []
        
        for key in dic_features:
            lst_features_values.extend(list(dic_features[key]))
            
            if key in ['ppp_local', 'ppp_away', 'ppp_difference']:
                lst_features_names.append(key)
            elif key.startswith('league_'):
                lst_features_names.append(key)
            elif key in ['lst_team1_opp_away', 'lst_team2_opp_home']:
                lst_features_names.extend([f"{key}_{col}" for col in lst_base_original])
            else:
                lst_features_names.extend([f"{key}_{col}" for col in lst_base_advanced])
        
        df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
        
        expected_features = scaler.feature_names_in_
        
        if len(df_input.columns) != len(expected_features):
            print(f"\n‚ö†Ô∏è ERROR: N√∫mero de features no coincide")
            print(f"   Esperadas: {len(expected_features)}")
            print(f"   Recibidas: {len(df_input.columns)}")
            return {"error": "Desajuste de features", "prediccion": None}
        
        df_input = df_input[expected_features]
        
        X_input_scaled = pd.DataFrame(
            scaler.transform(df_input), 
            columns=df_input.columns
        )
        
        # ===========================
        # PREDICCI√ìN
        # ===========================
        
        prediccion = xgb_model.predict(X_input_scaled)[0]
        
        # ===========================
        # ‚úÖ AN√ÅLISIS PROBABIL√çSTICO CON POISSON
        # ===========================
        
        analisis = calcular_probabilidades_poisson(prediccion, rango_inferior=5, rango_superior=5)
        
        # ===========================
        # ESTAD√çSTICAS DETALLADAS
        # ===========================
        
        local_ck_home = team1_home['Pass Types_CK'].mean() if len(team1_home) > 0 else 0
        local_xg_home = team1_home['Expected_xG'].mean() if len(team1_home) > 0 else 0
        local_poss_home = team1_home['Poss'].mean() if len(team1_home) > 0 else 0
        
        away_ck_away = team2_away['Pass Types_CK'].mean() if len(team2_away) > 0 else 0
        away_xg_away = team2_away['Expected_xG'].mean() if len(team2_away) > 0 else 0
        away_poss_away = team2_away['Poss'].mean() if len(team2_away) > 0 else 0
        
        local_ck_received = team1_opp_home['Pass Types_CK'].mean() if len(team1_opp_home) > 0 else 0
        away_ck_received = team2_opp_away['Pass Types_CK'].mean() if len(team2_opp_away) > 0 else 0
        
        partido_ck_esperado = local_ck_home + away_ck_away
        
        h2h_ck_local = team1_h2h['Pass Types_CK'].mean() if len(team1_h2h) > 0 else 0
        h2h_ck_away = team2_h2h['Pass Types_CK'].mean() if len(team2_h2h) > 0 else 0
        h2h_total = h2h_ck_local + h2h_ck_away
        
        # ===========================
        # ‚úÖ MOSTRAR RESULTADOS CON PROBABILIDADES
        # ===========================
        
        print(f"\nüé≤ PREDICCI√ìN MODELO: {prediccion:.2f} corners totales")
        print(f"   PPP: {local} ({local_ppp:.2f}) vs {visitante} ({away_ppp:.2f}) | Diff: {ppp_diff:+.2f}")
        
        print(f"\nüìä ESTAD√çSTICAS HIST√ìRICAS:")
        print(f"   {local} (Casa): {local_ck_home:.1f} CK/partido | xG: {local_xg_home:.2f} | Poss: {local_poss_home:.1f}%")
        print(f"   {visitante} (Fuera): {away_ck_away:.1f} CK/partido | xG: {away_xg_away:.2f} | Poss: {away_poss_away:.1f}%")
        print(f"   Corners recibidos: {local} ({local_ck_received:.1f}) | {visitante} ({away_ck_received:.1f})")
        print(f"   Total esperado (suma): {partido_ck_esperado:.1f} corners")
        
        if len(team1_h2h) > 0 or len(team2_h2h) > 0:
            print(f"\nüîÑ HEAD TO HEAD (√∫ltimos {max(len(team1_h2h), len(team2_h2h))} partidos):")
            print(f"   {local}: {h2h_ck_local:.1f} CK/partido")
            print(f"   {visitante}: {h2h_ck_away:.1f} CK/partido")
            print(f"   Promedio total: {h2h_total:.1f} corners")
        
        # ===========================
        # ‚úÖ MOSTRAR PROBABILIDADES EXACTAS
        # ===========================
        
        valor_mas_probable = max(analisis['exactas'].items(), key=lambda x: x[1])
        
        print(f"\nüìà PROBABILIDADES EXACTAS (Poisson):")
        for k in sorted(analisis['exactas'].keys()):
            prob = analisis['exactas'][k]
            bar = '‚ñà' * int(prob / 2)
            marca = ' ‚≠ê' if k == valor_mas_probable[0] else ''
            print(f"   {k:2d} corners: {prob:5.2f}% {bar}{marca}")
        
        print(f"\n‚úÖ Valor m√°s probable: {valor_mas_probable[0]} corners ({valor_mas_probable[1]:.2f}%)")
        
        # ‚úÖ RANGO DE 80% CONFIANZA
        probs_sorted = sorted(analisis['exactas'].items(), key=lambda x: x[1], reverse=True)
        cumsum = 0
        rango_80 = []
        for val, prob in probs_sorted:
            cumsum += prob
            rango_80.append(val)
            if cumsum >= 80:
                break
        
        print(f"üìä Rango 80% confianza: {min(rango_80)}-{max(rango_80)} corners")
        
        # ===========================
        # ‚úÖ MOSTRAR OVER/UNDER CON CUOTAS IMPL√çCITAS
        # ===========================
        
        print(f"\nüéØ AN√ÅLISIS OVER/UNDER:")
        print(f"{'L√≠nea':<10} {'Prob Over':<12} {'Cuota Impl':<12} {'Confianza':<15} {'Prob Under':<12} {'Cuota Impl':<12}")
        print("-" * 85)
        
        for linea in [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]:
            prob_over = analisis['over'][linea]
            prob_under = analisis['under'][linea]
            
            # Cuotas impl√≠citas (inverso de probabilidad en decimal)
            cuota_impl_over = 100 / prob_over if prob_over > 0 else 999
            cuota_impl_under = 100 / prob_under if prob_under > 0 else 999
            
            conf_over = clasificar_confianza(prob_over)
            
            print(f"O/U {linea:<5} {prob_over:6.2f}%     @{cuota_impl_over:5.2f}      {conf_over:<15} {prob_under:6.2f}%     @{cuota_impl_under:5.2f}")
        
        # ===========================
        # ‚úÖ RECOMENDACIONES CON CUOTAS
        # ===========================
        
        print(f"\nüí° RECOMENDACIONES DE APUESTA:")
        
        mejores_over = [(l, p) for l, p in analisis['over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in analisis['under'].items() if p >= 55]
        
        if mejores_over:
            print(f"\n‚úÖ OVER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_over, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f"   ‚Ä¢ Over {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")
        
        if mejores_under:
            print(f"\n‚úÖ UNDER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_under, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f"   ‚Ä¢ Under {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")
        
        if not mejores_over and not mejores_under:
            print(f"   ‚ö†Ô∏è No hay apuestas con confianza MEDIA o superior")
        
        # ===========================
        # ‚úÖ AN√ÅLISIS DE RIESGO
        # ===========================
        
        df_varianza_temp = analizar_fiabilidad_equipos(df_database, temporada=temporada, min_partidos=3)
        riesgo = obtener_fiabilidad_partido(local, visitante, df_varianza_temp)

        print(f"\n‚ö†Ô∏è AN√ÅLISIS DE RIESGO:")
        print(f"   Local ({local}): {riesgo['nivel_local']} (CV: {riesgo['cv_local']:.1f}%)")
        print(f"   Away ({visitante}): {riesgo['nivel_away']} (CV: {riesgo['cv_away']:.1f}%)")
        print(f"   üé≤ FIABILIDAD PARTIDO: {riesgo['fiabilidad']} (Score: {riesgo['score_promedio']:.1f})")
        print(f"   üí° {riesgo['mensaje']}")
        
        # ===========================
        # RETORNAR DICCIONARIO COMPLETO
        # ===========================
        
        return {
            "prediccion": round(prediccion, 2),
            "local": local,
            "visitante": visitante,
            "ppp_local": local_ppp,
            "ppp_away": away_ppp,
            "ppp_diff": ppp_diff,
            "riesgo": riesgo,
            "stats": {
                "local_ck": local_ck_home,
                "away_ck": away_ck_away,
                "local_ck_received": local_ck_received,
                "away_ck_received": away_ck_received,
                "h2h_total": h2h_total,
                "partido_esperado": partido_ck_esperado
            },
            "probabilidades_exactas": analisis['exactas'],
            "probabilidades_over": analisis['over'],
            "probabilidades_under": analisis['under'],
            "valor_mas_probable": valor_mas_probable[0],
            "prob_mas_probable": valor_mas_probable[1],
            "rango_80": (min(rango_80), max(rango_80))
        }
        
    except Exception as e:
        print(f"\n‚ùå ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        return {"error": str(e), "prediccion": None}



def predecir_partidos_batch(partidos, jornada, temporada="2526", league_code="ESP", export_csv=True, filename=None):
    """
    Predice corners para m√∫ltiples partidos y exporta resultados a CSV
    
    Args:
        partidos: Lista de tuplas [(local1, visitante1), (local2, visitante2), ...]
        jornada: N√∫mero de jornada
        temporada: Temporada (formato "2526")
        league_code: C√≥digo de liga ("ESP", "GER", "FRA", "ITA", "NED")
        export_csv: Si True, exporta a CSV
        filename: Nombre del archivo CSV (opcional)
    
    Returns:
        DataFrame con todos los resultados
    """
    
    resultados = []
    
    print("\n" + "=" * 120)
    print(f"üéØ PROCESANDO {len(partidos)} PARTIDOS - {league_code} | J{jornada} | Temporada {temporada}")
    print("=" * 120)
    
    for idx, (local, visitante) in enumerate(partidos, 1):
        print(f"\n[{idx}/{len(partidos)}] Procesando: {local} vs {visitante}...")
        
        resultado = predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code
        )
        
        if resultado.get("error"):
            print(f"   ‚ùå Error: {resultado['error']}")
            continue
        
        # ===========================
        # CONSTRUIR FILA DE DATOS
        # ===========================
        
        fila = {
            'Partido': f"{local} vs {visitante}",
            'Local': local,
            'Visitante': visitante,
            'Liga': league_code,
            'Jornada': jornada,
            'Temporada': temporada,
            
            # Predicci√≥n
            'Prediccion': resultado['prediccion'],
            'Valor_Mas_Probable': resultado['valor_mas_probable'],
            'Prob_Valor_Mas_Probable_%': round(resultado['prob_mas_probable'], 2),
            'Rango_80%_Min': resultado['rango_80'][0],
            'Rango_80%_Max': resultado['rango_80'][1],
            
            # PPP
            'PPP_Local': round(resultado['ppp_local'], 2),
            'PPP_Away': round(resultado['ppp_away'], 2),
            'PPP_Diferencia': round(resultado['ppp_diff'], 2),
            
            # Estad√≠sticas hist√≥ricas
            'CK_Local_Casa': round(resultado['stats']['local_ck'], 1),
            'CK_Away_Fuera': round(resultado['stats']['away_ck'], 1),
            'CK_Local_Recibidos': round(resultado['stats']['local_ck_received'], 1),
            'CK_Away_Recibidos': round(resultado['stats']['away_ck_received'], 1),
            'CK_Esperado_Suma': round(resultado['stats']['partido_esperado'], 1),
            'CK_H2H_Total': round(resultado['stats']['h2h_total'], 1) if resultado['stats']['h2h_total'] > 0 else 'N/A',
            
            # Riesgo
            'Fiabilidad_Partido': resultado['riesgo']['fiabilidad'],
            'Score_Fiabilidad': round(resultado['riesgo']['score_promedio'], 1),
            'Nivel_Local': resultado['riesgo']['nivel_local'],
            'Nivel_Away': resultado['riesgo']['nivel_away'],
            'CV_Local_%': round(resultado['riesgo']['cv_local'], 1),
            'CV_Away_%': round(resultado['riesgo']['cv_away'], 1),
        }
        
        # ===========================
        # OVER 6.5 a 10.5
        # ===========================
        for linea in [6.5, 7.5, 8.5, 9.5, 10.5]:
            prob = resultado['probabilidades_over'].get(linea, 0)
            cuota_impl = round(100 / prob, 2) if prob > 0 else 999
            conf = clasificar_confianza(prob)
            
            fila[f'Over_{linea}_Prob_%'] = round(prob, 2)
            fila[f'Over_{linea}_Cuota'] = cuota_impl
            fila[f'Over_{linea}_Confianza'] = conf
        
        # ===========================
        # UNDER 12.5 a 9.5
        # ===========================
        for linea in [12.5, 11.5, 10.5, 9.5]:
            prob = resultado['probabilidades_under'].get(linea, 0)
            cuota_impl = round(100 / prob, 2) if prob > 0 else 999
            conf = clasificar_confianza(prob)
            
            fila[f'Under_{linea}_Prob_%'] = round(prob, 2)
            fila[f'Under_{linea}_Cuota'] = cuota_impl
            fila[f'Under_{linea}_Confianza'] = conf
        
        # ===========================
        # RECOMENDACIONES
        # ===========================
        
        mejores_over = [(l, p) for l, p in resultado['probabilidades_over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in resultado['probabilidades_under'].items() if p >= 55]
        
        if resultado['riesgo']['score_promedio'] < 35:
            fila['Recomendacion'] = "‚õî EVITAR - Baja fiabilidad"
            fila['Es_Apostable'] = "NO"
        elif not mejores_over and not mejores_under:
            fila['Recomendacion'] = "‚ö†Ô∏è NO RECOMENDADO - Sin confianza suficiente"
            fila['Es_Apostable'] = "NO"
        else:
            recomendaciones = []
            
            if mejores_over:
                mejor_over = max(mejores_over, key=lambda x: x[1])
                cuota_over = round(100 / mejor_over[1], 2)
                recomendaciones.append(f"Over {mejor_over[0]} ({mejor_over[1]:.1f}% @{cuota_over})")
            
            if mejores_under:
                mejor_under = max(mejores_under, key=lambda x: x[1])
                cuota_under = round(100 / mejor_under[1], 2)
                recomendaciones.append(f"Under {mejor_under[0]} ({mejor_under[1]:.1f}% @{cuota_under})")
            
            fila['Recomendacion'] = " | ".join(recomendaciones)
            
            if resultado['riesgo']['score_promedio'] >= 65:
                fila['Es_Apostable'] = "S√ç ‚≠ê‚≠ê‚≠ê"
            elif resultado['riesgo']['score_promedio'] >= 50:
                fila['Es_Apostable'] = "S√ç ‚úÖ"
            else:
                fila['Es_Apostable'] = "PRECAUCI√ìN üü°"
        
        fila['Mensaje_Riesgo'] = resultado['riesgo']['mensaje']
        
        resultados.append(fila)
        print(f"   ‚úÖ Completado")
    
    # ===========================
    # CREAR DATAFRAME
    # ===========================
    
    df_resultados = pd.DataFrame(resultados)
    
    print("\n" + "=" * 120)
    print(f"‚úÖ PROCESAMIENTO COMPLETADO: {len(df_resultados)} partidos analizados")
    print("=" * 120)
    
    # ===========================
    # EXPORTAR A CSV
    # ===========================
    
    if export_csv and len(df_resultados) > 0:
        if filename is None:
            filename = f"predicciones_{league_code}_J{jornada}_{temporada}.csv"
        
        df_resultados.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"\nüíæ Resultados exportados a: {filename}")
    
    # ===========================
    # RESUMEN
    # ===========================
    
    print(f"\nüìä RESUMEN DE APUESTAS:")
    print(f"   Partidos apostables: {len(df_resultados[df_resultados['Es_Apostable'].str.contains('S√ç')])} / {len(df_resultados)}")
    print(f"   Partidos ALTA confianza (‚≠ê‚≠ê‚≠ê): {len(df_resultados[df_resultados['Es_Apostable'] == 'S√ç ‚≠ê‚≠ê‚≠ê'])}")
    print(f"   Partidos MEDIA confianza (‚úÖ): {len(df_resultados[df_resultados['Es_Apostable'] == 'S√ç ‚úÖ'])}")
    print(f"   Partidos a evitar (‚õî): {len(df_resultados[df_resultados['Es_Apostable'] == 'NO'])}")
    
    return df_resultados


# ===========================
# FUNCI√ìN DE RESUMEN VISUAL
# ===========================

def mostrar_resumen_batch(df_resultados):
    """Muestra resumen visual de los resultados"""
    
    print("\n" + "=" * 120)
    print("üéØ MEJORES OPORTUNIDADES DE APUESTA")
    print("=" * 120)
    
    # Filtrar solo apostables
    df_apostables = df_resultados[df_resultados['Es_Apostable'].str.contains('S√ç')].copy()
    
    if len(df_apostables) == 0:
        print("\n‚ö†Ô∏è No se encontraron partidos con oportunidades de apuesta")
        return
    
    # Ordenar por score de fiabilidad
    df_apostables = df_apostables.sort_values('Score_Fiabilidad', ascending=False)
    
    for idx, row in df_apostables.iterrows():
        print(f"\n{'='*120}")
        print(f"üèüÔ∏è  {row['Partido']}")
        print(f"{'='*120}")
        print(f"üìä Predicci√≥n: {row['Prediccion']:.2f} corners | Valor m√°s probable: {row['Valor_Mas_Probable']} ({row['Prob_Valor_Mas_Probable_%']:.1f}%)")
        print(f"üìà Hist√≥rico: Local {row['CK_Local_Casa']:.1f} CK | Away {row['CK_Away_Fuera']:.1f} CK | H2H: {row['CK_H2H_Total']}")
        print(f"üé≤ Fiabilidad: {row['Fiabilidad_Partido']} (Score: {row['Score_Fiabilidad']:.1f}/100)")
        print(f"üí° {row['Recomendacion']}")
        
        # Mostrar l√≠neas con alta probabilidad
        print(f"\n   üìå L√≠neas destacadas:")
        for linea in [7.5, 8.5, 9.5, 10.5]:
            over_prob = row.get(f'Over_{linea}_Prob_%', 0)
            under_prob = row.get(f'Under_{linea}_Prob_%', 0)
            
            if over_prob >= 55:
                cuota = row.get(f'Over_{linea}_Cuota', 0)
                conf = row.get(f'Over_{linea}_Confianza', '')
                print(f"   ‚Ä¢ Over {linea}: {over_prob:.1f}% @{cuota:.2f} - {conf}")
            
            if under_prob >= 55:
                cuota = row.get(f'Under_{linea}_Cuota', 0)
                conf = row.get(f'Under_{linea}_Confianza', '')
                print(f"   ‚Ä¢ Under {linea}: {under_prob:.1f}% @{cuota:.2f} - {conf}")

print("\n‚úÖ Funciones de procesamiento batch listas")
print("\n‚úÖ Sistema listo con probabilidades Poisson y cuotas impl√≠citas")


üéØ SISTEMA DE PREDICCI√ìN DE CORNERS PARA APUESTAS (VERSI√ìN AVANZADA)
‚úÖ Modelo cargado correctamente
   Features esperadas: 266

‚úÖ Funciones de procesamiento batch listas

‚úÖ Sistema listo con probabilidades Poisson y cuotas impl√≠citas


In [3]:
# ===========================
# SISTEMA AVANZADO DE DETECCI√ìN DE PARTIDOS APOSTABLES
# ===========================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

def analizar_fiabilidad_equipos(df_database, temporada="2526", min_partidos=5):
    """
    An√°lisis completo de fiabilidad para apuestas de corners
    No solo varianza, sino consistencia, tendencias y patrones
    """
    
    df_temp = df_database[df_database['season'] == temporada].copy()
    resultados = []
    equipos = pd.concat([df_temp['team'], df_temp['opponent']]).unique()
    
    for equipo in equipos:
        # Partidos del equipo
        partidos_equipo = df_temp[df_temp['team'] == equipo]
        
        if len(partidos_equipo) < min_partidos:
            continue
        
        ck_sacados = partidos_equipo['Pass Types_CK'].values
        
        # ===========================
        # 1. M√âTRICAS DE VARIABILIDAD
        # ===========================
        media = ck_sacados.mean()
        std = ck_sacados.std()
        cv = (std / media * 100) if media > 0 else 0
        
        # ===========================
        # 2. M√âTRICAS DE CONSISTENCIA
        # ===========================
        
        # 2.1 Porcentaje de partidos cerca de la media (¬±2 corners)
        cerca_media = np.sum(np.abs(ck_sacados - media) <= 2) / len(ck_sacados) * 100
        
        # 2.2 Rachas (detectar equipos con "explosiones" de corners)
        cambios_bruscos = np.sum(np.abs(np.diff(ck_sacados)) > 4)
        pct_cambios_bruscos = cambios_bruscos / (len(ck_sacados) - 1) * 100
        
        # 2.3 Cuartiles (Q1, Q2=mediana, Q3)
        q1, q2, q3 = np.percentile(ck_sacados, [25, 50, 75])
        iqr = q3 - q1  # Rango intercuart√≠lico (m√°s robusto que std)
        
        # ===========================
        # 3. M√âTRICAS DE TENDENCIA
        # ===========================
        
        # 3.1 Tendencia lineal (¬ømejora/empeora con el tiempo?)
        jornadas = np.arange(len(ck_sacados))
        slope, intercept, r_value, p_value, std_err = stats.linregress(jornadas, ck_sacados)
        
        # 3.2 Autocorrelaci√≥n (¬øresultado actual predice el siguiente?)
        if len(ck_sacados) > 2:
            autocorr = np.corrcoef(ck_sacados[:-1], ck_sacados[1:])[0, 1]
        else:
            autocorr = 0
        
        # ===========================
        # 4. M√âTRICAS DE OUTLIERS
        # ===========================
        
        # 4.1 Detecci√≥n de valores at√≠picos (m√©todo IQR)
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        outliers = np.sum((ck_sacados < lower_bound) | (ck_sacados > upper_bound))
        pct_outliers = outliers / len(ck_sacados) * 100
        
        # 4.2 Z-score m√°ximo
        z_scores = np.abs(stats.zscore(ck_sacados))
        max_z = z_scores.max()
        
        # ===========================
        # 5. M√âTRICAS DE RANGO
        # ===========================
        
        rango = ck_sacados.max() - ck_sacados.min()
        rango_normalizado = rango / media if media > 0 else 0
        
        # ===========================
        # 6. SCORE GLOBAL DE FIABILIDAD
        # ===========================
        
        # Penalizaciones (0-100, menor = peor)
        score_cv = max(0, 100 - cv * 2)  # CV alto = mala
        score_consistencia = cerca_media  # M√°s cerca de media = mejor
        score_cambios = max(0, 100 - pct_cambios_bruscos * 2)  # Cambios bruscos = malo
        score_outliers = max(0, 100 - pct_outliers * 3)  # Outliers = malo
        score_iqr = max(0, 100 - iqr * 10)  # IQR grande = malo
        
        # Score final (promedio ponderado)
        score_fiabilidad = (
            score_cv * 0.25 +
            score_consistencia * 0.30 +
            score_cambios * 0.20 +
            score_outliers * 0.15 +
            score_iqr * 0.10
        )
        
        # ===========================
        # 7. CLASIFICACI√ìN MULTI-CRITERIO
        # ===========================
        
        # Clasificaci√≥n basada en score
        if score_fiabilidad >= 70:
            nivel = "EXCELENTE ‚≠ê‚≠ê‚≠ê"
            color = "#27ae60"
        elif score_fiabilidad >= 55:
            nivel = "BUENO ‚úÖ"
            color = "#2ecc71"
        elif score_fiabilidad >= 40:
            nivel = "ACEPTABLE üü°"
            color = "#f39c12"
        elif score_fiabilidad >= 25:
            nivel = "REGULAR ‚ö†Ô∏è"
            color = "#e67e22"
        else:
            nivel = "EVITAR ‚õî"
            color = "#e74c3c"
        
        resultados.append({
            'Equipo': equipo,
            'Partidos': len(ck_sacados),
            
            # Estad√≠sticas b√°sicas
            'Media_CK': round(media, 2),
            'Mediana_CK': round(q2, 2),
            'Std_CK': round(std, 2),
            'CV_%': round(cv, 1),
            
            # Consistencia
            'Pct_Cerca_Media': round(cerca_media, 1),
            'Cambios_Bruscos_%': round(pct_cambios_bruscos, 1),
            'IQR': round(iqr, 2),
            
            # Rango
            'Rango': int(rango),
            'Rango_Norm': round(rango_normalizado, 2),
            'Min': int(ck_sacados.min()),
            'Max': int(ck_sacados.max()),
            
            # Outliers
            'Outliers': int(outliers),
            'Pct_Outliers': round(pct_outliers, 1),
            'Max_ZScore': round(max_z, 2),
            
            # Tendencia
            'Tendencia_Slope': round(slope, 3),
            'Autocorr': round(autocorr, 3),
            
            # Score y clasificaci√≥n
            'Score_Fiabilidad': round(score_fiabilidad, 1),
            'Nivel': nivel,
            'Color': color
        })
    
    df_resultado = pd.DataFrame(resultados)

    print(df_resultado.head(10))
    df_resultado = df_resultado.sort_values('Score_Fiabilidad', ascending=False)
    
    return df_resultado


def mostrar_analisis_fiabilidad(df_analisis, top_n=10):
    """
    Muestra el an√°lisis completo de fiabilidad
    """
    
    print("\n" + "=" * 120)
    print("üéØ AN√ÅLISIS DE FIABILIDAD PARA APUESTAS - CORNERS")
    print("=" * 120)
    
    # TOP EQUIPOS FIABLES
    print(f"\n‚≠ê TOP {top_n} EQUIPOS M√ÅS FIABLES")
    print("-" * 120)
    
    top_fiables = df_analisis.head(top_n)
    
    for idx, row in top_fiables.iterrows():
        print(f"\n{row['Equipo']:25s} | {row['Nivel']:20s} | Score: {row['Score_Fiabilidad']:.1f}")
        print(f"  üìä Media: {row['Media_CK']:.1f} | Mediana: {row['Mediana_CK']:.1f} | CV: {row['CV_%']:.1f}%")
        print(f"  ‚úÖ {row['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {row['IQR']:.1f}")
        print(f"  ‚ö†Ô∏è Cambios bruscos: {row['Cambios_Bruscos_%']:.1f}% | Outliers: {row['Pct_Outliers']:.1f}%")
        print(f"  üìà Rango: {row['Min']}-{row['Max']} ({row['Rango']} corners)")
    
    # TOP EQUIPOS NO FIABLES
    print(f"\n\n‚õî TOP {top_n} EQUIPOS MENOS FIABLES")
    print("-" * 120)
    
    top_no_fiables = df_analisis.tail(top_n)
    
    for idx, row in top_no_fiables.iterrows():
        print(f"\n{row['Equipo']:25s} | {row['Nivel']:20s} | Score: {row['Score_Fiabilidad']:.1f}")
        print(f"  üìä Media: {row['Media_CK']:.1f} | Mediana: {row['Mediana_CK']:.1f} | CV: {row['CV_%']:.1f}%")
        print(f"  ‚ùå Solo {row['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {row['IQR']:.1f}")
        print(f"  ‚ö†Ô∏è Cambios bruscos: {row['Cambios_Bruscos_%']:.1f}% | Outliers: {row['Pct_Outliers']:.1f}%")
    
    # ESTAD√çSTICAS GENERALES
    print(f"\n\nüìä DISTRIBUCI√ìN POR NIVEL DE FIABILIDAD")
    print("-" * 120)
    print(df_analisis['Nivel'].value_counts())
    
    print(f"\nüìà ESTAD√çSTICAS DE SCORE:")
    print(f"  Media: {df_analisis['Score_Fiabilidad'].mean():.1f}")
    print(f"  Mediana: {df_analisis['Score_Fiabilidad'].median():.1f}")
    print(f"  Score m√°ximo: {df_analisis['Score_Fiabilidad'].max():.1f}")
    print(f"  Score m√≠nimo: {df_analisis['Score_Fiabilidad'].min():.1f}")


def obtener_fiabilidad_partido(local, visitante, df_analisis):
    """
    Eval√∫a la fiabilidad de un partido espec√≠fico
    """
    
    datos_local = df_analisis[df_analisis['Equipo'] == local]
    datos_away = df_analisis[df_analisis['Equipo'] == visitante]
    
    if datos_local.empty or datos_away.empty:
        return {
            'fiabilidad': 'DESCONOCIDO',
            'score': 0,
            'mensaje': '‚ö†Ô∏è Datos insuficientes'
        }
    
    score_local = datos_local['Score_Fiabilidad'].values[0]
    score_away = datos_away['Score_Fiabilidad'].values[0]
    score_promedio = (score_local + score_away) / 2
    
    # Clasificaci√≥n del partido
    if score_promedio >= 65:
        fiabilidad = "MUY ALTA ‚≠ê‚≠ê‚≠ê"
        mensaje = "‚úÖ EXCELENTE PARTIDO PARA APOSTAR"
    elif score_promedio >= 50:
        fiabilidad = "ALTA ‚úÖ"
        mensaje = "‚úÖ BUEN PARTIDO PARA APOSTAR"
    elif score_promedio >= 35:
        fiabilidad = "MEDIA üü°"
        mensaje = "üü° APOSTAR CON PRECAUCI√ìN"
    else:
        fiabilidad = "BAJA ‚õî"
        mensaje = "‚õî EVITAR APUESTA"
    
    return {
        'fiabilidad': fiabilidad,
        'score_local': score_local,
        'score_away': score_away,
        'score_promedio': score_promedio,
        'nivel_local': datos_local['Nivel'].values[0],
        'nivel_away': datos_away['Nivel'].values[0],
        'mensaje': mensaje,
        
        # Datos adicionales √∫tiles
        'cv_local': datos_local['CV_%'].values[0],
        'cv_away': datos_away['CV_%'].values[0],
        'consistencia_local': datos_local['Pct_Cerca_Media'].values[0],
        'consistencia_away': datos_away['Pct_Cerca_Media'].values[0]
    }


# ===========================
# EJECUTAR AN√ÅLISIS
# ===========================

df_fiabilidad = analizar_fiabilidad_equipos(df_database, temporada="2526", min_partidos=5)

mostrar_analisis_fiabilidad(df_fiabilidad, top_n=10)

# Guardar resultados
df_fiabilidad.to_csv('analisis_fiabilidad_equipos.csv', index=False)
print("\nüíæ Resultados guardados en 'analisis_fiabilidad_equipos.csv'")

# ===========================
# EJEMPLO DE USO
# ===========================

print("\n" + "=" * 120)
print("üéØ EJEMPLO: AN√ÅLISIS DE FIABILIDAD DE PARTIDO")
print("=" * 120)

fiabilidad = obtener_fiabilidad_partido("Barcelona", "Elche", df_fiabilidad)

print(f"\nüèüÔ∏è PARTIDO: Marseille vs Angers")
print(f"{'='*80}")
print(f"Local (Marseille): {fiabilidad['nivel_local']} (Score: {fiabilidad['score_local']:.1f})")
print(f"  ‚Ä¢ CV: {fiabilidad['cv_local']:.1f}% | Consistencia: {fiabilidad['consistencia_local']:.1f}%")
print(f"\nAway (Angers): {fiabilidad['nivel_away']} (Score: {fiabilidad['score_away']:.1f})")
print(f"  ‚Ä¢ CV: {fiabilidad['cv_away']:.1f}% | Consistencia: {fiabilidad['consistencia_away']:.1f}%")
print(f"\nüé≤ FIABILIDAD DEL PARTIDO: {fiabilidad['fiabilidad']}")
print(f"Score Promedio: {fiabilidad['score_promedio']:.1f}/100")
print(f"\nüí° {fiabilidad['mensaje']}")
print("=" * 80)

           Equipo  Partidos  Media_CK  Mediana_CK  Std_CK  CV_%  \
0         Arsenal        10      7.30         8.0    3.10  42.5   
1     Aston Villa        10      5.30         5.0    2.45  46.3   
2     Bournemouth         9      5.56         5.0    1.57  28.3   
3       Brentford        10      5.30         5.0    2.53  47.8   
4        Brighton        10      4.80         4.0    2.18  45.5   
5         Burnley        10      2.60         2.5    1.50  57.6   
6         Chelsea        10      6.10         5.5    2.34  38.4   
7  Crystal Palace        10      3.80         3.5    2.27  59.8   
8         Everton         9      4.11         3.0    2.96  72.0   
9          Fulham        10      5.10         3.5    2.77  54.4   

   Pct_Cerca_Media  Cambios_Bruscos_%   IQR  Rango  ...  Min  Max  Outliers  \
0             40.0               22.2  5.00     10  ...    2   12         0   
1             50.0               22.2  2.75      8  ...    2   10         0   
2             77.8       

In [4]:
def get_ck(df, season, round_num, local, away, league=None):
    """Obtiene corners totales de un partido espec√≠fico"""
    season_round = (df['season'] == season) & (df['round'] == round_num)
    
    if league is not None:
        season_round = season_round & (df['league'] == league)
    
    df = df[season_round]
    
    df_local = df[df['team'] == local]
    df_away = df[df['team'] == away]
    
    total_ck = df_local["Pass Types_CK"].sum() + df_away["Pass Types_CK"].sum()
    
    return total_ck

def get_dataframes(df, season, round_num, local, away, league=None):
    """Retorna 8 DataFrames filtrados por equipo, venue y liga"""
    
    season_round = (df['season'] == season) & (df['round'] < round_num)
    
    if league is not None:
        season_round = season_round & (df['league'] == league)
    
    def filter_and_split(team_filter):
        filtered = df[season_round & team_filter].copy()
        home = filtered[filtered['venue'] == "Home"]
        away = filtered[filtered['venue'] == "Away"]
        return home, away
    
    local_home, local_away = filter_and_split(df['team'] == local)
    local_opp_home, local_opp_away = filter_and_split(df['opponent'] == local)
    
    away_home, away_away = filter_and_split(df['team'] == away)
    away_opp_home, away_opp_away = filter_and_split(df['opponent'] == away)
    
    return (local_home, local_away, local_opp_home, local_opp_away,
            away_home, away_away, away_opp_home, away_opp_away)

def get_head_2_head(df, local, away, seasons=None, league=None):
    """Obtiene √∫ltimos 3 enfrentamientos directos"""
    if seasons is None:
        seasons = []
    
    df_filtered = df[df['season'].isin(seasons)] if seasons else df
    
    if league is not None:
        df_filtered = df_filtered[df_filtered['league'] == league]
    
    local_h2h = df_filtered[(df_filtered['team'] == local) & (df_filtered['opponent'] == away)]
    away_h2h = df_filtered[(df_filtered['team'] == away) & (df_filtered['opponent'] == local)]
    
    if len(local_h2h) < 4:
        return local_h2h.tail(2), away_h2h.tail(2)
    
    return local_h2h.tail(3), away_h2h.tail(3)

def get_points_from_result(result):
    """Convierte resultado (W/D/L) a puntos"""
    if result == 'W':
        return 3
    elif result == 'D':
        return 1
    else:
        return 0

# ‚úÖ NUEVA FUNCI√ìN: Calcular PPP (Puntos Por Partido)
def get_team_ppp(df, team, season, round_num, league=None):
    """
    Calcula puntos por partido (PPP) de un equipo
    
    Args:
        df: DataFrame completo
        team: Nombre del equipo
        season: Temporada
        round_num: N√∫mero de jornada (NO incluye esta jornada)
        league: C√≥digo de liga (opcional)
    
    Returns:
        float: Puntos por partido (0-3)
    """
    team_matches = df[
        (df['team'] == team) & 
        (df['season'] == season) & 
        (df['round'] < round_num)
    ]
    
    if league is not None:
        team_matches = team_matches[team_matches['league'] == league]
    
    if len(team_matches) == 0:
        return 0.0
    
    total_points = team_matches['result'].apply(get_points_from_result).sum()
    ppp = total_points / len(team_matches)
    
    return ppp

# ‚úÖ NUEVA FUNCI√ìN: Calcular diferencia de PPP
def get_ppp_difference(df, local, away, season, round_num, league=None):
    """
    Calcula la diferencia de puntos por partido entre local y visitante
    
    Args:
        df: DataFrame completo
        local: Equipo local
        away: Equipo visitante
        season: Temporada
        round_num: Jornada actual
        league: C√≥digo de liga (opcional)
    
    Returns:
        float: Diferencia de PPP (local - away)
    """
    local_ppp = get_team_ppp(df, local, season, round_num, league)
    away_ppp = get_team_ppp(df, away, season, round_num, league)
    
    return local_ppp - away_ppp

def get_average(df, is_team=False, lst_avg=None):
    """Calcula promedios de estad√≠sticas"""
    
    if len(df) == 0:
        # Retornar valores por defecto si el DataFrame est√° vac√≠o
        if is_team:
            return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        return (0, 0, 0, 0, 0, 0, 0, 0)
    
    if is_team:
        # ===========================
        # ESTAD√çSTICAS B√ÅSICAS (NORMALIZADAS)
        # ===========================
        avg_cross = (df['Performance_Crs'].sum() / len(df)) - lst_avg[3]
        avg_att_3rd = (df['Touches_Att 3rd'].sum() / len(df)) - lst_avg[4]
        avg_sca = (df['SCA Types_SCA'].sum() / len(df)) - lst_avg[2]
        avg_xg = (df['Expected_xG'].sum() / len(df)) - lst_avg[1]
        
        # ‚úÖ CAMBIO: VARIANZA EN VEZ DE PROMEDIO DE CK
        var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
        avg_ck = (df['Pass Types_CK'].sum() / len(df)) - lst_avg[8]
        
        avg_poss = (df['Poss'].sum() / len(df)) - 50
        avg_gf = (df['GF'].sum() / len(df)) - lst_avg[5]
        avg_ga = (df['GA'].sum() / len(df)) - lst_avg[6]
        
        # ===========================
        # M√âTRICAS OFENSIVAS AVANZADAS
        # ===========================
        
        # Precisi√≥n de tiros
        total_sh = df['Standard_Sh'].sum()
        sh_accuracy = (df['Standard_SoT'].sum() / total_sh) if total_sh > 0 else 0
        
        # Eficiencia xG por tiro
        xg_shot = (df['Expected_xG'].sum() / total_sh) if total_sh > 0 else 0
        
        # Presencia atacante (% toques en √°rea rival)
        total_touches = df['Touches_Touches'].sum()
        attacking_presence = (df['Touches_Att 3rd'].sum() / total_touches) if total_touches > 0 else 0
        
        # Tiros por posesi√≥n
        total_poss = df['Poss'].sum()
        possession_shot = (total_sh / total_poss) if total_poss > 0 else 0
        
        # Distancia promedio de tiros
        standard_dist = df['Standard_Dist'].mean() if 'Standard_Dist' in df.columns else 0
        
        # ===========================
        # M√âTRICAS DE CREACI√ìN
        # ===========================
        
        # Ratio de pases progresivos
        total_passes = df['Total_Att'].sum()
        progressive_pass_ratio = (df['PrgP'].sum() / total_passes) if total_passes > 0 else 0
        
        # Participaci√≥n en √∫ltimo tercio
        final_third_passes = df['1/3'].sum()
        final_third_involvement = (final_third_passes / total_passes) if total_passes > 0 else 0
        
        # Ratio de pases largos
        long_ball_ratio = (df['Long_Att'].sum() / total_passes) if total_passes > 0 else 0
        
        # Asistencias por SCA
        total_sca = df['SCA Types_SCA'].sum()
        assist_sca = (df['Ast'].sum() / total_sca) if total_sca > 0 else 0
        
        # Dependencia de centros
        cross_dependency = (df['Performance_Crs'].sum() / total_passes) if total_passes > 0 else 0
        
        # Eficiencia creativa
        creative_efficiency = (total_sca / total_poss) if total_poss > 0 else 0
        
        # ===========================
        # M√âTRICAS DEFENSIVAS
        # ===========================
        
        # Intensidad de presi√≥n alta
        total_tackles = df['Tackles_Tkl'].sum()
        high_press_intensity = (df['Tackles_Att 3rd'].sum() / total_tackles) if total_tackles > 0 else 0
        
        # Ratio intercepciones/tackles
        interception_tackle = (df['Int'].sum() / total_tackles) if total_tackles > 0 else 0
        
        # Ratio bloqueos/tackles
        blocks_tackle = (df['Blocks_Blocks'].sum() / total_tackles) if total_tackles > 0 else 0
        
        # Ratio de despejes
        total_defensive_actions = total_tackles + df['Int'].sum()
        clearance_ratio = (df['Clr'].sum() / total_defensive_actions) if total_defensive_actions > 0 else 0
        
        # ===========================
        # M√âTRICAS DE PORTER√çA
        # ===========================
        
        # Rendimiento del portero normalizado
        avg_save_pct = df['Performance_Save%'].mean() if 'Performance_Save%' in df.columns else 0
        avg_xg_against = df['Expected_xG'].mean() if len(df) > 0 else 1
        performance_save = (avg_save_pct / (1 / avg_xg_against)) if avg_xg_against > 0 else 0
        
        # ===========================
        # M√âTRICAS DE POSESI√ìN
        # ===========================
        
        # Ratio de conducciones progresivas
        total_carries = df['Carries_Carries'].sum()
        progressive_carry_ratio = (df['Carries_PrgC'].sum() / total_carries) if total_carries > 0 else 0
        
        # Ratio de conducciones al √°rea
        penalty_carry_ratio = (df['Carries_CPA'].sum() / total_carries) if total_carries > 0 else 0
        
        # Balance conducci√≥n/pase progresivo
        total_prog_passes = df['PrgP'].sum()
        carry_pass_balance = (df['Carries_PrgC'].sum() / total_prog_passes) if total_prog_passes > 0 else 0
        
        # ===========================
        # √çNDICES COMPUESTOS
        # ===========================
        
        # √çndice ofensivo
        avg_gf_raw = df['GF'].mean()
        avg_xg_raw = df['Expected_xG'].mean()
        avg_sot = df['Standard_SoT'].mean()
        avg_sh = df['Standard_Sh'].mean()
        offensive_index = (avg_gf_raw + avg_xg_raw) * (avg_sot / avg_sh) if avg_sh > 0 else 0
        
        # √çndice defensivo
        avg_int = df['Int'].mean()
        avg_tkl = df['Tackles_Tkl'].mean()
        avg_clr = df['Clr'].mean()
        defensive_index = avg_save_pct * (avg_int / (avg_tkl + avg_clr)) if (avg_tkl + avg_clr) > 0 else 0
        
        # √çndice de control de posesi√≥n
        avg_touches_att = df['Touches_Att 3rd'].mean()
        avg_carries_third = df['Carries_1/3'].mean() if 'Carries_1/3' in df.columns else 0
        avg_touches_total = df['Touches_Touches'].mean()
        possession_control_index = ((avg_touches_att + avg_carries_third) / avg_touches_total) if avg_touches_total > 0 else 0
        
        # √çndice de transici√≥n
        avg_prgp = df['PrgP'].mean()
        avg_prgc = df['Carries_PrgC'].mean()
        avg_poss_raw = df['Poss'].mean()
        transition_index = ((avg_prgp + avg_prgc) / avg_poss_raw) if avg_poss_raw > 0 else 0
        
        # ‚úÖ RETORNAR TODAS LAS M√âTRICAS (23 valores)
        return (
            avg_ck,
            var_ck,  # 0 - ‚úÖ CAMBIADO: varianza en vez de promedio
            avg_xg,  # 1
            avg_sca,  # 2
            avg_cross,  # 3
            avg_poss,  # 4
            avg_att_3rd,  # 5
            avg_gf,  # 6
            avg_ga,  # 7
            sh_accuracy,  # 8
            xg_shot,  # 9
            attacking_presence,  # 10
            possession_shot,  # 11
            progressive_pass_ratio,  # 12
            final_third_involvement,  # 13
            assist_sca,  # 14
            creative_efficiency,  # 15
            high_press_intensity,  # 16
            interception_tackle,  # 17
            clearance_ratio,  # 18
            progressive_carry_ratio,  # 19
            carry_pass_balance,  # 20
            offensive_index,  # 21
            transition_index  # 22
        )
    
    # ===========================
    # PROMEDIOS DE LIGA (is_team=False)
    # ===========================
    
    avg_cross = df['Performance_Crs'].mean()
    avg_att_3rd = df['Touches_Att 3rd'].mean()
    avg_sca = df['SCA Types_SCA'].mean()
    avg_xg = df['Expected_xG'].mean()
    
    # ‚úÖ CAMBIO: VARIANZA EN VEZ DE PROMEDIO DE CK
    var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
    avg_ck = df['Pass Types_CK'].mean()
    
    avg_gf = df['GF'].mean()
    avg_ga = df['GA'].mean()
    
    # ‚úÖ AGREGAR M√âTRICAS B√ÅSICAS PARA NORMALIZACI√ìN
    avg_sh = df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0
    
    return (
        
        var_ck,  # 0 - ‚úÖ CAMBIADO
        avg_xg,  # 1
        avg_sca,  # 2
        avg_cross,  # 3
        avg_att_3rd,  # 4
        avg_gf,  # 5
        avg_ga,  # 6
        avg_sh,  # 7 - NUEVO
        avg_ck
    )

# ===========================
# PROCESAMIENTO DE DATOS CON PPP Y NUEVAS M√âTRICAS
# ===========================

lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]
lst_data = []
y = []

USE_ONE_HOT_ENCODING = True

print("\nüîÑ PROCESANDO DATOS CON PPP Y M√âTRICAS AVANZADAS...")
print("=" * 80)

for i in lst_1:
    
    if i[2] < 5:
        continue
    
    local = i[0]
    away = i[1]
    round_num = i[2]
    season = i[3]
    date = i[4]
    league_code = i[5]
    
    dic_df = {}
    
    # Promedios de liga
    lst_avg = get_average(
        df_database[
            (df_database['season'] == season) & 
            (df_database['round'] < round_num) &
            (df_database['league'] == league_code)
        ],
        is_team=False
    )
    
    # ‚úÖ FUNCI√ìN MEJORADA: Maneja m√©tricas originales y avanzadas
    def create_line(df, is_form=True, is_team=False, use_advanced=True):
        """
        Args:
            df: DataFrame con datos del equipo
            is_form: Si True, toma solo √∫ltimos 8 partidos
            is_team: Si True, normaliza contra promedios de liga
            use_advanced: Si True, incluye m√©tricas avanzadas (23 valores)
                         Si False, solo m√©tricas originales (8 valores)
        """
        if is_form:
            df = df[-6:]
        
        if use_advanced:
            # Retorna 23 valores (todas las m√©tricas)
            return get_average(df, is_team, lst_avg)
        else:
            # Retorna solo 8 valores originales
            result = get_average(df, is_team, lst_avg)
            return result[:9]  # Primeros 8 valores
    
    # Extraer DataFrames
    (team1_home, team1_away, team1_opp_home, team1_opp_away,
     team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
        df_database, season, round_num, local, away, league=league_code
    )
    
    # Corners reales
    ck = get_ck(df_database, season, round_num, local, away, league=league_code)
    y.append(ck)
    
    # Head to Head
    index = lst_years.index(season)
    result = lst_years[:index+1]
    team1_h2h, team2_h2h = get_head_2_head(
        df_database, local, away, seasons=result, league=league_code
    )
    
    # ‚úÖ PPP
    local_ppp = get_team_ppp(df_database, local, season, round_num, league=league_code)
    away_ppp = get_team_ppp(df_database, away, season, round_num, league=league_code)
    ppp_diff = local_ppp - away_ppp
    
    dic_df['ppp_local'] = (local_ppp,)
    dic_df['ppp_away'] = (away_ppp,)
    dic_df['ppp_difference'] = (ppp_diff,)
    
    # ‚úÖ FEATURES CON M√âTRICAS AVANZADAS (23 valores cada una)
    dic_df['lst_team1_home_form'] = create_line(team1_home, True, True, use_advanced=True)
    dic_df['lst_team1_home_general'] = create_line(team1_home, False, True, use_advanced=True)
    dic_df['lst_team1_away_form'] = create_line(team1_away, True, True, use_advanced=True)
    dic_df['lst_team1_away_general'] = create_line(team1_away, False, True, use_advanced=True)
    
    dic_df['lst_team2_home_form'] = create_line(team2_home, True, True, use_advanced=True)
    dic_df['lst_team2_home_general'] = create_line(team2_home, False, True, use_advanced=True)
    dic_df['lst_team2_away_form'] = create_line(team2_away, True, True, use_advanced=True)
    dic_df['lst_team2_away_general'] = create_line(team2_away, False, True, use_advanced=True)
    
    dic_df['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
    dic_df['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)
    
    # ‚úÖ FEATURES CON M√âTRICAS ORIGINALES (8 valores) - SOLO PARA OPONENTES
    dic_df['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
    dic_df['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)
    
    # One-Hot Encoding
    if USE_ONE_HOT_ENCODING:
        league_dummies = {
            'league_ESP': 1 if league_code == 'ESP' else 0,
            'league_GER': 1 if league_code == 'GER' else 0,
            'league_FRA': 1 if league_code == 'FRA' else 0,
            'league_ITA': 1 if league_code == 'ITA' else 0,
            'league_NED': 1 if league_code == 'NED' else 0
        }
        
        for key, value in league_dummies.items():
            dic_df[key] = (value,)
    
    # ‚úÖ CONSTRUIR VECTOR DE FEATURES CON NOMBRES DESCRIPTIVOS
    lst_base_advanced = [
        "avg_ck","var_ck",  # ‚úÖ CAMBIADO
        "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
        "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
        "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
        "high_press_intensity", "interception_tackle", "clearance_ratio",
        "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
    ]
    
    lst_base_original = [
        "var_ck","xg", "sca", "cross", "poss", "att_3rd", "gf", "ga","avg_ck"
    ]
    
    lst_features_values = []
    lst_features_names = []
    
    for key in dic_df:
        lst_features_values.extend(list(dic_df[key]))
        
        # Casos especiales
        if key in ['ppp_local', 'ppp_away', 'ppp_difference']:
            lst_features_names.append(key)
        elif key.startswith('league_'):
            lst_features_names.append(key)
        elif key in ['lst_team1_opp_away', 'lst_team2_opp_home']:
            # ‚úÖ M√©tricas ORIGINALES (8 valores)
            lst_features_names.extend([f"{key}_{col}" for col in lst_base_original])
        else:
            # ‚úÖ M√©tricas AVANZADAS (23 valores)
            lst_features_names.extend([f"{key}_{col}" for col in lst_base_advanced])
    
    lst_data.append(lst_features_values)

# ===========================
# CREAR DATAFRAME FINAL
# ===========================

df_data = pd.DataFrame(data=lst_data, columns=lst_features_names)

print(f"\n‚úÖ PROCESAMIENTO COMPLETADO:")
print(f"   Shape inicial: {df_data.shape}")
print(f"   Total partidos: {len(df_data)}")
print(f"   Features totales: {df_data.shape[1]}")

# ===========================
# LIMPIEZA DE DATOS NULOS
# ===========================

print(f"\nüßπ LIMPIANDO DATOS NULOS...")

import numpy as np
nulos_antes_X = df_data.isnull().sum().sum()
nulos_antes_y = np.isnan(y).sum() if isinstance(y, np.ndarray) else sum(pd.isna(y))

print(f"   Nulos en X (antes): {nulos_antes_X}")
print(f"   Nulos en Y (antes): {nulos_antes_y}")

y_array = np.array(y).flatten()

mask_valid_X = ~df_data.isnull().any(axis=1)
mask_valid_y = ~np.isnan(y_array)
mask_combined = mask_valid_X & mask_valid_y

df_data = df_data[mask_combined].reset_index(drop=True)
y_array = y_array[mask_combined]

print(f"\n‚úÖ LIMPIEZA COMPLETADA:")
print(f"   Nulos en X (despu√©s): {df_data.isnull().sum().sum()}")
print(f"   Nulos en Y (despu√©s): {np.isnan(y_array).sum()}")
print(f"   Filas eliminadas: {len(mask_combined) - mask_combined.sum()}")
print(f"   Shape final: {df_data.shape}")

# ===========================
# VERIFICACI√ìN FINAL
# ===========================

print(f"\nüîç VERIFICACI√ìN DE NUEVAS FEATURES:")
print(f"   ‚úÖ Features con 'var_ck': {len([c for c in df_data.columns if 'var_ck' in c])}")
print(f"   ‚úÖ Features con m√©tricas avanzadas: {len([c for c in df_data.columns if any(m in c for m in ['sh_accuracy', 'offensive_index'])])}")
print(f"   ‚úÖ Features de oponentes (8 valores): {len([c for c in df_data.columns if 'opp' in c])}")

y = y_array.tolist()

print("\n" + "=" * 80)
print("‚úÖ PROCESO COMPLETADO - DATOS LISTOS PARA ENTRENAMIENTO")
print("=" * 80)


üîÑ PROCESANDO DATOS CON PPP Y M√âTRICAS AVANZADAS...

‚úÖ PROCESAMIENTO COMPLETADO:
   Shape inicial: (11419, 266)
   Total partidos: 11419
   Features totales: 266

üßπ LIMPIANDO DATOS NULOS...
   Nulos en X (antes): 3133
   Nulos en Y (antes): 0

‚úÖ LIMPIEZA COMPLETADA:
   Nulos en X (despu√©s): 0
   Nulos en Y (despu√©s): 0
   Filas eliminadas: 390
   Shape final: (11029, 266)

üîç VERIFICACI√ìN DE NUEVAS FEATURES:
   ‚úÖ Features con 'var_ck': 12
   ‚úÖ Features con m√©tricas avanzadas: 20
   ‚úÖ Features de oponentes (8 valores): 18

‚úÖ PROCESO COMPLETADO - DATOS LISTOS PARA ENTRENAMIENTO


In [1]:
import pandas as pd
df_database = pd.read_csv("data/database_4_ligas.csv")
df_database["season"] = df_database["season"].astype(str)
df_database_export = df_database.copy()
df_database_export = df_database_export.drop_duplicates(subset=["game", "league"])
df_database_export = df_database_export[["local", "away", "round", "season", "date", "league"]]

lst_1 = df_database_export.values.tolist()

# Excluir temporada 1718 si es necesario
lst_1 = [row for row in lst_1 if row[3] != "1718"]

In [22]:
lst_1[0][3]

'1819'

In [7]:
partidos = [
    ("Elche", "Real Sociedad"),
    ("Girona", "Alav√©s"),
    ("Sevilla", "Osasuna"),
    ("Atl√©tico Madrid", "Levante"),
    ("Espanyol", "Villarreal"),
    ("Athletic Club", "Oviedo"),
    ("Rayo Vallecano", "Real Madrid"),
    ("Mallorca", "Getafe"),
    ("Valencia", "Betis"),
    ("Barcelona", "Celta Vigo")
]

df_esp = predecir_partidos_batch(
    partidos=partidos,
    jornada=12,
    temporada="2526",
    league_code="ESP",
    export_csv=True,
    filename="predicciones_ESP_J122.csv"
)

# Mostrar resumen
mostrar_resumen_batch(df_esp)
    



üéØ PROCESANDO 10 PARTIDOS - ESP | J12 | Temporada 2526

[1/10] Procesando: Elche vs Real Sociedad...

üèüÔ∏è  Elche vs Real Sociedad
üìÖ Temporada 2526 | Jornada 12 | Liga: ESP

üé≤ PREDICCI√ìN MODELO: 7.96 corners totales
   PPP: Elche (1.40) vs Real Sociedad (1.09) | Diff: +0.31

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Elche (Casa): 4.2 CK/partido | xG: 1.54 | Poss: 64.8%
   Real Sociedad (Fuera): 6.6 CK/partido | xG: 1.02 | Poss: 47.4%
   Corners recibidos: Elche (6.4) | Real Sociedad (4.0)
   Total esperado (suma): 10.8 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Elche: 2.3 CK/partido
   Real Sociedad: 6.0 CK/partido
   Promedio total: 8.3 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    3 corners:  2.93% ‚ñà
    4 corners:  5.83% ‚ñà‚ñà
    5 corners:  9.28% ‚ñà‚ñà‚ñà‚ñà
    6 corners: 12.32% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
    7 corners: 14.02% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà ‚≠ê
    8 corners: 13.96% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
    9 corners: 12.35% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
   10 corners:  9.84% ‚ñà‚ñà‚

Traceback (most recent call last):
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 939, in _finalize_columns_and_data
    columns = _validate_or_indexify_columns(contents, columns)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 986, in _validate_or_indexify_columns
    raise AssertionError(
AssertionError: 266 columns passed, passed data had 264 columns

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Danie\AppData\Local\Temp\ipykernel_21064\4190260364.py", line 382, in predecir_corners
    df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Pyt


üé≤ PREDICCI√ìN MODELO: 9.26 corners totales
   PPP: Rayo Vallecano (1.27) vs Real Madrid (2.73) | Diff: -1.45

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Rayo Vallecano (Casa): 8.2 CK/partido | xG: 1.48 | Poss: 54.8%
   Real Madrid (Fuera): 5.2 CK/partido | xG: 2.40 | Poss: 60.2%
   Corners recibidos: Rayo Vallecano (5.3) | Real Madrid (2.2)
   Total esperado (suma): 13.4 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Rayo Vallecano: 4.7 CK/partido
   Real Madrid: 5.7 CK/partido
   Promedio total: 10.3 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    4 corners:  2.92% ‚ñà
    5 corners:  5.41% ‚ñà‚ñà
    6 corners:  8.34% ‚ñà‚ñà‚ñà‚ñà
    7 corners: 11.03% ‚ñà‚ñà‚ñà‚ñà‚ñà
    8 corners: 12.76% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
    9 corners: 13.13% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà ‚≠ê
   10 corners: 12.15% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
   11 corners: 10.23% ‚ñà‚ñà‚ñà‚ñà‚ñà
   12 corners:  7.89% ‚ñà‚ñà‚ñà
   13 corners:  5.62% ‚ñà‚ñà
   14 corners:  3.71% ‚ñà

‚úÖ Valor m√°s probable: 9 corners (13.13%)
üìä Rango 80% con

In [42]:
def kelly_fraction(p, odds, fraction=0.2):
    b = odds - 1
    q = 1 - p
    f_star = (b * p - q) / b
    f_star = max(f_star, 0)  # evita negativos
    return f_star * fraction  # usa 0.1 para Kelly 10%

kelly_fraction(.60, 1.83, fraction=0.2)


0.023614457831325295

In [8]:
partidos = [
    ("Werder Bremen", "Wolfsburg"),
    ("Hoffenheim", "RB Leipzig"),
    ("Leverkusen", "Heidenheim"),
    ("Hamburger SV", "Dortmund"),
    ("Union Berlin", "Bayern"),
    ("Gladbach", "K√∂ln"),
    ("Freiburg", "St. Pauli"),
    ("Stuttgart", "Augsburg"),
    ("Eint Frankfurt", "Mainz 05")

]

df_ger = predecir_partidos_batch(
    partidos=partidos,
    jornada=10,
    temporada="2526",
    league_code="GER",
    export_csv=True,
    filename="predicciones_GER_J10.csv"
)

# Mostrar resumen
mostrar_resumen_batch(df_ger)


üéØ PROCESANDO 9 PARTIDOS - GER | J10 | Temporada 2526

[1/9] Procesando: Werder Bremen vs Wolfsburg...

üèüÔ∏è  Werder Bremen vs Wolfsburg
üìÖ Temporada 2526 | Jornada 10 | Liga: GER

üé≤ PREDICCI√ìN MODELO: 9.22 corners totales
   PPP: Werder Bremen (1.33) vs Wolfsburg (1.00) | Diff: +0.33

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Werder Bremen (Casa): 3.5 CK/partido | xG: 1.65 | Poss: 51.0%
   Wolfsburg (Fuera): 3.5 CK/partido | xG: 1.20 | Poss: 46.8%
   Corners recibidos: Werder Bremen (7.4) | Wolfsburg (8.2)
   Total esperado (suma): 7.0 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Werder Bremen: 5.0 CK/partido
   Wolfsburg: 4.0 CK/partido
   Promedio total: 9.0 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    4 corners:  2.98% ‚ñà
    5 corners:  5.49% ‚ñà‚ñà
    6 corners:  8.44% ‚ñà‚ñà‚ñà‚ñà
    7 corners: 11.12% ‚ñà‚ñà‚ñà‚ñà‚ñà
    8 corners: 12.82% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
    9 corners: 13.14% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà ‚≠ê
   10 corners: 12.12% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
   11 corners:

Traceback (most recent call last):
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 939, in _finalize_columns_and_data
    columns = _validate_or_indexify_columns(contents, columns)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 986, in _validate_or_indexify_columns
    raise AssertionError(
AssertionError: 266 columns passed, passed data had 264 columns

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Danie\AppData\Local\Temp\ipykernel_21064\4190260364.py", line 382, in predecir_corners
    df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Pyt


‚ùå ERROR: 266 columns passed, passed data had 260 columns
   ‚ùå Error: 266 columns passed, passed data had 260 columns

[6/9] Procesando: Gladbach vs K√∂ln...

üèüÔ∏è  Gladbach vs K√∂ln
üìÖ Temporada 2526 | Jornada 10 | Liga: GER

üé≤ PREDICCI√ìN MODELO: 11.19 corners totales
   PPP: Gladbach (0.67) vs K√∂ln (1.38) | Diff: -0.71

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Gladbach (Casa): 5.0 CK/partido | xG: 1.12 | Poss: 45.0%
   K√∂ln (Fuera): 4.0 CK/partido | xG: 1.28 | Poss: 43.4%
   Corners recibidos: Gladbach (5.8) | K√∂ln (5.0)
   Total esperado (suma): 9.0 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Gladbach: 4.0 CK/partido
   K√∂ln: 7.3 CK/partido
   Promedio total: 11.3 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    6 corners:  3.76% ‚ñà
    7 corners:  6.01% ‚ñà‚ñà‚ñà
    8 corners:  8.41% ‚ñà‚ñà‚ñà‚ñà
    9 corners: 10.46% ‚ñà‚ñà‚ñà‚ñà‚ñà
   10 corners: 11.71% ‚ñà‚ñà‚ñà‚ñà‚ñà
   11 corners: 11.92% ‚ñà‚ñà‚ñà‚ñà‚ñà ‚≠ê
   12 corners: 11.12% ‚ñà‚ñà‚ñà‚ñà‚ñà
   13 c

In [14]:
partidos = [
    ("Como", "Cagliari"),
    ("Lecce", "Hellas Verona"),
    ("Juventus", "Torino"),
    ("Parma", "Milan"),
    ("Atalanta", "Sassuolo"),
    ("Bologna", "Napoli"),
    ("Genoa", "Fiorentina"),
    ("Roma", "Udinese"),
    ("Inter", "Lazio")
]

df_ita = predecir_partidos_batch(
    partidos=partidos,
    jornada=11,
    temporada="2526",
    league_code="ITA",
    export_csv=True,
    filename="predicciones_ita_J12.csv"
)

# Mostrar resumen
mostrar_resumen_batch(df_ita)
    


üéØ PROCESANDO 9 PARTIDOS - ITA | J11 | Temporada 2526

[1/9] Procesando: Como vs Cagliari...

üèüÔ∏è  Como vs Cagliari
üìÖ Temporada 2526 | Jornada 11 | Liga: ITA

üé≤ PREDICCI√ìN MODELO: 9.33 corners totales
   PPP: Como (1.70) vs Cagliari (1.00) | Diff: +0.70

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Como (Casa): 3.0 CK/partido | xG: 1.48 | Poss: 57.6%
   Cagliari (Fuera): 2.8 CK/partido | xG: 1.00 | Poss: 45.5%
   Corners recibidos: Como (4.8) | Cagliari (5.0)
   Total esperado (suma): 5.8 corners

üîÑ HEAD TO HEAD (√∫ltimos 2 partidos):
   Como: 5.5 CK/partido
   Cagliari: 3.0 CK/partido
   Promedio total: 8.5 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    4 corners:  2.80% ‚ñà
    5 corners:  5.22% ‚ñà‚ñà
    6 corners:  8.12% ‚ñà‚ñà‚ñà‚ñà
    7 corners: 10.82% ‚ñà‚ñà‚ñà‚ñà‚ñà
    8 corners: 12.63% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
    9 corners: 13.10% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà ‚≠ê
   10 corners: 12.22% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
   11 corners: 10.37% ‚ñà‚ñà‚ñà‚ñà‚ñà
   12 corners:  8.07% ‚ñà‚ñà‚ñà‚ñà
 

In [12]:
partidos = [
    ("Twente", "Telstar"),
    ("Excelsior", "Heracles Almelo"),
    ("Volendam", "NAC Breda"),
    ("Fortuna Sittard", "Heerenveen"),
    ("Zwolle", "Sparta R'dam"),
    ("Utrecht", "Ajax"),
    ("NEC Nijmegen", "Groningen"),
    ("AZ Alkmaar", "PSV Eindhoven"),
    ("Go Ahead Eag", "Feyenoord")
]

df_ned = predecir_partidos_batch(
    partidos=partidos,
    jornada=12,
    temporada="2526",
    league_code="NED",
    export_csv=True,
    filename="predicciones_NED_J12.csv"
)

# Mostrar resumen
mostrar_resumen_batch(df_ned)


üéØ PROCESANDO 9 PARTIDOS - NED | J12 | Temporada 2526

[1/9] Procesando: Twente vs Telstar...

üèüÔ∏è  Twente vs Telstar
üìÖ Temporada 2526 | Jornada 12 | Liga: NED

‚ùå ERROR: 266 columns passed, passed data had 264 columns
   ‚ùå Error: 266 columns passed, passed data had 264 columns

[2/9] Procesando: Excelsior vs Heracles Almelo...

üèüÔ∏è  Excelsior vs Heracles Almelo
üìÖ Temporada 2526 | Jornada 12 | Liga: NED

üé≤ PREDICCI√ìN MODELO: 10.66 corners totales
   PPP: Excelsior (0.91) vs Heracles Almelo (0.30) | Diff: +0.61

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Excelsior (Casa): 4.0 CK/partido | xG: 1.26 | Poss: 43.6%
   Heracles Almelo (Fuera): 3.3 CK/partido | xG: 0.73 | Poss: 43.2%
   Corners recibidos: Excelsior (5.5) | Heracles Almelo (7.0)
   Total esperado (suma): 7.3 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Excelsior: 4.7 CK/partido
   Heracles Almelo: 5.3 CK/partido
   Promedio total: 10.0 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    6 corners:  4.77

Traceback (most recent call last):
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 939, in _finalize_columns_and_data
    columns = _validate_or_indexify_columns(contents, columns)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 986, in _validate_or_indexify_columns
    raise AssertionError(
AssertionError: 266 columns passed, passed data had 264 columns

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Danie\AppData\Local\Temp\ipykernel_24988\4190260364.py", line 382, in predecir_corners
    df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Pyt

           Equipo  Partidos  Media_CK  Mediana_CK  Std_CK  CV_%  \
0         Arsenal        10      7.30         8.0    3.10  42.5   
1     Aston Villa        10      5.30         5.0    2.45  46.3   
2     Bournemouth         9      5.56         5.0    1.57  28.3   
3       Brentford        10      5.30         5.0    2.53  47.8   
4        Brighton        10      4.80         4.0    2.18  45.5   
5         Burnley        10      2.60         2.5    1.50  57.6   
6         Chelsea        10      6.10         5.5    2.34  38.4   
7  Crystal Palace        10      3.80         3.5    2.27  59.8   
8         Everton         9      4.11         3.0    2.96  72.0   
9          Fulham        10      5.10         3.5    2.77  54.4   

   Pct_Cerca_Media  Cambios_Bruscos_%   IQR  Rango  ...  Min  Max  Outliers  \
0             40.0               22.2  5.00     10  ...    2   12         0   
1             50.0               22.2  2.75      8  ...    2   10         0   
2             77.8       

Traceback (most recent call last):
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 939, in _finalize_columns_and_data
    columns = _validate_or_indexify_columns(contents, columns)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 986, in _validate_or_indexify_columns
    raise AssertionError(
AssertionError: 266 columns passed, passed data had 264 columns

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Danie\AppData\Local\Temp\ipykernel_24988\4190260364.py", line 382, in predecir_corners
    df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Pyt

           Equipo  Partidos  Media_CK  Mediana_CK  Std_CK  CV_%  \
0         Arsenal        10      7.30         8.0    3.10  42.5   
1     Aston Villa        10      5.30         5.0    2.45  46.3   
2     Bournemouth         9      5.56         5.0    1.57  28.3   
3       Brentford        10      5.30         5.0    2.53  47.8   
4        Brighton        10      4.80         4.0    2.18  45.5   
5         Burnley        10      2.60         2.5    1.50  57.6   
6         Chelsea        10      6.10         5.5    2.34  38.4   
7  Crystal Palace        10      3.80         3.5    2.27  59.8   
8         Everton         9      4.11         3.0    2.96  72.0   
9          Fulham        10      5.10         3.5    2.77  54.4   

   Pct_Cerca_Media  Cambios_Bruscos_%   IQR  Rango  ...  Min  Max  Outliers  \
0             40.0               22.2  5.00     10  ...    2   12         0   
1             50.0               22.2  2.75      8  ...    2   10         0   
2             77.8       

In [11]:
partidos = [
    ("Paris FC", "Rennes"),
    ("Marseille", "Brest"),
    ("Le Havre", "Nantes"),
    ("Monaco", "Lens"),
    ("Lorient", "Toulouse"),
    ("Metz", "Nice"),
    ("Angers", "Auxerre"),
    ("Strasbourg", "Lille"),
    ("Lyon", "G")
]

df_fra = predecir_partidos_batch(
    partidos=partidos,
    jornada=12,
    temporada="2526",
    league_code="FRA",
    export_csv=True,
    filename="predicciones_FRA_J12.csv"
)

# Mostrar resumen
mostrar_resumen_batch(df_fra)



üéØ PROCESANDO 9 PARTIDOS - FRA | J12 | Temporada 2526

[1/9] Procesando: Paris FC vs Rennes...

üèüÔ∏è  Paris FC vs Rennes
üìÖ Temporada 2526 | Jornada 12 | Liga: FRA

‚ùå ERROR: 266 columns passed, passed data had 264 columns
   ‚ùå Error: 266 columns passed, passed data had 264 columns

[2/9] Procesando: Marseille vs Brest...

üèüÔ∏è  Marseille vs Brest
üìÖ Temporada 2526 | Jornada 12 | Liga: FRA

üé≤ PREDICCI√ìN MODELO: 7.40 corners totales
   PPP: Marseille (2.00) vs Brest (0.90) | Diff: +1.10

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Marseille (Casa): 4.6 CK/partido | xG: 2.22 | Poss: 59.2%
   Brest (Fuera): 3.4 CK/partido | xG: 1.14 | Poss: 46.4%
   Corners recibidos: Marseille (4.3) | Brest (4.0)
   Total esperado (suma): 8.0 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Marseille: 2.7 CK/partido
   Brest: 4.7 CK/partido
   Promedio total: 7.3 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    2 corners:  1.67% 
    3 corners:  4.13% ‚ñà‚ñà
    4 corners:  7.64% ‚ñà‚ñà

Traceback (most recent call last):
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 939, in _finalize_columns_and_data
    columns = _validate_or_indexify_columns(contents, columns)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 986, in _validate_or_indexify_columns
    raise AssertionError(
AssertionError: 266 columns passed, passed data had 264 columns

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Danie\AppData\Local\Temp\ipykernel_24988\4190260364.py", line 382, in predecir_corners
    df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Pyt

           Equipo  Partidos  Media_CK  Mediana_CK  Std_CK  CV_%  \
0         Arsenal        10      7.30         8.0    3.10  42.5   
1     Aston Villa        10      5.30         5.0    2.45  46.3   
2     Bournemouth         9      5.56         5.0    1.57  28.3   
3       Brentford        10      5.30         5.0    2.53  47.8   
4        Brighton        10      4.80         4.0    2.18  45.5   
5         Burnley        10      2.60         2.5    1.50  57.6   
6         Chelsea        10      6.10         5.5    2.34  38.4   
7  Crystal Palace        10      3.80         3.5    2.27  59.8   
8         Everton         9      4.11         3.0    2.96  72.0   
9          Fulham        10      5.10         3.5    2.77  54.4   

   Pct_Cerca_Media  Cambios_Bruscos_%   IQR  Rango  ...  Min  Max  Outliers  \
0             40.0               22.2  5.00     10  ...    2   12         0   
1             50.0               22.2  2.75      8  ...    2   10         0   
2             77.8       

Traceback (most recent call last):
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 939, in _finalize_columns_and_data
    columns = _validate_or_indexify_columns(contents, columns)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 986, in _validate_or_indexify_columns
    raise AssertionError(
AssertionError: 266 columns passed, passed data had 260 columns

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Danie\AppData\Local\Temp\ipykernel_24988\4190260364.py", line 382, in predecir_corners
    df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Pyt

In [9]:
partidos = [
    ("Tottenham", "Manchester Utd"),
    ("Everton", "Fullham"),
    ("West Ham", "Burnley"),
    ("Sunderland", "Arsenal"),
    ("Chelsea", "Wolves"),
    ("Crystal Palace", "Brighton"),
    ("Aston Villa", "Bournemouth"),
    ("Brentford", "Newcastle Utd"),
    ("Nott'ham Forest", "Leeds United"),
    ("Manchester City", "Liverpool")
]

df_eng = predecir_partidos_batch(
    partidos=partidos,
    jornada=12,
    temporada="2526",
    league_code="ENG",
    export_csv=True,
    filename="predicciones_ENG_J12.csv"
)

# Mostrar resumen
mostrar_resumen_batch(df_eng)


üéØ PROCESANDO 10 PARTIDOS - ENG | J12 | Temporada 2526

[1/10] Procesando: Tottenham vs Manchester Utd...

üèüÔ∏è  Tottenham vs Manchester Utd
üìÖ Temporada 2526 | Jornada 12 | Liga: ENG

üé≤ PREDICCI√ìN MODELO: 13.55 corners totales
   PPP: Tottenham (1.70) vs Manchester Utd (1.70) | Diff: +0.00

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Tottenham (Casa): 5.6 CK/partido | xG: 0.86 | Poss: 55.8%
   Manchester Utd (Fuera): 4.2 CK/partido | xG: 1.46 | Poss: 50.0%
   Corners recibidos: Tottenham (4.8) | Manchester Utd (3.8)
   Total esperado (suma): 9.8 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Tottenham: 8.7 CK/partido
   Manchester Utd: 6.0 CK/partido
   Promedio total: 14.7 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    9 corners:  5.54% ‚ñà‚ñà
   10 corners:  7.51% ‚ñà‚ñà‚ñà
   11 corners:  9.24% ‚ñà‚ñà‚ñà‚ñà
   12 corners: 10.43% ‚ñà‚ñà‚ñà‚ñà‚ñà
   13 corners: 10.87% ‚ñà‚ñà‚ñà‚ñà‚ñà ‚≠ê
   14 corners: 10.52% ‚ñà‚ñà‚ñà‚ñà‚ñà
   15 corners:  9.50% ‚ñà‚ñà‚ñà‚ñà
   16 corn

Traceback (most recent call last):
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 939, in _finalize_columns_and_data
    columns = _validate_or_indexify_columns(contents, columns)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 986, in _validate_or_indexify_columns
    raise AssertionError(
AssertionError: 266 columns passed, passed data had 260 columns

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Danie\AppData\Local\Temp\ipykernel_24988\4190260364.py", line 382, in predecir_corners
    df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Pyt


‚ùå ERROR: 266 columns passed, passed data had 264 columns
   ‚ùå Error: 266 columns passed, passed data had 264 columns

[5/10] Procesando: Chelsea vs Wolves...

üèüÔ∏è  Chelsea vs Wolves
üìÖ Temporada 2526 | Jornada 12 | Liga: ENG

üé≤ PREDICCI√ìN MODELO: 10.91 corners totales
   PPP: Chelsea (1.70) vs Wolves (0.20) | Diff: +1.50

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Chelsea (Casa): 7.6 CK/partido | xG: 1.48 | Poss: 61.0%
   Wolves (Fuera): 3.8 CK/partido | xG: 0.62 | Poss: 45.6%
   Corners recibidos: Chelsea (5.6) | Wolves (3.6)
   Total esperado (suma): 11.4 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Chelsea: 6.0 CK/partido
   Wolves: 4.0 CK/partido
   Promedio total: 10.0 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    6 corners:  4.28% ‚ñà‚ñà
    7 corners:  6.67% ‚ñà‚ñà‚ñà
    8 corners:  9.10% ‚ñà‚ñà‚ñà‚ñà
    9 corners: 11.03% ‚ñà‚ñà‚ñà‚ñà‚ñà
   10 corners: 12.03% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà ‚≠ê
   11 corners: 11.93% ‚ñà‚ñà‚ñà‚ñà‚ñà
   12 corners: 10.85% ‚ñà‚ñà‚ñà‚ñà‚ñà

Traceback (most recent call last):
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 939, in _finalize_columns_and_data
    columns = _validate_or_indexify_columns(contents, columns)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\core\internals\construction.py", line 986, in _validate_or_indexify_columns
    raise AssertionError(
AssertionError: 266 columns passed, passed data had 264 columns

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Danie\AppData\Local\Temp\ipykernel_24988\4190260364.py", line 382, in predecir_corners
    df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Danie\AppData\Local\Programs\Python\Pyt


üé≤ PREDICCI√ìN MODELO: 7.59 corners totales
   PPP: Crystal Palace (1.60) vs Brighton (1.50) | Diff: +0.10

üìä ESTAD√çSTICAS HIST√ìRICAS:
   Crystal Palace (Casa): 4.0 CK/partido | xG: 2.18 | Poss: 43.2%
   Brighton (Fuera): 5.4 CK/partido | xG: 1.46 | Poss: 52.6%
   Corners recibidos: Crystal Palace (7.0) | Brighton (5.2)
   Total esperado (suma): 9.4 corners

üîÑ HEAD TO HEAD (√∫ltimos 3 partidos):
   Crystal Palace: 3.0 CK/partido
   Brighton: 5.0 CK/partido
   Promedio total: 8.0 corners

üìà PROBABILIDADES EXACTAS (Poisson):
    3 corners:  3.69% ‚ñà
    4 corners:  7.00% ‚ñà‚ñà‚ñà
    5 corners: 10.62% ‚ñà‚ñà‚ñà‚ñà‚ñà
    6 corners: 13.43% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
    7 corners: 14.56% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà ‚≠ê
    8 corners: 13.81% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
    9 corners: 11.64% ‚ñà‚ñà‚ñà‚ñà‚ñà
   10 corners:  8.83% ‚ñà‚ñà‚ñà‚ñà
   11 corners:  6.09% ‚ñà‚ñà‚ñà
   12 corners:  3.85% ‚ñà
   13 corners:  2.25% ‚ñà

‚úÖ Valor m√°s probable: 7 corners (14.56%)
üìä Rango 80% confianza: 4-1