# Premier League V4.5: Re-Optimizing for Draws

A accuracy baixou porque mudámos as regras do jogo (pesos) mas mantivemos a estratégia antiga.
Nesta etapa, vamos correr o **Grid Search** novamente, mas desta vez informando o Grid Search de que os empates são importantes.

Imports e Configuração

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib # Para salvar o modelo
import re
import os
import codecs
import requests
from bs4 import BeautifulSoup
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

## 1. Data Acquisition (Recolha de Dados)
Vamos buscar dados reais do `football-data.co.uk`. Vamos carregar várias temporadas consecutivas para que o modelo tenha histórico suficiente para aprender padrões.

* **FTHG**: Full Time Home Goals
* **FTAG**: Full Time Away Goals
* **FTR**: Full Time Result (H=Home, D=Draw, A=Away)

In [None]:
# --- CONFIGURATION ---
# Local CSV cache of the match results: get_main_data reads it when it exists
# and writes it after a fresh download.
DATA_FILE = 'premier_league_full.csv'
# Local CSV cache of the scraped xG table (read or written by the loading cell).
XG_FILE = 'premier_league_xg_data.csv'
# Season start years to load. get_main_data iterates range(START_YEAR, END_YEAR + 1),
# whereas the xG scraping loop uses range(START_YEAR, END_YEAR) and so stops one
# season earlier.
START_YEAR = 2000
END_YEAR = 2025

# --- FUNCTION 1: Robust scraper (Understat) ---
def scrape_understat_season(year, timeout=30):
    """Scrape per-match xG for one EPL season from Understat.

    The season page embeds its fixtures as an escaped JSON string assigned to
    ``datesData``; this function extracts that string, decodes it and keeps
    only the matches flagged as already played.

    Args:
        year: Season start year used in the Understat URL.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block the whole notebook indefinitely.

    Returns:
        A DataFrame with columns Date, HomeTeam, AwayTeam, Home_xG, Away_xG.
        An empty DataFrame is returned on any failure (non-200 status, page
        without ``datesData``, network or parsing error) so that one bad
        season never aborts the surrounding scraping loop.
    """
    print(f"üï∑Ô∏è A recolher xG de {year}/{year+1}...")
    url = f"https://understat.com/league/EPL/{year}"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the status instead of failing silently.
            print(f"HTTP {response.status_code} para {year}")
            return pd.DataFrame()

        match = re.search(r"datesData\s*=\s*JSON\.parse\('(.*?)'\)", response.text)
        if not match:
            print(f"‚ö†Ô∏è Sem dados para {year}")
            return pd.DataFrame()

        # The payload is a JS string literal with escape sequences; undo them
        # before handing the text to the JSON parser.
        json_data = codecs.decode(match.group(1), 'unicode_escape')
        data = json.loads(json_data)

        matches = [
            {
                'Date': m['datetime'][:10],  # keep the YYYY-MM-DD part only
                'HomeTeam': m['h']['title'],
                'AwayTeam': m['a']['title'],
                'Home_xG': float(m['xG']['h']),
                'Away_xG': float(m['xG']['a']),
            }
            for m in data
            if m['isResult']  # skip fixtures that have not been played yet
        ]
        return pd.DataFrame(matches)
    except Exception as e:
        # Deliberate best-effort boundary: log and return an empty frame.
        print(f"‚ö†Ô∏è Erro no ano {year}: {e}")
        return pd.DataFrame()

# --- FUNCTION 2: Load the main data set (Football-Data) ---
def get_main_data(start, end):
    """Return the match results, from the local cache or by downloading.

    If ``DATA_FILE`` exists it is read and returned as-is (the Date column is
    left unparsed so the caller controls the format). Otherwise one CSV per
    season is downloaded from football-data.co.uk, the seasons are
    concatenated, rows without Date or FTR are dropped, and the result is
    written to ``DATA_FILE``.

    Args:
        start: First season start year (inclusive).
        end: Last season start year (inclusive).

    Returns:
        DataFrame of matches. On the download path it is sorted by Date with
        a fresh index.

    Raises:
        ValueError: If no season could be downloaded at all.
    """
    if os.path.exists(DATA_FILE):
        print(f"üìÇ Carregando dados locais: {DATA_FILE}")
        # Important: do not convert dates here; the caller parses them.
        return pd.read_csv(DATA_FILE)

    print("üåê A descarregar dados do Football-Data...")
    dfs = []
    base_url = "https://www.football-data.co.uk/mmz4281/{}/{}.csv"
    for year in range(start, end + 1):
        # Two-digit start year + two-digit end year, e.g. 2000 -> "0001".
        season = f"{str(year)[-2:]}{str(year+1)[-2:]}"
        try:
            df = pd.read_csv(base_url.format(season, "E0"))
            # Parse per file so each season's date format is resolved on its
            # own before the seasons are mixed together.
            df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
        except Exception as e:
            # Best-effort: skip this season, but say so. A bare `except: pass`
            # would hide missing seasons and also swallow KeyboardInterrupt.
            print(f"Season {season} skipped: {e}")
            continue
        dfs.append(df)

    if not dfs:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f"No season between {start} and {end} could be downloaded from Football-Data"
        )

    full_df = pd.concat(dfs, ignore_index=True).dropna(subset=['Date', 'FTR'])
    full_df.to_csv(DATA_FILE, index=False)
    return full_df.sort_values('Date').reset_index(drop=True)

# --- FUNCTION 3: Team-name normalisation ---
def clean_team_name(name):
    """Map a long-form club name to its short form.

    Names without an alias (including names that are already short) are
    returned unchanged.
    """
    aliases = {
        'Cardiff City': 'Cardiff',
        'Huddersfield Town': 'Huddersfield',
        'Hull City': 'Hull',
        'Ipswich Town': 'Ipswich',
        'Leeds United': 'Leeds',
        'Leicester City': 'Leicester',
        'Manchester City': 'Man City',
        'Manchester United': 'Man United',
        'Newcastle United': 'Newcastle',
        'Norwich City': 'Norwich',
        'Nottingham Forest': "Nott'm Forest",
        'Queens Park Rangers': 'QPR',
        'Stoke City': 'Stoke',
        'Swansea City': 'Swansea',
        'Tottenham Hotspur': 'Tottenham',
        'West Bromwich Albion': 'West Brom',
        'West Ham United': 'West Ham',
        'Wolverhampton Wanderers': 'Wolves',
    }
    if name in aliases:
        return aliases[name]
    return name

# ==========================================
# EXECUTION AND CLEANING (THE CRITICAL PART)
# ==========================================
# Loads the match results and the xG table, normalises dates and team names in
# both, left-joins xG onto the results and leaves the final table in `df`.

# 1. Load the main data set
df = get_main_data(START_YEAR, END_YEAR)

# Date parsing and de-duplication on the main data set. The cached-CSV path of
# get_main_data returns the Date column unparsed, so it is converted here.
# NOTE(review): the cache presumably stores ISO dates (written by to_csv) —
# confirm that dayfirst=True does not swap day and month for them.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
df = df.dropna(subset=['Date'])
# Drop repeated fixtures (same date, home team and away team)
df = df.drop_duplicates(subset=['Date', 'HomeTeam', 'AwayTeam'])

# 2. Load the xG table from the cache, or scrape it
if os.path.exists(XG_FILE):
    print("üìÇ Carregando xG local...")
    df_xg = pd.read_csv(XG_FILE)
else:
    print("üåê A iniciar scraping xG...")
    # range() excludes END_YEAR, so the last season scraped starts in END_YEAR - 1.
    dfs_xg = [scrape_understat_season(y) for y in range(START_YEAR, END_YEAR)]
    df_xg = pd.concat(dfs_xg, ignore_index=True)
    df_xg['HomeTeam'] = df_xg['HomeTeam'].apply(clean_team_name)
    df_xg['AwayTeam'] = df_xg['AwayTeam'].apply(clean_team_name)
    df_xg.to_csv(XG_FILE, index=False)

# 3. PREPARE FOR THE MERGE
# Both Date columns are truncated to midnight so they can be used as a join key.
df_xg['Date'] = pd.to_datetime(df_xg['Date']).dt.normalize()
df['Date'] = df['Date'].dt.normalize()

# --- FIX: remove duplicate xG rows BEFORE the merge ---
# A duplicated key on the right side of the left join would duplicate matches.
print(f"üìä Linhas xG antes da limpeza: {len(df_xg)}")
df_xg = df_xg.drop_duplicates(subset=['Date', 'HomeTeam', 'AwayTeam'], keep='first')
print(f"üìâ Linhas xG limpas: {len(df_xg)}")

# Apply the team-name normalisation to both tables so the join keys agree
df['HomeTeam'] = df['HomeTeam'].apply(clean_team_name)
df['AwayTeam'] = df['AwayTeam'].apply(clean_team_name)
df_xg['HomeTeam'] = df_xg['HomeTeam'].apply(clean_team_name)
df_xg['AwayTeam'] = df_xg['AwayTeam'].apply(clean_team_name)

# Drop any xG columns already present in the main frame (every column whose
# name contains 'xG'), so the merge does not produce suffixed duplicates
cols_exclude = [c for c in df.columns if 'xG' in c]
df_clean = df.drop(columns=cols_exclude)

# 4. FINAL MERGE
# Left join: every match is kept; matches without an xG row get NaN.
print("üîÑ A realizar o Merge...")
df_final = df_clean.merge(
    df_xg[['Date', 'HomeTeam', 'AwayTeam', 'Home_xG', 'Away_xG']],
    on=['Date', 'HomeTeam', 'AwayTeam'],
    how='left'
)

# 5. REMOVE THE FUTURE (safety net against wrongly parsed dates)
hoje = pd.Timestamp.now().normalize()
antes = len(df_final)
df_final = df_final[df_final['Date'] <= hoje]
print(f"üìÖ Jogos removidos (futuro/datas erradas): {antes - len(df_final)}")

# Sort chronologically (ties broken by team names); `df` is rebound to the merged table
df = df_final.sort_values(['Date', 'HomeTeam', 'AwayTeam']).reset_index(drop=True)

# Coverage statistics
missing_count = df['Home_xG'].isna().sum()
print(f"‚úÖ Merge conclu√≠do! Jogos com xG: {len(df) - missing_count} / {len(df)}")
print(f"üìâ Jogos sem xG (Preenchidos com 1.0): {missing_count}")

# Fill the gaps: matches without xG get the constant 1.0 for both sides
df = df.fillna({'Home_xG': 1.0, 'Away_xG': 1.0})

print("üîç A verificar duplicados no final:")
# `display` is not defined in this file; presumably the IPython/Jupyter built-in.
display(df.tail(3))

## 2. Feature Engineering Completa (ELO + Stats + Odds)

Aqui adicionamos as colunas B365H, B365D, B365A (Odds da Bet365).

In [None]:
# [CELL: Feature Engineering Update]
def feature_engineering(df):
    """Build the pre-match feature table used to train the models.

    Args:
        df: One row per match with at least Date (datetime), HomeTeam,
            AwayTeam and FTR ('H', 'D' or 'A'). Optional columns are used when
            present: Referee (with HY, AY, HR, AR), the B365H/B365D/B365A
            odds, and the match statistics listed in ``cols_to_avg``.

    Returns:
        A tuple ``(df_clean, existing_features, elo_dict)``:
        * df_clean: rows with a known FTR (and, when odds are available, a
          known home odd), with every feature column NaN-filled with 0.
        * existing_features: names of the feature columns present in df_clean.
        * elo_dict: each team's Elo rating after the last processed match.
    """
    print("‚öôÔ∏è Generating Features with Double Chance Logic...")
    df = df.copy()

    # --- 1. LEAGUE STANDINGS ---
    # A season is labelled by its starting year: August-December belong to the
    # current year, January-July to the previous one.
    df['Season'] = df['Date'].apply(lambda x: x.year if x.month > 7 else x.year - 1)
    # Stable sort keeps the caller's order for same-day matches; the fresh
    # index keeps row labels unique.
    df = df.sort_values('Date', kind='stable').reset_index(drop=True)

    standings = {}
    home_pts, away_pts, home_pos, away_pos = [], [], [], []
    for season, h, a, res in zip(df['Season'], df['HomeTeam'], df['AwayTeam'], df['FTR']):
        table = standings.setdefault(season, {})
        table.setdefault(h, 0)
        table.setdefault(a, 0)

        # Record points and rank as they stood BEFORE this match, so the
        # features never see the result they are meant to predict.
        home_pts.append(table[h])
        away_pts.append(table[a])
        ranked = sorted(table.items(), key=lambda kv: kv[1], reverse=True)
        ranks = {team: pos for pos, (team, _) in enumerate(ranked, start=1)}
        home_pos.append(ranks.get(h, 15))
        away_pos.append(ranks.get(a, 15))

        table[h] += 3 if res == 'H' else 1 if res == 'D' else 0
        table[a] += 3 if res == 'A' else 1 if res == 'D' else 0

    df['Home_Pts'] = home_pts
    df['Away_Pts'] = away_pts
    df['Home_Pos'] = home_pos
    df['Away_Pos'] = away_pos
    df['PtsDiff'] = df['Home_Pts'] - df['Away_Pts']
    df['PosDiff'] = df['Home_Pos'] - df['Away_Pos']

    # --- 2. REFEREE BIAS ---
    if 'Referee' in df.columns:
        le_ref = LabelEncoder()
        df['Referee'] = df['Referee'].fillna('Unknown')
        df['Ref_ID'] = le_ref.fit_transform(df['Referee'])
        # NOTE(review): the averages are taken over the whole frame, so each
        # row also includes cards shown in later matches.
        ref_stats = df.groupby('Referee')[['HY', 'AY', 'HR', 'AR']].mean()
        # Red cards are weighted twice as much as yellow cards.
        ref_stats['Ref_Avg_Cards'] = ref_stats['HY'] + ref_stats['AY'] + 2*(ref_stats['HR'] + ref_stats['AR'])
        df = df.merge(ref_stats['Ref_Avg_Cards'], on='Referee', how='left')
    else:
        df['Ref_ID'] = 0
        df['Ref_Avg_Cards'] = 3.5

    # --- 3. ELO RATING ---
    elo_dict = {}
    k_factor = 20
    home_elo, away_elo = [], []
    for h, a, res in zip(df['HomeTeam'], df['AwayTeam'], df['FTR']):
        h_elo = elo_dict.get(h, 1500.0)
        a_elo = elo_dict.get(a, 1500.0)
        # Pre-match ratings are the feature values.
        home_elo.append(h_elo)
        away_elo.append(a_elo)
        if res not in ('H', 'D', 'A'):
            # Unknown result: do not move the ratings (it would otherwise be
            # scored as an away win).
            continue
        actual = 1 if res == 'H' else 0.5 if res == 'D' else 0
        exp = 1 / (1 + 10**((a_elo - h_elo)/400))
        update = k_factor * (actual - exp)
        elo_dict[h] = h_elo + update
        elo_dict[a] = a_elo - update
    df['HomeElo'] = home_elo
    df['AwayElo'] = away_elo
    df['EloDiff'] = df['HomeElo'] - df['AwayElo']

    # --- 4. ROLLING STATS ---
    cols_to_avg = ['FTHG', 'FTAG', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC', 'HF', 'AF', 'HY', 'AY', 'HR', 'AR',
                   'Home_xG', 'Away_xG', 'Home_PPDA', 'Away_PPDA', 'Home_Deep', 'Away_Deep']
    cols_to_avg = [c for c in cols_to_avg if c in df.columns]

    # Map the home/away columns to team-centric names, once from the home
    # team's point of view and once from the away team's.
    rename_h = {'FTHG': 'Goals', 'FTAG': 'Conceded', 'HS': 'Shots', 'AS': 'ShotsConceded',
                'HST': 'ShotsTarget', 'AST': 'STConceded', 'HC': 'Corners', 'AC': 'CornersConceded',
                'HF': 'Fouls', 'AF': 'FoulsSuffered', 'HY': 'Yellows', 'AY': 'YellowsOpp',
                'HR': 'Reds', 'AR': 'RedsOpp', 'Home_xG': 'xG_For', 'Away_xG': 'xG_Against',
                'Home_PPDA': 'PPDA_For', 'Away_PPDA': 'PPDA_Allowed',
                'Home_Deep': 'Deep_For', 'Away_Deep': 'Deep_Allowed'}

    rename_a = {'FTAG': 'Goals', 'FTHG': 'Conceded', 'AS': 'Shots', 'HS': 'ShotsConceded',
                'AST': 'ShotsTarget', 'HST': 'STConceded', 'AC': 'Corners', 'HC': 'CornersConceded',
                'AF': 'Fouls', 'HF': 'FoulsSuffered', 'AY': 'Yellows', 'HY': 'YellowsOpp',
                'AR': 'Reds', 'HR': 'RedsOpp', 'Away_xG': 'xG_For', 'Home_xG': 'xG_Against',
                'Away_PPDA': 'PPDA_For', 'Home_PPDA': 'PPDA_Allowed',
                'Away_Deep': 'Deep_For', 'Home_Deep': 'Deep_Allowed'}

    home_stats = df[['Date', 'HomeTeam'] + cols_to_avg].rename(columns={'HomeTeam': 'Team'}).rename(columns=rename_h)
    away_stats = df[['Date', 'AwayTeam'] + cols_to_avg].rename(columns={'AwayTeam': 'Team'}).rename(columns=rename_a)
    all_stats = pd.concat([home_stats, away_stats]).sort_values(['Team', 'Date'])

    # dict.fromkeys keeps the declaration order. A set would make the order of
    # the generated columns (and so of the feature list) depend on the
    # interpreter's string hashing.
    metrics = [m for m in dict.fromkeys(rename_h.values()) if m in all_stats.columns]

    for col in metrics:
        # shift(1) excludes the current match: the average covers the five
        # previous matches and needs at least three of them, else 0.
        all_stats[f'Avg_{col}_L5'] = all_stats.groupby('Team')[col].transform(
            lambda x: x.shift(1).rolling(5, min_periods=3).mean()
        ).fillna(0)

    feature_cols = ['Date', 'Team'] + [c for c in all_stats.columns if 'Avg_' in c]
    avg_cols = [c for c in feature_cols if c not in ['Date', 'Team']]
    df = df.merge(all_stats[feature_cols], left_on=['Date', 'HomeTeam'], right_on=['Date', 'Team'], how='left')
    df = df.rename(columns={c: f'Home_{c}' for c in avg_cols}).drop(columns=['Team'])
    df = df.merge(all_stats[feature_cols], left_on=['Date', 'AwayTeam'], right_on=['Date', 'Team'], how='left')
    df = df.rename(columns={c: f'Away_{c}' for c in avg_cols}).drop(columns=['Team'])

    # --- 5. CLEANUP & FINAL FEATURES ---
    # Rest days = days since the team's previous match at ANY venue, clamped
    # to [3, 15]; a team's first match gets 7. This matches what
    # predict_match_advanced computes at inference time. Grouping by HomeTeam
    # / AwayTeam alone would measure the gap to the previous home (or away)
    # match only.
    appearances = pd.concat([
        df[['Date', 'HomeTeam']].rename(columns={'HomeTeam': 'Team'}),
        df[['Date', 'AwayTeam']].rename(columns={'AwayTeam': 'Team'}),
    ], ignore_index=True).drop_duplicates().sort_values(['Team', 'Date']).reset_index(drop=True)
    rest_days = appearances.groupby('Team')['Date'].diff().dt.days.fillna(7).clip(lower=3, upper=15)
    rest_lookup = dict(zip(zip(appearances['Date'], appearances['Team']), rest_days))
    df['Rest_Home'] = [rest_lookup.get(key, 7) for key in zip(df['Date'], df['HomeTeam'])]
    df['Rest_Away'] = [rest_lookup.get(key, 7) for key in zip(df['Date'], df['AwayTeam'])]

    # === IMPLIED ODDS INCLUDING DOUBLE CHANCE ===
    if 'B365H' in df.columns:
        # Standard implied probabilities (inverse of the decimal odds)
        df['Imp_Home'] = 1 / df['B365H']
        df['Imp_Draw'] = 1 / df['B365D']
        df['Imp_Away'] = 1 / df['B365A']

        # Synthetic double-chance implied probabilities (estimate for
        # training): each is the sum of its two single-outcome values.
        df['Imp_1X'] = df['Imp_Home'] + df['Imp_Draw']
        df['Imp_X2'] = df['Imp_Draw'] + df['Imp_Away']
        df['Imp_12'] = df['Imp_Home'] + df['Imp_Away']  # Home or Away (no draw)

    # --- 6. TACTICAL DIFFS ---
    if 'Home_Avg_Deep_For_L5' in df.columns:
        df['Deep_Advantage'] = df['Home_Avg_Deep_For_L5'] - df['Away_Avg_Deep_For_L5']
        df['PPDA_Diff'] = df['Home_Avg_PPDA_For_L5'] - df['Away_Avg_PPDA_For_L5']

    features_needed = [
        'HomeElo', 'AwayElo', 'EloDiff',
        'Rest_Home', 'Rest_Away',
        'Imp_Home', 'Imp_Draw', 'Imp_Away',
        'Imp_1X', 'Imp_X2', 'Imp_12',
        'Home_Pts', 'Away_Pts', 'Home_Pos', 'Away_Pos', 'PtsDiff', 'PosDiff',
        'Ref_ID', 'Ref_Avg_Cards'
    ]

    if 'Deep_Advantage' in df.columns:
        features_needed += ['Deep_Advantage', 'PPDA_Diff']

    features_needed += [c for c in df.columns if 'Home_Avg_' in c or 'Away_Avg_' in c]

    existing_features = [f for f in features_needed if f in df.columns]

    # Only require columns that exist: without odds there is no Imp_Home and
    # an unconditional subset would raise KeyError.
    required = [c for c in ('FTR', 'Imp_Home') if c in df.columns]
    df_clean = df.dropna(subset=required).copy()
    df_clean[existing_features] = df_clean[existing_features].fillna(0)

    return df_clean, existing_features, elo_dict

# Re-run feature engineering on the merged match table. The three results are
# kept at notebook level: df_ready (model-ready rows), features (feature column
# names) and current_elos (each team's rating after the last match in df).
df_ready, features, current_elos = feature_engineering(df)
print(f"‚úÖ Features updated. Total features: {len(features)}")

## 3. Preparação e Treino do Modelo
Treino Intensivo: Grid Search (Hyperparameter Tuning) Aqui é onde "apertamos" o modelo. Vamos testar várias combinações. Nota: Isto pode demorar 2 ou 3 minutos a correr.

In [None]:
# [CELL: Training & Evaluation]
# Fits three XGBoost classifiers on the chronologically first 80% of df_ready:
# a 3-class model plus two binary specialists (home win, and home-or-draw).

# 1. Prepare data
target = 'Target'
le = LabelEncoder()
df_ready[target] = le.fit_transform(df_ready['FTR'])

# Take every class code from the fitted encoder instead of hard-coding 0/1/2.
# LabelEncoder numbers the labels in sorted order (A=0, D=1, H=2 when all three
# occur); if a result type is missing from the data, transform raises here
# rather than letting the binary targets below be built from the wrong code.
away_code, draw_code, home_code = le.transform(['A', 'D', 'H'])

# Split 80/20 by position. df_ready is date-ordered, so the test set is
# strictly later than the training set.
split_index = int(len(df_ready) * 0.80)
train = df_ready.iloc[:split_index]
test = df_ready.iloc[split_index:]

X_train, y_train = train[features], train[target]
X_test, y_test = test[features], test[target]

print(f"üèãÔ∏è Training on {len(X_train)} games...")

# --- MODEL 1: MULTI-CLASS (the general brain) ---
# Predicts the probabilities of Home / Draw / Away.
print("   ... Fitting Multi-Class Model (XGBoost)")
model_multi = xgb.XGBClassifier(
    n_estimators=300,
    learning_rate=0.03,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softprob',
    random_state=42
)
# Draws get a slightly higher sample weight so the model does not ignore them.
sample_weights = np.ones(len(y_train))
sample_weights[y_train == draw_code] = 1.20
model_multi.fit(X_train, y_train, sample_weight=sample_weights)

# --- MODEL 2: BINARY SNIPER (home win vs rest) ---
# Specialist in whether the home side actually wins.
print("   ... Fitting Binary Sniper (Home Win Only)")
y_train_win = (y_train == home_code).astype(int)
y_test_win = (y_test == home_code).astype(int)

model_sniper = xgb.XGBClassifier(
    n_estimators=200,
    learning_rate=0.02,
    max_depth=4,
    eval_metric='logloss',
    random_state=42
)
model_sniper.fit(X_train, y_train_win)

# --- MODEL 3: BINARY SHIELD (double chance 1X vs away) ---
# Specialist in "does not lose": any result other than an away win is 1X.
print("   ... Fitting Binary Shield (1X - Double Chance)")
y_train_1x = (y_train != away_code).astype(int)
y_test_1x = (y_test != away_code).astype(int)

model_shield = xgb.XGBClassifier(
    n_estimators=200,
    learning_rate=0.02,
    max_depth=4,
    eval_metric='logloss',
    random_state=42
)
model_shield.fit(X_train, y_train_1x)

print("üèÜ All Models Trained Successfully.")

## Gráficos

In [None]:
# [CELL: Visualization]
# Scores the three fitted models on the held-out test set and draws one
# confusion-matrix heatmap per model, side by side.
print("\nüìä EVALUATION REPORT")
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# 1. Multi-Class Evaluation
preds_multi = model_multi.predict(X_test)
acc_multi = accuracy_score(y_test, preds_multi)
cm_multi = confusion_matrix(y_test, preds_multi)
# Tick labels follow the encoded class order 0, 1, 2.
labels_multi = ['Away', 'Draw', 'Home']
sns.heatmap(cm_multi, annot=True, fmt='d', cmap='Blues', 
            xticklabels=labels_multi, yticklabels=labels_multi, ax=axes[0])
axes[0].set_title(f'General Model (Acc: {acc_multi:.1%})')
axes[0].set_xlabel('Predicted')
axes[0].set_ylabel('Actual')

# 2. Sniper Evaluation (Win?) - scored against the binary home-win target
preds_sniper = model_sniper.predict(X_test)
acc_sniper = accuracy_score(y_test_win, preds_sniper)
cm_sniper = confusion_matrix(y_test_win, preds_sniper)
sns.heatmap(cm_sniper, annot=True, fmt='d', cmap='Greens', 
            xticklabels=['Not Win', 'Home Win'], yticklabels=['Not Win', 'Home Win'], ax=axes[1])
axes[1].set_title(f'Sniper Model (Win Only) (Acc: {acc_sniper:.1%})')

# 3. Shield Evaluation (1X?) - scored against the binary home-or-draw target
preds_shield = model_shield.predict(X_test)
acc_shield = accuracy_score(y_test_1x, preds_shield)
cm_shield = confusion_matrix(y_test_1x, preds_shield)
sns.heatmap(cm_shield, annot=True, fmt='d', cmap='Oranges', 
            xticklabels=['Away Win', '1X (Home/Draw)'], yticklabels=['Away Win', '1X (Home/Draw)'], ax=axes[2])
axes[2].set_title(f'Shield Model (1X) (Acc: {acc_shield:.1%})')

plt.tight_layout()
plt.show()

In [None]:
# [CELL: Feature Importance]
# Pair every feature name with the importance score exposed by the fitted
# multi-class model and list them from most to least important.
feature_imp = pd.DataFrame({
    'Feature': features,
    'Importance': model_multi.feature_importances_
}).sort_values('Importance', ascending=False).reset_index(drop=True)

print("üìã TOP FEATURES (General Model):")
# Show the scores as percentages rounded to two decimals.
feature_imp['Importance %'] = (feature_imp['Importance'] * 100).round(2)
# `display` is not defined in this file; presumably the IPython/Jupyter built-in.
display(feature_imp[['Feature', 'Importance %']])

## 4. Aplicação na "Vida Real"
Aqui está a função final. Ela usa o dicionário `current_elos` (que contém os valores mais recentes após o último jogo do dataset) para fazer previsões sobre jogos futuros.

In [None]:
# [CELL: Prediction Function v3.0]
def predict_match_advanced(date_str, home_team, away_team, 
                           odd_h, odd_d, odd_a, 
                           odd_1x=None, odd_12=None, odd_x2=None):
    """Print model probabilities and a value-bet analysis for one fixture.

    Builds a single feature row from the notebook-level state (``df_ready``,
    ``current_elos``, ``features``), runs ``model_multi``, ``model_sniper``
    and ``model_shield`` on it, and compares the resulting probabilities with
    the supplied bookmaker odds.

    Args:
        date_str: Match date; only matches strictly before it are used as history.
        home_team, away_team: Team names as they appear in ``df_ready``.
        odd_h, odd_d, odd_a: Decimal odds for home win, draw and away win.
        odd_1x, odd_12, odd_x2: Optional decimal double-chance odds. When one
            is omitted, its implied-probability feature is the sum of the two
            single-outcome implied probabilities and that market is not priced.

    Returns:
        None. The whole report is printed.
    """
    match_date = pd.to_datetime(date_str)
    print(f"\nüîÆ ADVANCED PREDICTION: {home_team} vs {away_team} ({date_str})")
    print("=" * 65)

    # --- 1. PREPARE INPUT DATA ---
    past_data = df_ready[df_ready['Date'] < match_date].copy()
    if past_data.empty:
        print("‚ö†Ô∏è Error: No historical data available.")
        return

    def games_of(team):
        """Return the team's past matches, home and away, in date order."""
        return past_data[(past_data['HomeTeam'] == team) | (past_data['AwayTeam'] == team)]

    def get_days_since_last(team):
        """Days since the team's last match, clamped to [3, 15]; 7 if none."""
        team_games = games_of(team)
        if team_games.empty:
            return 7
        return min(max((match_date - team_games.iloc[-1]['Date']).days, 3), 15)

    input_data = {}

    # A) ELO & rest
    h_elo = current_elos.get(home_team, 1500)
    a_elo = current_elos.get(away_team, 1500)
    input_data['HomeElo'] = h_elo
    input_data['AwayElo'] = a_elo
    input_data['EloDiff'] = h_elo - a_elo
    input_data['Rest_Home'] = get_days_since_last(home_team)
    input_data['Rest_Away'] = get_days_since_last(away_team)

    # B) Odds input (features)
    input_data['Imp_Home'] = 1/odd_h
    input_data['Imp_Draw'] = 1/odd_d
    input_data['Imp_Away'] = 1/odd_a

    # Mathematical fallbacks for double chance when no odd is provided
    input_data['Imp_1X'] = 1/odd_1x if odd_1x else (1/odd_h + 1/odd_d)
    input_data['Imp_X2'] = 1/odd_x2 if odd_x2 else (1/odd_d + 1/odd_a)
    input_data['Imp_12'] = 1/odd_12 if odd_12 else (1/odd_h + 1/odd_a)

    # C) Historical stats, taken from each team's most recent match
    def fill_stats(team, prefix):
        """Copy the team's values from its last match into `prefix` features.

        The values are read from the Home_* columns when the team played that
        match at home and from the Away_* columns otherwise. A team without
        history is left for the mean fallback below.
        """
        games = games_of(team)
        if games.empty:
            return
        last = games.iloc[-1]
        was_home = last['HomeTeam'] == team
        for f in features:
            if prefix not in f:
                continue
            stat = f.replace(prefix, "")
            input_data[f] = last[f"Home_{stat}"] if was_home else last.get(f"Away_{stat}", 0)

    fill_stats(home_team, "Home_")
    fill_stats(away_team, "Away_")

    # D) Manual diffs
    if 'Deep_Advantage' in features:
        input_data['Deep_Advantage'] = input_data.get('Home_Avg_Deep_For_L5', 0) - input_data.get('Away_Avg_Deep_For_L5', 0)
    if 'PPDA_Diff' in features:
        input_data['PPDA_Diff'] = input_data.get('Home_Avg_PPDA_For_L5', 0) - input_data.get('Away_Avg_PPDA_For_L5', 0)
    # The table diffs must come from the two teams' own values. If they are
    # left unset, the generic fallback below fills them with the data-set
    # mean, which has nothing to do with this fixture.
    for diff_name, home_key, away_key in (
        ('PtsDiff', 'Home_Pts', 'Away_Pts'),
        ('PosDiff', 'Home_Pos', 'Away_Pos'),
    ):
        if diff_name in features and home_key in input_data and away_key in input_data:
            input_data[diff_name] = input_data[home_key] - input_data[away_key]

    # Anything still missing falls back to the column mean of the training table
    for f in features:
        if f not in input_data:
            input_data[f] = df_ready[f].mean()

    # --- 2. EXECUTE MODELS ---
    # Reorder the columns to the exact feature order the models were fitted on.
    X_new = pd.DataFrame([input_data])[features]

    # Model 1: general (fair odds). Column order is read as [Away, Draw, Home]
    # — assumes the 0/1/2 class encoding from the training cell; verify if
    # that encoding changes.
    probs = model_multi.predict_proba(X_new)[0]
    prob_a, prob_d, prob_h = probs[0], probs[1], probs[2]

    # Model 2: sniper (home-win confidence) - probability of class 1 (win)
    conf_win = model_sniper.predict_proba(X_new)[0][1]

    # Model 3: shield (1X confidence) - probability of class 1 (1X)
    conf_1x = model_shield.predict_proba(X_new)[0][1]

    # Derived double-chance probabilities: 1X averages the multi-class sum with
    # the dedicated shield model; X2 and 12 are plain sums.
    prob_1x_final = ( (prob_h + prob_d) + conf_1x ) / 2
    prob_x2_final = prob_d + prob_a
    prob_12_final = prob_h + prob_a

    # --- 3. REPORTING ---
    print(f"üìä PROBABILITIES (AI Consensus):")
    print(f"   üè† Home Win: {prob_h:.1%}  (Sniper Confidence: {conf_win:.1%})")
    print(f"   ü§ù Draw:     {prob_d:.1%}")
    print(f"   ‚úàÔ∏è Away Win: {prob_a:.1%}")
    print("-" * 65)

    # Priced markets are collected here so the best one can be picked at the end
    opportunities = []

    def check_value(name, odd, prob, type_bet):
        """Print one market line and record it; odds that are missing or <= 1 are skipped."""
        if not odd or odd <= 1:
            return
        fair = 1/prob if prob > 0 else 99
        is_value = odd > fair

        # Expected value of a one-unit stake
        ev = (prob * odd) - 1

        status = "üíé VALUE" if is_value else "Bad Price"
        print(f"   ‚Ä¢ {name:<15} | Odd: {odd:.2f} | AI Fair: {fair:.2f} | {status}")

        opportunities.append({
            "name": name,
            "odd": odd,
            "prob": prob,
            "ev": ev,
            "is_value": is_value,
            "type": type_bet
        })

    print("üí∞ MARKET ANALYSIS:")
    check_value(f"Win {home_team}", odd_h, prob_h, "HOME")
    check_value("Draw", odd_d, prob_d, "DRAW")
    check_value(f"Win {away_team}", odd_a, prob_a, "AWAY")
    print("   . . .")

    if odd_1x: check_value("DC 1X", odd_1x, prob_1x_final, "1X")
    if odd_x2: check_value("DC X2", odd_x2, prob_x2_final, "X2")
    if odd_12: check_value("DC 12 (No Draw)", odd_12, prob_12_final, "12")

    print("-" * 65)
    print("üß† FINAL VERDICT:")

    if not opportunities:
        # Every supplied odd was <= 1, so no market was recorded; indexing the
        # empty list below would raise IndexError.
        print("No priced market to evaluate (all supplied odds were <= 1).")
        return

    # 1. Sort by expected value, best first
    opportunities.sort(key=lambda x: x['ev'], reverse=True)
    best_op = opportunities[0]

    # 2. Recommendation logic
    if best_op['is_value'] and best_op['ev'] > 0.05:
        # Strong value: more than 5% expected return
        print(f"üöÄ BEST BET: {best_op['name']} (Value Bet!)")
        print(f"   Reason: The odd {best_op['odd']} is higher than fair price {1/best_op['prob']:.2f}")

    elif best_op['is_value']:
        # Small value
        print(f"‚úÖ GOOD OPTION: {best_op['name']} (Small Value)")
        print(f"   Reason: Marginal value found. Good for accas.")

    else:
        # No value anywhere: recommend the most probable outcome instead
        opportunities.sort(key=lambda x: x['prob'], reverse=True)
        safest_op = opportunities[0]

        print(f"ü§∑ NO VALUE FOUND (Odds are tight).")
        print(f"üëâ SAFEST PICK: {safest_op['name']} (Prob: {safest_op['prob']:.1%})")
        print(f"   ‚ö†Ô∏è Warning: Market price ({safest_op['odd']}) is slightly below fair odds.")

# Example usage: price one fixture with the three single-outcome odds and all
# three double-chance odds supplied. The report is printed; nothing is returned.
predict_match_advanced('2025-12-08', 'Wolves', 'Man United', 
                       odd_h=4.45, odd_d=3.93, odd_a=1.67, 
                       odd_1x=2.02, odd_12=1.22, odd_x2=1.18)