# Premier League V4.5: Re-Optimizing for Draws

A accuracy baixou porque mudámos as regras do jogo (pesos) mas mantivemos a estratégia antiga.
Nesta etapa, vamos correr o **Grid Search** novamente, mas desta vez informando o Grid Search de que os empates são importantes.

Imports e Configuração

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
import re
import os
import codecs
import requests
from bs4 import BeautifulSoup
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "whitegrid" theme for the plots drawn later in the notebook.
sns.set_style("whitegrid")

# --- CONFIGURATION ---
DATA_FILE = 'uk_ger_football_full.csv' # Local CSV cache of the football-data.co.uk results (file name updated)
XG_FILE = 'uk_ger_football_xg.csv' # Local CSV cache of the scraped Understat xG
START_YEAR = 2014 # First season start year; per the original author, Understat's Bundesliga xG is solid from here on
END_YEAR = 2025 # Last season start year requested from football-data.co.uk

## 1. Data Acquisition (Recolha de Dados)
Vamos buscar dados reais do `football-data.co.uk`. Vamos carregar várias temporadas consecutivas para que o modelo tenha histórico suficiente para aprender padrões.

* **FTHG**: Full Time Home Goals
* **FTAG**: Full Time Away Goals
* **FTR**: Full Time Result (H=Home, D=Draw, A=Away)

In [None]:
# --- FUNCTION 1: Understat scraper ---
def scrape_understat_season(year, league_name, timeout=30):
    """Scrape per-match expected goals (xG) for one league season from Understat.

    The league page embeds its fixtures as an escaped JSON string inside a
    ``datesData = JSON.parse('...')`` assignment. This function extracts that
    string with a regex, un-escapes it and keeps only the matches flagged as
    played (``isResult``).

    Args:
        year: Starting year of the season (2014 means 2014/15).
        league_name: League slug used in the Understat URL, e.g. ``'EPL'``.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        A DataFrame with the columns Date (first 10 characters of the match
        datetime), HomeTeam, AwayTeam, Home_xG, Away_xG and League. An empty
        DataFrame is returned on a non-200 response, when the embedded data is
        not found, or on any error: this is a best-effort scraper, failures
        are printed and never raised.
    """
    print(f"üï∑Ô∏è A recolher xG ({league_name}) de {year}/{year+1}...")
    url = f"https://understat.com/league/{league_name}/{year}"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the failure instead of silently yielding an empty season.
            print(f"   HTTP {response.status_code} para {url}")
            return pd.DataFrame()

        match = re.search(r"datesData\s*=\s*JSON\.parse\('(.*?)'\)", response.text)
        if not match:
            print(f"   ‚ö†Ô∏è Padr√£o n√£o encontrado para {year}")
            return pd.DataFrame()

        # The payload is stored with backslash escapes; decode them before parsing.
        json_data = codecs.decode(match.group(1), 'unicode_escape')
        data = json.loads(json_data)

        # Keep finished matches only; fixtures without a result carry no xG yet.
        matches = [
            {
                'Date': m['datetime'][:10],
                'HomeTeam': m['h']['title'],
                'AwayTeam': m['a']['title'],
                'Home_xG': float(m['xG']['h']),
                'Away_xG': float(m['xG']['a']),
                'League': league_name
            }
            for m in data
            if m['isResult']
        ]
        return pd.DataFrame(matches)
    except Exception as e:
        # Deliberate catch-all: one bad season must not abort the whole scrape.
        print(f"‚ö†Ô∏è Erro no ano {year} ({league_name}): {e}")
        return pd.DataFrame()

# --- FUNCTION 2: Load football-data.co.uk results (E0 + D1) ---
def get_main_data(start, end):
    """Return the match results for the Premier League and the Bundesliga.

    If ``DATA_FILE`` already exists it is read and returned as-is (dates are
    left as stored in the CSV; the caller parses them). Otherwise one CSV per
    season and division is downloaded from football-data.co.uk, the frames are
    concatenated, rows without a date or a full-time result are dropped, and
    the result is sorted by date, cached to ``DATA_FILE`` and returned.

    Args:
        start: Starting year of the first season to download.
        end: Starting year of the last season to download (inclusive).

    Returns:
        DataFrame of matches with at least the columns Div, Date and FTR.

    Raises:
        ValueError: If no season file could be downloaded at all.
    """
    if os.path.exists(DATA_FILE):
        print(f"üìÇ Carregando dados locais: {DATA_FILE}")
        return pd.read_csv(DATA_FILE)

    print("üåê A descarregar dados (Premier League + Bundesliga)...")
    base_url = "https://www.football-data.co.uk/mmz4281/{}/{}.csv"
    # E0 = Premier League, D1 = Bundesliga 1
    divisions = ['E0', 'D1']

    dfs = []
    for year in range(start, end + 1):
        # Season code is the two-digit start and end years, e.g. 2014 -> "1415".
        season = f"{str(year)[-2:]}{str(year+1)[-2:]}"
        for div in divisions:
            url = base_url.format(season, div)
            try:
                df = pd.read_csv(url)
                df['Div'] = div
                # Each file is parsed on its own so a per-file date format is handled.
                df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
                dfs.append(df)
            except Exception as e:
                # Best-effort: skip an unavailable or malformed season, but say so.
                print(f"   Falha ao carregar {url}: {e}")

    if not dfs:
        raise ValueError("Nenhuma temporada foi descarregada de football-data.co.uk")

    full_df = (
        pd.concat(dfs, ignore_index=True)
        .dropna(subset=['Date', 'FTR'])
        .sort_values('Date')
        .reset_index(drop=True)
    )
    # Cache exactly what is returned, so both code paths yield the same row order.
    full_df.to_csv(DATA_FILE, index=False)
    return full_df

# --- FUNCTION 3: Team-name normalisation (England + Germany) ---
# Alias -> canonical name. Canonical names also map to themselves, so the
# lookup is safe to apply to names that are already normalised.
_TEAM_NAME_ALIASES = {
    # --- ENGLAND ---
    'Manchester United': 'Man United',
    'Manchester City': 'Man City',
    'Newcastle United': 'Newcastle',
    'West Ham United': 'West Ham',
    'Wolverhampton Wanderers': 'Wolves',
    'Brighton': 'Brighton',
    'Leicester City': 'Leicester',
    'Leeds United': 'Leeds',
    'Tottenham Hotspur': 'Tottenham',
    'Nottingham Forest': "Nott'm Forest",
    'Sheffield United': 'Sheffield United',
    'Luton': 'Luton',
    'Brentford': 'Brentford',
    'Bournemouth': 'Bournemouth',
    'Ipswich Town': 'Ipswich',
    'Hull City': 'Hull',
    'Stoke City': 'Stoke',
    'West Bromwich Albion': 'West Brom',
    'Norwich City': 'Norwich',
    'Queens Park Rangers': 'QPR',
    'Blackburn Rovers': 'Blackburn',

    # --- GERMANY (this mapping is vital for the merge) ---
    'Bayern Munich': 'Bayern Munich',
    'Bayern M√ºnchen': 'Bayern Munich',
    'Borussia Dortmund': 'Borussia Dortmund',
    'Dortmund': 'Borussia Dortmund',
    'Bayer Leverkusen': 'Bayer Leverkusen',
    'Leverkusen': 'Bayer Leverkusen',
    'RB Leipzig': 'RB Leipzig',
    'Leipzig': 'RB Leipzig',
    'Borussia Monchengladbach': 'Borussia M.Gladbach',
    "M'gladbach": 'Borussia M.Gladbach',
    'M√∂nchengladbach': 'Borussia M.Gladbach',
    'Eintracht Frankfurt': 'Eintracht Frankfurt',
    'Frankfurt': 'Eintracht Frankfurt',
    'Wolfsburg': 'Wolfsburg',
    'VfL Wolfsburg': 'Wolfsburg',
    'Hoffenheim': 'Hoffenheim',
    'TSG Hoffenheim': 'Hoffenheim',
    'Werder Bremen': 'Werder Bremen',
    'Bremen': 'Werder Bremen',
    'Mainz 05': 'Mainz 05',
    'Mainz': 'Mainz 05',
    'Augsburg': 'Augsburg',
    'FC Augsburg': 'Augsburg',
    'Stuttgart': 'VfB Stuttgart',
    'VfB Stuttgart': 'VfB Stuttgart',
    'Freiburg': 'Freiburg',
    'SC Freiburg': 'Freiburg',
    'Union Berlin': 'Union Berlin',
    'FC Union Berlin': 'Union Berlin',
    'Bochum': 'VfL Bochum',
    'VfL Bochum': 'VfL Bochum',
    'Heidenheim': 'Heidenheim',
    'FC Heidenheim': 'Heidenheim',
    'Darmstadt': 'Darmstadt',
    'SV Darmstadt 98': 'Darmstadt',
    'Koln': 'FC Koln',
    'FC K√∂ln': 'FC Koln',
    'Cologne': 'FC Koln',
    'Hertha': 'Hertha Berlin',
    'Hertha BSC': 'Hertha Berlin',
    'Schalke 04': 'Schalke 04',
    'Schalke': 'Schalke 04',
    'Hamburger SV': 'Hamburg',
    'Hamburg': 'Hamburg',
    'St. Pauli': 'St. Pauli',
    'St Pauli': 'St. Pauli',
    'Holstein Kiel': 'Holstein Kiel',
}


def clean_team_name(name):
    """Map a team-name spelling onto its canonical form.

    Args:
        name: Team name as it appears in one of the data sources.

    Returns:
        The canonical name when ``name`` is a known alias, otherwise ``name``
        itself, unchanged.
    """
    try:
        return _TEAM_NAME_ALIASES[name]
    except KeyError:
        # Unknown spellings pass through untouched.
        return name

# ==========================================
# EXECUTION
# ==========================================

# 1. Load the main results table
df = get_main_data(START_YEAR, END_YEAR)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
df = df.dropna(subset=['Date'])
# Canonicalise the team names of the results table as well. The xG table is
# normalised with clean_team_name, so without this step names such as
# "Dortmund" or "M'gladbach" never match their xG rows in the merge below.
df['HomeTeam'] = df['HomeTeam'].apply(clean_team_name)
df['AwayTeam'] = df['AwayTeam'].apply(clean_team_name)
df = df.drop_duplicates(subset=['Date', 'HomeTeam', 'AwayTeam'])

# 2. Load the cached xG table, or scrape it
if os.path.exists(XG_FILE):
    print("üìÇ Carregando xG local...")
    df_xg = pd.read_csv(XG_FILE)
else:
    print("üåê A iniciar scraping xG (PL + Bundesliga)...")
    dfs_xg = []
    # END_YEAR is inclusive, matching the seasons requested by get_main_data.
    for y in range(START_YEAR, END_YEAR + 1):
        dfs_xg.append(scrape_understat_season(y, 'EPL'))
        dfs_xg.append(scrape_understat_season(y, 'Bundesliga'))

    df_xg = pd.concat(dfs_xg, ignore_index=True)
    if not df_xg.empty:
        df_xg['HomeTeam'] = df_xg['HomeTeam'].apply(clean_team_name)
        df_xg['AwayTeam'] = df_xg['AwayTeam'].apply(clean_team_name)
        df_xg.to_csv(XG_FILE, index=False)
    else:
        print("‚ö†Ô∏è AVISO: xG vazio.")
        df_xg = pd.DataFrame(columns=['Date', 'HomeTeam', 'AwayTeam', 'Home_xG', 'Away_xG'])

# 3. MERGE
if not df_xg.empty:
    # Compare calendar days only: drop any time-of-day component on both sides.
    df_xg['Date'] = pd.to_datetime(df_xg['Date']).dt.normalize()
    df['Date'] = df['Date'].dt.normalize()
    df_xg = df_xg.drop_duplicates(subset=['Date', 'HomeTeam', 'AwayTeam'], keep='first')

    # Remove xG columns already present (e.g. from a cached file) so the merge
    # does not produce suffixed duplicates.
    cols_exclude = [c for c in df.columns if 'xG' in c]
    df_clean = df.drop(columns=cols_exclude)

    print("üîÑ A realizar o Merge...")
    df_final = df_clean.merge(
        df_xg[['Date', 'HomeTeam', 'AwayTeam', 'Home_xG', 'Away_xG']],
        on=['Date', 'HomeTeam', 'AwayTeam'],
        how='left'
    )
else:
    df_final = df.copy()

# Keep only matches dated today or earlier.
hoje = pd.Timestamp.now().normalize()
df_final = df_final[df_final['Date'] <= hoje]
df = df_final.sort_values(['Date', 'HomeTeam', 'AwayTeam']).reset_index(drop=True)
# Fill the xG values that are still missing with a neutral 1.0
df = df.fillna({'Home_xG': 1.0, 'Away_xG': 1.0})

print(f"‚úÖ Total Jogos (PL + Bundesliga): {len(df)}")
display(df.tail(3))

## 2. Feature Engineering Completa (ELO + Stats + Odds)

Aqui adicionamos as colunas B365H, B365D, B365A (Odds da Bet365).

In [None]:
# [CELL: Multi-league feature engineering]
def _pre_match_state(df):
    """Walk the matches in row order and record each side's state before kick-off.

    For every row this captures league points and position (kept per season
    and division), the global Elo ratings and the days of rest since each
    team's previous match, and only afterwards applies the row's result.

    Args:
        df: Matches with the columns Date, Season, Div, HomeTeam, AwayTeam and
            FTR, already in chronological order.

    Returns:
        ``(columns, elo_dict)``: ``columns`` maps each new column name to a
        list with one value per row of ``df``; ``elo_dict`` maps each team to
        its Elo rating after the last processed match.
    """
    k_factor = 20
    tables = {}        # (season, division) -> {team: points so far}
    elo_dict = {}      # team -> current Elo (shared by both leagues)
    last_played = {}   # team -> date of its previous match, home or away
    columns = {name: [] for name in (
        'Home_Pts', 'Away_Pts', 'Home_Pos', 'Away_Pos',
        'HomeElo', 'AwayElo', 'Rest_Home', 'Rest_Away')}

    rows = zip(df['Date'], df['Season'], df['Div'],
               df['HomeTeam'], df['AwayTeam'], df['FTR'])
    for date, season, div, home, away, result in rows:
        table = tables.setdefault((season, div), {})
        table.setdefault(home, 0)
        table.setdefault(away, 0)

        # Points and league position as they stood before this match.
        columns['Home_Pts'].append(table[home])
        columns['Away_Pts'].append(table[away])
        ranking = sorted(table.items(), key=lambda item: item[1], reverse=True)
        ranks = {team: pos for pos, (team, _) in enumerate(ranking, start=1)}
        columns['Home_Pos'].append(ranks[home])
        columns['Away_Pos'].append(ranks[away])

        # Elo before this match; unseen teams start at 1500.
        h_elo = elo_dict.get(home, 1500.0)
        a_elo = elo_dict.get(away, 1500.0)
        columns['HomeElo'].append(h_elo)
        columns['AwayElo'].append(a_elo)

        # Rest = days since the team's previous match at any venue,
        # 7 for a first appearance, capped at 15.
        for column, team in (('Rest_Home', home), ('Rest_Away', away)):
            previous = last_played.get(team)
            if previous is None:
                columns[column].append(7.0)
            else:
                columns[column].append(min(float((date - previous).days), 15.0))
        last_played[home] = date
        last_played[away] = date

        # Apply the result to the league table...
        table[home] += 3 if result == 'H' else 1 if result == 'D' else 0
        table[away] += 3 if result == 'A' else 1 if result == 'D' else 0

        # ...and to the Elo ratings (zero-sum update).
        actual = 1 if result == 'H' else 0.5 if result == 'D' else 0
        expected = 1 / (1 + 10 ** ((a_elo - h_elo) / 400))
        update = k_factor * (actual - expected)
        elo_dict[home] = h_elo + update
        elo_dict[away] = a_elo - update

    return columns, elo_dict


def _attach_rolling_form(df):
    """Add each side's mean stats over its previous 5 matches (home and away pooled).

    The averages use ``shift(1)`` so the current match is excluded, need at
    least 3 earlier matches, and are 0 when there is not enough history.

    Args:
        df: Matches with Date, HomeTeam, AwayTeam and any subset of the raw
            stat columns (FTHG, FTAG, HS, AS, HST, AST, HC, AC, Home_xG, Away_xG).

    Returns:
        ``df`` with ``Home_Avg_*_L5`` and ``Away_Avg_*_L5`` columns merged in.
    """
    cols_to_avg = ['FTHG', 'FTAG', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC', 'Home_xG', 'Away_xG']
    cols_to_avg = [c for c in cols_to_avg if c in df.columns]

    # The same raw column means "for" or "against" depending on the side.
    rename_h = {'FTHG': 'Goals', 'FTAG': 'Conceded', 'HS': 'Shots', 'AS': 'ShotsConceded',
                'HST': 'ShotsTarget', 'AST': 'STConceded', 'HC': 'Corners', 'AC': 'CornersConceded',
                'Home_xG': 'xG_For', 'Away_xG': 'xG_Against'}

    rename_a = {'FTAG': 'Goals', 'FTHG': 'Conceded', 'AS': 'Shots', 'HS': 'ShotsConceded',
                'AST': 'ShotsTarget', 'HST': 'STConceded', 'AC': 'Corners', 'HC': 'CornersConceded',
                'Away_xG': 'xG_For', 'Home_xG': 'xG_Against'}

    home_stats = df[['Date', 'HomeTeam'] + cols_to_avg].rename(columns={'HomeTeam': 'Team'}).rename(columns=rename_h)
    away_stats = df[['Date', 'AwayTeam'] + cols_to_avg].rename(columns={'AwayTeam': 'Team'}).rename(columns=rename_a)
    all_stats = pd.concat([home_stats, away_stats], ignore_index=True).sort_values(['Team', 'Date'])

    # Iterate the dict rather than a set so the feature order is the same on every run.
    metrics = [m for m in rename_h.values() if m in all_stats.columns]

    for col in metrics:
        all_stats[f'Avg_{col}_L5'] = all_stats.groupby('Team')[col].transform(
            lambda x: x.shift(1).rolling(5, min_periods=3).mean()).fillna(0)

    feature_cols = ['Date', 'Team'] + [c for c in all_stats.columns if 'Avg_' in c]
    avg_cols = [c for c in feature_cols if c not in ['Date', 'Team']]
    for side, team_col in (('Home', 'HomeTeam'), ('Away', 'AwayTeam')):
        df = df.merge(all_stats[feature_cols], left_on=['Date', team_col], right_on=['Date', 'Team'], how='left')
        df = df.rename(columns={c: f'{side}_{c}' for c in avg_cols}).drop(columns=['Team'])
    return df


def feature_engineering(df):
    """Build the model feature table from the merged match data.

    Args:
        df: Matches with at least Date (datetime), Div, HomeTeam, AwayTeam and
            FTR. Raw match stats, xG and the Bet365 odds columns (B365H,
            B365D, B365A) are used when present.

    Returns:
        ``(df_clean, existing_features, elo_dict, le_div)``:
        the feature table (rows without a result, or without Bet365 home odds
        when odds are available, are dropped; NaN/inf in features become 0),
        the list of feature column names present in it, each team's final Elo
        rating, and the fitted division ``LabelEncoder``.
    """
    print("‚öôÔ∏è Gerando Features (PL + Bundesliga)...")
    df = df.copy()

    # Encode the division. LabelEncoder numbers the labels in sorted order,
    # so with both leagues present D1 (Bundesliga) is 0 and E0 (Premier League) is 1.
    le_div = LabelEncoder()
    df['Div_Code'] = le_div.fit_transform(df['Div'])

    # 1-2. STANDINGS (per league), ELO (global) AND REST DAYS
    # A season is labelled by its starting year: matches played from August on
    # belong to that year's season, earlier months to the previous one.
    df['Season'] = df['Date'].apply(lambda x: x.year if x.month > 7 else x.year - 1)
    # Stable sort keeps the incoming order of same-day matches reproducible.
    df = df.sort_values('Date', kind='mergesort')

    state, elo_dict = _pre_match_state(df)
    for column, values in state.items():
        df[column] = values
    df['PtsDiff'] = df['Home_Pts'] - df['Away_Pts']
    df['PosDiff'] = df['Home_Pos'] - df['Away_Pos']
    df['EloDiff'] = df['HomeElo'] - df['AwayElo']

    # 3. ROLLING STATS
    df = _attach_rolling_form(df)

    # 4. EXTRAS: bookmaker implied probabilities (not normalised for the margin)
    if all(c in df.columns for c in ('B365H', 'B365D', 'B365A')):
        df['Imp_Home'] = 1 / df['B365H']
        df['Imp_Draw'] = 1 / df['B365D']
        df['Imp_Away'] = 1 / df['B365A']
        df['Imp_1X'] = df['Imp_Home'] + df['Imp_Draw']
        df['Imp_X2'] = df['Imp_Draw'] + df['Imp_Away']
        df['Imp_12'] = df['Imp_Home'] + df['Imp_Away']

    features_needed = [
        'Div_Code',
        'HomeElo', 'AwayElo', 'EloDiff',
        'Rest_Home', 'Rest_Away',
        'Imp_Home', 'Imp_Draw', 'Imp_Away',
        'Imp_1X', 'Imp_X2', 'Imp_12',
        'Home_Pts', 'Away_Pts', 'Home_Pos', 'Away_Pos', 'PtsDiff', 'PosDiff'
    ]
    features_needed += [c for c in df.columns if 'Home_Avg_' in c or 'Away_Avg_' in c]

    existing_features = [f for f in features_needed if f in df.columns]

    # Final clean-up
    print("üßπ A limpar valores infinitos/nulos para o XGBoost...")
    # Only require columns that exist: the odds features are optional above.
    required = [c for c in ('FTR', 'Imp_Home') if c in df.columns]
    df_clean = df.dropna(subset=required).copy()
    df_clean[existing_features] = df_clean[existing_features].fillna(0)
    df_clean.replace([np.inf, -np.inf], 0, inplace=True)

    return df_clean, existing_features, elo_dict, le_div

# Run the feature pipeline on the merged match table; the returned objects
# (feature table, feature names, final Elo ratings, division encoder) are kept
# as module-level names.
df_ready, features, current_elos, le_div = feature_engineering(df)
print(f"‚úÖ Features updated. Total features: {len(features)}")

## 3. Preparação e Treino do Modelo
Treino Intensivo: Grid Search (Hyperparameter Tuning) Aqui é onde "apertamos" o modelo. Vamos testar várias combinações. Nota: Isto pode demorar 2 ou 3 minutos a correr.

In [None]:
# [CELL: Fully optimised training (grid search for both the Normal and the Sniper model)]

# Replace infinities (e.g. from a division by zero) and NaNs with 0 so that
# XGBoost does not fail on them.
print("üßπ A limpar valores infinitos/nulos...")
df_ready.replace([np.inf, -np.inf], 0, inplace=True)
df_ready.fillna(0, inplace=True)

# 1. Prepare the data
target = 'Target'
le = LabelEncoder()
df_ready['Target'] = le.fit_transform(df_ready['FTR'])
# Look the class codes up from the encoder instead of hard-coding them
# (LabelEncoder numbers the sorted labels: A=0, D=1, H=2 when all three occur).
away_code, draw_code, home_code = le.transform(['A', 'D', 'H'])

# Chronological 80/20 split: the most recent 20% of matches are the test set.
split_index = int(len(df_ready) * 0.80)
train = df_ready.iloc[:split_index]
test = df_ready.iloc[split_index:]

X_train, y_train = train[features], train['Target']
X_test, y_test = test[features], test['Target']

print(f"üèãÔ∏è A iniciar Otimiza√ß√£o Dupla em {len(X_train)} jogos...")
print("    (Isto vai demorar 2-3 minutos, estamos a otimizar dois c√©rebros...)")

# Cross-validation scheme shared by both searches: folds never train on the future.
tscv = TimeSeriesSplit(n_splits=3)

# --- 1. TUNE THE NORMAL MODEL (1X2) ---
print("\nüîç A otimizar Modelo Normal (1X2)...")
xgb_multi = xgb.XGBClassifier(
    objective='multi:softprob',
    random_state=42,
    eval_metric='mlogloss'
)

param_grid_multi = {
    'n_estimators': [200, 300],
    'max_depth': [3, 4],
    'learning_rate': [0.01, 0.03],
    'subsample': [0.8]
}

grid_multi = GridSearchCV(
    estimator=xgb_multi,
    param_grid=param_grid_multi,
    cv=tscv,
    scoring='neg_log_loss',
    n_jobs=-1,
    verbose=1
)

# Up-weight draws slightly so the model pays more attention to them.
sample_weights = np.ones(len(y_train))
sample_weights[(y_train == draw_code).to_numpy()] = 1.15

grid_multi.fit(X_train, y_train, sample_weight=sample_weights)
model_multi = grid_multi.best_estimator_
print(f"‚úÖ Melhores Params (Normal): {grid_multi.best_params_}")

# --- 2. TUNE THE SNIPER MODEL (binary: home win vs anything else) ---
print("\nüîç A otimizar Modelo Sniper (Bin√°rio)...")
y_train_win = (y_train == home_code).astype(int)
y_test_win = (y_test == home_code).astype(int)

xgb_sniper = xgb.XGBClassifier(
    objective='binary:logistic',
    random_state=42,
    eval_metric='logloss'
)

# Slightly wider grid for the binary task (it may look for deeper patterns).
param_grid_sniper = {
    'n_estimators': [150, 200, 250],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.02, 0.03],
    'subsample': [0.8]
}

grid_sniper = GridSearchCV(
    estimator=xgb_sniper,
    param_grid=param_grid_sniper,
    cv=tscv,
    scoring='neg_log_loss',  # minimise the probability error
    n_jobs=-1,
    verbose=1
)

grid_sniper.fit(X_train, y_train_win)
model_sniper = grid_sniper.best_estimator_
print(f"‚úÖ Melhores Params (Sniper): {grid_sniper.best_params_}")

# --- 3. SHIELD MODEL (auxiliary binary model: home side does not lose) ---
# Reuses the Sniper's best hyper-parameters to save a third grid search.
print("\nüõ°Ô∏è A treinar Modelo Shield (Configura√ß√£o do Sniper)...")
y_train_1x = (y_train != away_code).astype(int)
model_shield = xgb.XGBClassifier(**grid_sniper.best_params_, objective='binary:logistic', random_state=42)
model_shield.fit(X_train, y_train_1x)


# --- 4. SIDE-BY-SIDE VISUALISATION ---
print("\nüìä RELAT√ìRIO VISUAL FINAL")
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Plot 1: Normal model
preds_multi = model_multi.predict(X_test)
acc_multi = accuracy_score(y_test, preds_multi)
# Pin the label order so the matrix always lines up with the tick labels,
# even when a class is absent from the test window.
cm_multi = confusion_matrix(y_test, preds_multi, labels=[away_code, draw_code, home_code])
labels_multi = ['Away', 'Draw', 'Home']

sns.heatmap(cm_multi, annot=True, fmt='d', cmap='Blues',
            xticklabels=labels_multi, yticklabels=labels_multi, ax=axes[0])
axes[0].set_title(f'Modelo Normal Otimizado (Acc: {acc_multi:.1%})')
axes[0].set_xlabel('Previsto')
axes[0].set_ylabel('Realidade')

# Plot 2: Sniper model
preds_sniper = model_sniper.predict(X_test)
acc_sniper = accuracy_score(y_test_win, preds_sniper)
cm_sniper = confusion_matrix(y_test_win, preds_sniper, labels=[0, 1])
labels_sniper = ['N√£o Ganha', 'Vit√≥ria Casa']

sns.heatmap(cm_sniper, annot=True, fmt='d', cmap='Greens',
            xticklabels=labels_sniper, yticklabels=labels_sniper, ax=axes[1])
axes[1].set_title(f'Modelo Sniper Otimizado (Acc: {acc_sniper:.1%})')
axes[1].set_xlabel('Previsto')
axes[1].set_ylabel('Realidade')

plt.tight_layout()
plt.show()

In [None]:
# [CELL: Feature Importance]
# Pair every feature name with the importance the general (1X2) model gave it,
# then order the table from most to least important.
feature_imp = pd.DataFrame({'Feature': features, 'Importance': model_multi.feature_importances_})
feature_imp = feature_imp.sort_values(by='Importance', ascending=False)
feature_imp = feature_imp.reset_index(drop=True)

print("üìã TOP FEATURES (General Model):")
# Show the importances as percentages rounded to two decimals.
feature_imp['Importance %'] = feature_imp['Importance'].mul(100).round(2)
display(feature_imp[['Feature', 'Importance %']])

## 4. Aplicação na "Vida Real"
Aqui está a função final. Ela usa o dicionário `current_elos` (que contém os valores mais recentes após o último jogo do dataset) para fazer previsões sobre jogos futuros.

In [None]:
# [CELL: Prediction Function v3.8 - double-chance odds + full comparator]
def predict_match_advanced(date_str, home_team, away_team,
                           odd_h, odd_d, odd_a,
                           division='E0', # 'E0'=Premier League, 'D1'=Bundesliga
                           odd_1x=None, odd_12=None, odd_x2=None):
    """Print a model-vs-bookmaker report and a single final pick for one match.

    Builds one feature row for the fixture from the module-level ``df_ready``,
    ``features``, ``current_elos`` and ``le_div``, scores it with
    ``model_multi``, ``model_sniper`` and ``model_shield``, and compares the
    resulting probabilities with the supplied odds.

    Args:
        date_str: Match date, in any form ``pd.to_datetime`` accepts.
        home_team: Home team name, spelled as in ``df_ready``.
        away_team: Away team name, spelled as in ``df_ready``.
        odd_h: Decimal odds for a home win (must be non-zero).
        odd_d: Decimal odds for a draw (must be non-zero).
        odd_a: Decimal odds for an away win (must be non-zero).
        division: Division code, ``'E0'`` or ``'D1'``.
        odd_1x: Optional decimal odds for double chance home/draw.
        odd_12: Optional decimal odds for double chance home/away.
        odd_x2: Optional decimal odds for double chance draw/away.

    Returns:
        None. Everything is printed. The function returns early when there is
        no match in ``df_ready`` before the date, or when none of the supplied
        odds is above 1.
    """
    match_date = pd.to_datetime(date_str)

    # Human-readable league name for the report header
    div_map = {'E0': 'Premier League', 'D1': 'Bundesliga'}
    div_name = div_map.get(division, division)

    print(f"\nüîÆ PREVIS√ÉO AVAN√áADA ({div_name}): {home_team} vs {away_team} ({date_str})")
    print("=" * 85)

    # --- 1. BUILD THE INPUT ROW (from matches before the fixture date) ---
    past_data = df_ready[df_ready['Date'] < match_date].copy()
    if past_data.empty:
        print("‚ö†Ô∏è Erro: Sem dados hist√≥ricos suficientes.")
        return

    def games_of(team):
        # Every earlier match of the team, home or away, in df_ready's row order.
        return past_data[(past_data['HomeTeam'] == team) | (past_data['AwayTeam'] == team)]

    def get_days_since_last(team):
        # Days since the team's previous match, clamped to [3, 15]; 7 if unknown.
        team_games = games_of(team)
        if team_games.empty:
            return 7
        return min(max((match_date - team_games.iloc[-1]['Date']).days, 3), 15)

    input_data = {}

    # A) Elo & context. Elo comes from current_elos, i.e. the ratings after the
    # last match of the dataset, regardless of match_date.
    h_elo, a_elo = current_elos.get(home_team, 1500), current_elos.get(away_team, 1500)
    input_data['HomeElo'], input_data['AwayElo'] = h_elo, a_elo
    input_data['EloDiff'] = h_elo - a_elo
    input_data['Rest_Home'], input_data['Rest_Away'] = get_days_since_last(home_team), get_days_since_last(away_team)

    # B) Division code
    try:
        input_data['Div_Code'] = le_div.transform([division])[0]
    except Exception:
        # Encoder cannot handle this division: fall back to the codes a
        # LabelEncoder assigns to the sorted labels (D1=0, E0=1).
        input_data['Div_Code'] = 1 if division == 'E0' else 0

    # C) Input odds -> implied-probability features (how the model "reads" the market)
    input_data['Imp_Home'] = 1/odd_h
    input_data['Imp_Draw'] = 1/odd_d
    input_data['Imp_Away'] = 1/odd_a

    # Use the double-chance odds when supplied, otherwise estimate from 1X2.
    input_data['Imp_1X'] = 1/odd_1x if odd_1x else (1/odd_h + 1/odd_d)
    input_data['Imp_X2'] = 1/odd_x2 if odd_x2 else (1/odd_d + 1/odd_a)
    input_data['Imp_12'] = 1/odd_12 if odd_12 else (1/odd_h + 1/odd_a)

    # D) Historical stats: copy the team's own values from its most recent row.
    # Those are the pre-match values of that row, so that match's own result
    # is not included.
    def fill_stats(team, prefix):
        games = games_of(team)
        if games.empty:
            return
        last = games.iloc[-1]
        # Read the columns of whichever side the team occupied in that match.
        side = "Home_" if last['HomeTeam'] == team else "Away_"
        for f in features:
            if prefix in f:
                input_data[f] = last.get(side + f.replace(prefix, ""), 0)

    fill_stats(home_team, "Home_")
    fill_stats(away_team, "Away_")

    # Derived differences must follow the values just filled in; otherwise they
    # would fall through to the training-mean fallback below.
    for diff, home_key, away_key in (('PtsDiff', 'Home_Pts', 'Away_Pts'),
                                     ('PosDiff', 'Home_Pos', 'Away_Pos')):
        if diff in features and home_key in input_data and away_key in input_data:
            input_data[diff] = input_data[home_key] - input_data[away_key]

    if 'Deep_Advantage' in features:
        input_data['Deep_Advantage'] = input_data.get('Home_Avg_Deep_For_L5', 0) - input_data.get('Away_Avg_Deep_For_L5', 0)
    if 'PPDA_Diff' in features:
        input_data['PPDA_Diff'] = input_data.get('Home_Avg_PPDA_For_L5', 0) - input_data.get('Away_Avg_PPDA_For_L5', 0)

    # Anything still missing (e.g. an unknown team) gets the dataset mean.
    for f in features:
        if f not in input_data:
            input_data[f] = df_ready[f].mean()

    # --- 2. RUN THE MODELS ---
    X_new = pd.DataFrame([input_data])[features]

    # Main model (1X2); class order is away, draw, home.
    probs = model_multi.predict_proba(X_new)[0]
    prob_a, prob_d, prob_h = probs[0], probs[1], probs[2]

    # Binary models (Sniper and Shield)
    conf_win_home = model_sniper.predict_proba(X_new)[0][1]   # confidence in a home win
    conf_safe_home = model_shield.predict_proba(X_new)[0][1]  # confidence in 1X

    # --- 3. REPORT ---
    print(f"üìä PROBABILIDADES (Consenso IA):")
    print(f"   üè† Casa ({home_team}): {prob_h:.1%} (Sniper: {conf_win_home:.1%})")
    print(f"   ü§ù Empate:           {prob_d:.1%}")
    print(f"   ‚úàÔ∏è Fora ({away_team}): {prob_a:.1%}")
    print("-" * 85)

    opportunities = []

    def analyze(name, odd, prob, bet_type):
        # Print one market line and record it; odds of 1 or less are skipped.
        if not odd or odd <= 1:
            return

        implied_prob = 1 / odd  # what the bookmaker implies
        ai_prob = prob          # what the model thinks

        # Fair odd according to the model
        fair = 1/prob if prob > 0 else 99.0

        # Expected value of a unit stake
        ev = (ai_prob * odd) - 1
        is_value = ev > 0

        status = "üíé VALOR" if is_value else ("‚úÖ JUSTO" if ev > -0.05 else "‚ùå FRACO")

        print(f"   ‚Ä¢ {name:<22} | Odd: {odd:.2f} ({implied_prob:.1%}) | IA: {fair:.2f} ({ai_prob:.1%}) | {status}")

        opportunities.append({"name": name, "odd": odd, "prob": prob, "ev": ev, "is_value": is_value, "type": bet_type})

    print("üí∞ SCANNER DE MERCADO (Comparador: Bookie vs IA):")
    analyze(f"Vitoria {home_team}", odd_h, prob_h, "HOME")
    analyze("Empate", odd_d, prob_d, "DRAW")
    analyze(f"Vitoria {away_team}", odd_a, prob_a, "AWAY")

    # Double-chance markets. 1X averages (home + draw) from the main model with
    # the Shield model's "does not lose" confidence.
    prob_1x = ((prob_h+prob_d) + conf_safe_home)/2

    if odd_1x: analyze(f"DC 1X ({home_team}/Emp)", odd_1x, prob_1x, "1X")
    if odd_12: analyze("DC 12 (Sem Empate)", odd_12, (prob_h+prob_a), "12")
    if odd_x2: analyze(f"DC X2 ({away_team}/Emp)", odd_x2, (prob_a+prob_d), "X2")

    print("-" * 85)

    # --- 4. SINGLE FINAL VERDICT ---
    if not opportunities:
        # Every supplied odd was missing or <= 1: there is nothing to choose from.
        print("Sem odds validas para analisar.")
        return

    # Rank every option by expected value; the first one is the best value.
    opportunities.sort(key=lambda x: x['ev'], reverse=True)
    best_value = opportunities[0]

    # Most probable option (first one wins ties, as with a stable sort).
    most_likely = max(opportunities, key=lambda x: x['prob'])

    print("üèÜ ESCOLHA FINAL DO MODELO:")

    final_pick = None
    reason = ""

    # Decision priority:

    # 1. Clear mathematical value (EV > 2%)
    if best_value['ev'] > 0.02:
        final_pick = best_value
        reason = f"Encontrado Valor Matem√°tico! A odd paga mais ({1/final_pick['odd']:.1%}) do que a probabilidade real ({final_pick['prob']:.1%})."

    # 2. Banker (probability > 60% and odd > 1.40)
    elif most_likely['prob'] > 0.60 and most_likely['odd'] > 1.40:
        final_pick = most_likely
        reason = f"Aposta de Confian√ßa (Banker). Sem grande valor, mas com probabilidade muito alta de acerto."

    # 3. Safety (double chance > 75%)
    elif most_likely['type'] in ['1X', 'X2'] and most_likely['prob'] > 0.75:
        final_pick = most_likely
        reason = f"Aposta de Seguran√ßa. O jogo √© perigoso, a prote√ß√£o √© a melhor op√ß√£o."

    # 4. Fallback
    else:
        if best_value['ev'] < -0.10:  # terrible value everywhere: go by probability
            final_pick = most_likely
            reason = "Mercado sem valor. Esta √© simplesmente a op√ß√£o estatisticamente mais prov√°vel."
        else:  # value is close to fair: go by value
            final_pick = best_value
            reason = "Jogo muito equilibrado. Esta √© a op√ß√£o 'menos m√°' matematicamente."

    print(f"üëâ {final_pick['name']} (Odd: {final_pick['odd']})")
    print(f"   üìù Motivo: {reason}")
    print(f"   üìâ Confian√ßa IA: {final_pick['prob']:.1%}")

# --- USAGE EXAMPLES ---

# Premier League (Wolves vs Man United), with all three double-chance odds supplied
predict_match_advanced('2025-12-08', 'Wolves', 'Man United', 
                       odd_h=5.00, odd_d=4.20, odd_a=1.56, 
                       division='E0', 
                       odd_1x=2.25, odd_12=1.19, odd_x2=1.15)

# Bundesliga (Borussia Dortmund vs Bayern Munich)
predict_match_advanced('2025-05-12', 'Borussia Dortmund', 'Bayern Munich', 
                       odd_h=3.50, odd_d=3.80, odd_a=2.00, 
                       division='D1',
                       odd_1x=1.80, odd_12=1.25, odd_x2=1.30)