# ⚽ Premier League & Champions League Predictor (v5.0)

Este notebook implementa um sistema de Machine Learning para prever resultados de futebol.
O sistema utiliza:
1. **XGBoost** (Classificação Multi-classe e Binária)
2. **Expected Goals (xG)** extraídos do Understat.
3. **Elo Ratings** dinâmicos calculados jogo-a-jogo.
4. **Valores de Mercado** históricos do Transfermarkt.

**Objetivo:** Maximizar a precisão e detetar "Value Bets" contra as casas de apostas.

Imports e Configuração

In [None]:
# --- IMPORTS ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
import os
import warnings

# Scikit-learn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import accuracy_score, confusion_matrix

# --- PROJECT HELPER FUNCTIONS ---
# (Make sure data_utils.py sits in the same folder as this notebook)
from data_utils import clean_team_name, scrape_understat_season, get_main_data, prepare_market_values, get_understat_data

# --- CONFIGURATION ---
sns.set_style("whitegrid")
# NOTE: this silences every warning for the whole session, including pandas
# and scikit-learn deprecation notices.
warnings.filterwarnings('ignore')

# File names for the match data, xG data and market values, and the range of
# years passed to the data loaders below.
DATA_FILE = 'europe_football_full.csv' 
XG_FILE = 'europe_football_xg.csv'
MARKET_VALUE_FILE = 'market_values.csv'
START_YEAR = 2014 
END_YEAR = 2025

## 1. Aquisição e Processamento de Dados (ETL)
Nesta etapa carregamos os dados das ligas, xG e valores de mercado. Se os ficheiros não existirem, o sistema faz o download/scraping automaticamente.

In [None]:
# 1. Prepare market values (Transfermarkt)
prepare_market_values()

# 2. Load league data (Football-Data)
df_main = get_main_data(START_YEAR, END_YEAR)

# 3. Load/scrape xG (Understat).
# Per the original note, the helper manages its own cache file and download.
df_understat = get_understat_data(START_YEAR, END_YEAR)

# 4. FINAL MERGE (leagues + xG + Champions League)
if not df_understat.empty:
    # Compare calendar days only, so kick-off times cannot break the join.
    df_understat['Date'] = pd.to_datetime(df_understat['Date']).dt.normalize()
    df_main['Date'] = df_main['Date'].dt.normalize()

    # Split domestic leagues from the Champions League
    is_champions = df_understat['League'] == 'Champions_League'
    df_leagues = df_understat[~is_champions]
    df_cl = df_understat[is_champions].copy()
    df_cl['Div'] = 'CL'

    # Understat has no explicit FTR, so derive it from the score. Rows with a
    # missing score keep a missing FTR instead of being labelled as draws;
    # feature_engineering later drops rows without a result.
    df_cl['FTR'] = None
    df_cl.loc[df_cl['FTHG'] > df_cl['FTAG'], 'FTR'] = 'H'
    df_cl.loc[df_cl['FTHG'] < df_cl['FTAG'], 'FTR'] = 'A'
    df_cl.loc[df_cl['FTHG'] == df_cl['FTAG'], 'FTR'] = 'D'

    # Columns shared by both sources
    cols_common = ['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', 'Div', 'Home_xG', 'Away_xG']
    df_cl_clean = df_cl[cols_common]

    # Attach xG to the league matches
    print("üîÑ A realizar Merge (Ligas)...")
    cols_exclude = [c for c in df_main.columns if 'xG' in c]
    df_main = df_main.drop(columns=cols_exclude)

    merge_keys = ['Date', 'HomeTeam', 'AwayTeam']
    # One xG row per fixture: a duplicated Understat row would otherwise
    # duplicate the league match in the left join.
    xg_lookup = df_leagues[merge_keys + ['Home_xG', 'Away_xG']].drop_duplicates(subset=merge_keys)
    df_final = df_main.merge(xg_lookup, on=merge_keys, how='left')

    # Append the Champions League matches
    print(f"üá™üá∫ A adicionar {len(df_cl_clean)} jogos da Champions League...")
    df_final = pd.concat([df_final, df_cl_clean], ignore_index=True)
else:
    df_final = df_main.copy()

# Chronological ordering and final clean-up: drop fixtures dated after today.
hoje = pd.Timestamp.now().normalize()
df_final = df_final[df_final['Date'] <= hoje]
df = df_final.sort_values(['Date']).reset_index(drop=True)
# Matches without xG get a neutral placeholder of 1.0 per side.
df = df.fillna({'Home_xG': 1.0, 'Away_xG': 1.0})

print(f"‚úÖ Total Jogos Processados: {len(df)}")
display(df.tail(3))

## 2. Feature Engineering
Criação de métricas avançadas:
* **Elo Ratings:** Atualizados a cada partida.
* **Rolling Stats:** Médias móveis de xG e Golos (últimos 5 jogos).
* **Fadiga:** Jogos com menos de 4 dias de descanso.

In [None]:
# [CELL: Feature Engineering v7.0 - Champions League Logic]
def feature_engineering(df):
    """Derive the model features from the chronological match table.

    Adds season and division codes, market values, pre-match league points
    and positions, a motivation score, rest days and fatigue flags, global
    Elo ratings, five-match rolling averages and bookmaker implied
    probabilities.

    Args:
        df: Match table with at least ``Date`` (datetime), ``Div``,
            ``HomeTeam``, ``AwayTeam`` and ``FTR``. Stat columns (goals,
            shots, corners, xG) and ``B365H``/``B365D``/``B365A`` odds are
            used when present. The input frame is not modified.

    Returns:
        Tuple ``(df_clean, features, elo_dict, le_div)``: the rows that have
        a result and valid home odds, the ordered list of feature column
        names present in ``df_clean``, the final Elo rating per team, and
        the fitted division ``LabelEncoder``.
    """
    print("‚öôÔ∏è Gerando Features (Com L√≥gica Champions League)...")
    df = df.copy()

    # 1. PREPARATION
    # A season is labelled by the year it starts in (August onwards).
    df['Season'] = df['Date'].apply(lambda x: x.year if x.month > 7 else x.year - 1).astype(int)
    # Stable sort keeps same-day matches in input order; the fresh RangeIndex
    # guarantees that `df.at[i, ...]` below addresses exactly one row.
    df = df.sort_values('Date', kind='mergesort').reset_index(drop=True)

    le_div = LabelEncoder()
    df['Div_Code'] = le_div.fit_transform(df['Div'])
    print(f"   Ligas: {le_div.classes_}")  # check that 'CL' shows up here

    # ---------------------------------------------------------
    # 2. MARKET VALUE
    # ---------------------------------------------------------
    def normalize_tm_name(name):
        """Map a Transfermarkt club name to the name used in the match data."""
        name = str(name).lower()
        if 'manchester city' in name: return 'Man City'
        if 'manchester united' in name: return 'Man United'
        if 'paris saint-germain' in name: return 'Paris Saint Germain'
        if 'leverkusen' in name: return 'Bayer Leverkusen'
        if 'monchengladbach' in name: return 'Borussia M.Gladbach'
        if 'inter' in name: return 'Inter'
        if 'milan' in name: return 'AC Milan'
        if 'sporting cp' in name: return 'Sporting CP'
        if 'benfica' in name: return 'Benfica'
        if 'porto' in name: return 'Porto'
        return name

    real_values = {}  # season -> {team name: market value}
    mv_path = 'market_values.csv'
    mv_df = None
    if os.path.exists(mv_path):
        try:
            mv_df = pd.read_csv(mv_path)
        except (OSError, ValueError) as exc:
            # Best effort: an unreadable file falls back to the tier defaults,
            # but the failure is reported instead of silently swallowed.
            print(f"   Aviso: nao foi possivel ler {mv_path} ({exc}); a usar valores de mercado por omissao.")

    if mv_df is not None:
        mv_df.columns = [str(c).strip().capitalize() for c in mv_df.columns]
        if 'Year' in mv_df.columns:
            mv_df = mv_df.rename(columns={'Year': 'Season'})

        if {'Season', 'Team', 'Value'}.issubset(mv_df.columns):
            for _, row in mv_df.iterrows():
                try:
                    s = int(row['Season'])
                except (TypeError, ValueError):
                    continue  # skip rows whose season is not a plain year
                season_values = real_values.setdefault(s, {})
                # Store under both the normalised and the raw name so either
                # spelling can be looked up.
                season_values[normalize_tm_name(row['Team'])] = row['Value']
                season_values[row['Team']] = row['Value']

    tier_1 = ['Man City', 'Arsenal', 'Liverpool', 'Real Madrid', 'Barcelona', 'Bayern Munich', 'Paris Saint Germain', 'Inter']
    tier_2 = ['Man United', 'Chelsea', 'Tottenham', 'Newcastle', 'Atletico Madrid', 'Borussia Dortmund', 'Bayer Leverkusen', 'RB Leipzig', 'Juventus', 'AC Milan', 'Napoli', 'Benfica', 'Porto', 'Sporting CP']

    def get_market_value(team, season):
        """Return the team's market value for a season, or a tier default."""
        if season in real_values:
            if team in real_values[season]: return real_values[season][team]
            tc = clean_team_name(team)
            if tc in real_values[season]: return real_values[season][tc]
            # Last resort: substring match in either direction.
            for key in real_values[season]:
                if isinstance(key, str) and (team in key or key in team): return real_values[season][key]

        if team in tier_1: return 900
        if team in tier_2: return 500
        return 150

    df['Home_Value'] = df.apply(lambda x: get_market_value(x['HomeTeam'], x['Season']), axis=1)
    df['Away_Value'] = df.apply(lambda x: get_market_value(x['AwayTeam'], x['Season']), axis=1)
    df['Value_Ratio'] = np.log1p(df['Home_Value']) - np.log1p(df['Away_Value'])

    # ---------------------------------------------------------
    # 3. POINTS AND MOTIVATION (adapted for the Champions League)
    # ---------------------------------------------------------
    valid_results = ('H', 'D', 'A')
    standings = {}  # season -> division -> team -> {'pts', 'games'}
    df['Home_Pts'] = 0
    df['Away_Pts'] = 0
    df['Home_Pos'] = 10
    df['Away_Pos'] = 10
    df['Home_Game_Num'] = 0
    df['Away_Game_Num'] = 0

    # Competition type (0 = league, 1 = cup / Champions League)
    df['Is_Cup'] = (df['Div'] == 'CL').astype(int)

    for i, row in df.iterrows():
        div = row['Div']
        h, a, res = row['HomeTeam'], row['AwayTeam'], row['FTR']

        table = standings.setdefault(row['Season'], {}).setdefault(div, {})
        home_rec = table.setdefault(h, {'pts': 0, 'games': 0})
        away_rec = table.setdefault(a, {'pts': 0, 'games': 0})

        # All values written here are pre-match, so nothing leaks from the
        # result of the match itself.
        if div != 'CL':
            df.at[i, 'Home_Pts'] = home_rec['pts']
            df.at[i, 'Away_Pts'] = away_rec['pts']

            teams_sorted = sorted(table.items(), key=lambda x: x[1]['pts'], reverse=True)
            ranks = {t: r + 1 for r, (t, _) in enumerate(teams_sorted)}
            df.at[i, 'Home_Pos'] = ranks.get(h, 10)
            df.at[i, 'Away_Pos'] = ranks.get(a, 10)
        else:
            # League points are meaningless in the Champions League: dummies.
            df.at[i, 'Home_Pts'] = 0
            df.at[i, 'Away_Pts'] = 0
            df.at[i, 'Home_Pos'] = 1
            df.at[i, 'Away_Pos'] = 1

        df.at[i, 'Home_Game_Num'] = home_rec['games'] + 1
        df.at[i, 'Away_Game_Num'] = away_rec['games'] + 1

        # A row without a recorded result must not count as a played game.
        if res in valid_results:
            home_rec['pts'] += 3 if res == 'H' else 1 if res == 'D' else 0
            away_rec['pts'] += 3 if res == 'A' else 1 if res == 'D' else 0
            home_rec['games'] += 1
            away_rec['games'] += 1

    def get_motivation(game_num, pos, is_cup):
        """Score how much is at stake for a team in this match."""
        if is_cup: return 1.3  # Champions League: maximum motivation
        if game_num < 30: return 1.0
        if pos <= 6: return 1.2
        if pos >= 16: return 1.3
        return 0.5

    df['Home_Motiv'] = df.apply(lambda x: get_motivation(x['Home_Game_Num'], x['Home_Pos'], x['Is_Cup']), axis=1)
    df['Away_Motiv'] = df.apply(lambda x: get_motivation(x['Away_Game_Num'], x['Away_Pos'], x['Is_Cup']), axis=1)

    # ---------------------------------------------------------
    # 4. FATIGUE AND ELO
    # ---------------------------------------------------------
    max_rest_days = 15
    df['Rest_Home'] = 7.0  # default for a team's first recorded match
    df['Rest_Away'] = 7.0
    df['HomeElo'] = 1500.0
    df['AwayElo'] = 1500.0
    elo_dict = {}
    last_played = {}  # team -> date of its previous match, home or away
    k_factor = 20

    # Elo is global (domestic leagues and the Champions League are mixed).
    for i, row in df.iterrows():
        h, a, res = row['HomeTeam'], row['AwayTeam'], row['FTR']
        match_day = row['Date']

        # Rest is measured from the team's previous match regardless of
        # venue; grouping by HomeTeam/AwayTeam alone would only see the
        # previous home (or away) game.
        for team, rest_col in ((h, 'Rest_Home'), (a, 'Rest_Away')):
            if team in last_played:
                df.at[i, rest_col] = min((match_day - last_played[team]).days, max_rest_days)
            last_played[team] = match_day

        h_elo = elo_dict.get(h, 1500.0)
        a_elo = elo_dict.get(a, 1500.0)
        df.at[i, 'HomeElo'] = h_elo
        df.at[i, 'AwayElo'] = a_elo

        # Only rate matches with a known result; a missing FTR would
        # otherwise be scored as an away win.
        if res in valid_results:
            actual = 1 if res == 'H' else 0.5 if res == 'D' else 0
            exp = 1 / (1 + 10 ** ((a_elo - h_elo) / 400))
            elo_dict[h] = h_elo + k_factor * (actual - exp)
            elo_dict[a] = a_elo - k_factor * (actual - exp)

    df['EloDiff'] = df['HomeElo'] - df['AwayElo']

    # Fatigue flag: a high-value squad playing on fewer than 4 days of rest.
    df['Home_Fatigue'] = ((df['Home_Value'] > 400) & (df['Rest_Home'] < 4)).astype(int)
    df['Away_Fatigue'] = ((df['Away_Value'] > 400) & (df['Rest_Away'] < 4)).astype(int)

    # ---------------------------------------------------------
    # 5. ROLLING STATS
    # ---------------------------------------------------------
    # Corner/card data is often missing for the Champions League, so only the
    # columns that actually exist are averaged.
    cols_to_avg = ['FTHG', 'FTAG', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC', 'Home_xG', 'Away_xG']
    cols_to_avg = [c for c in cols_to_avg if c in df.columns]
    stat_names = {col: f'Stat_{col}' for col in cols_to_avg}

    # NOTE(review): each stat keeps its home/away meaning for the match, so
    # e.g. Stat_FTHG on a team's away match is the opponent's goals. The
    # averages describe the matches a team took part in.
    home_stats = df[['Date', 'HomeTeam'] + cols_to_avg].rename(columns={'HomeTeam': 'Team', **stat_names})
    away_stats = df[['Date', 'AwayTeam'] + cols_to_avg].rename(columns={'AwayTeam': 'Team', **stat_names})

    all_stats = pd.concat([home_stats, away_stats]).sort_values(['Team', 'Date'])

    for col in [c for c in all_stats.columns if 'Stat_' in c]:
        # shift(1) excludes the current match from its own average.
        all_stats[f'Avg_{col}_L5'] = all_stats.groupby('Team')[col].transform(lambda x: x.shift(1).rolling(5, min_periods=3).mean()).fillna(0)

    feat_cols = ['Date', 'Team'] + [c for c in all_stats.columns if 'Avg_' in c]
    avg_cols = [c for c in feat_cols if 'Avg_' in c]
    # One row per (Date, Team) so the joins below can never multiply matches.
    team_form = all_stats[feat_cols].drop_duplicates(subset=['Date', 'Team'])

    df = df.merge(team_form, left_on=['Date', 'HomeTeam'], right_on=['Date', 'Team'], how='left').drop(columns=['Team'])
    df = df.rename(columns={c: f'Home_{c}' for c in avg_cols})
    df = df.merge(team_form, left_on=['Date', 'AwayTeam'], right_on=['Date', 'Team'], how='left').drop(columns=['Team'])
    df = df.rename(columns={c: f'Away_{c}' for c in avg_cols})

    # Odds features (0 when the odds columns are absent, so the code below
    # still runs for data without bookmaker prices).
    if 'B365H' not in df.columns: df['B365H'] = 0
    if 'B365D' not in df.columns: df['B365D'] = 0
    if 'B365A' not in df.columns: df['B365A'] = 0

    df['Imp_Home'] = np.where(df['B365H'] > 0, 1 / df['B365H'], 0)
    df['Imp_Draw'] = np.where(df['B365D'] > 0, 1 / df['B365D'], 0)
    df['Imp_Away'] = np.where(df['B365A'] > 0, 1 / df['B365A'], 0)

    # 1X / X2 / 12 (estimated from the 1X2 prices)
    df['Imp_1X'] = df['Imp_Home'] + df['Imp_Draw']
    df['Imp_X2'] = df['Imp_Draw'] + df['Imp_Away']
    df['Imp_12'] = df['Imp_Home'] + df['Imp_Away']

    # FINAL FEATURE LIST
    features_needed = [
        'Div_Code', 'Is_Cup',
        'HomeElo', 'AwayElo', 'EloDiff',
        'Rest_Home', 'Rest_Away',
        'Home_Value', 'Away_Value', 'Value_Ratio',
        'Home_Fatigue', 'Away_Fatigue', 'Home_Motiv', 'Away_Motiv',
        'Imp_Home', 'Imp_Draw', 'Imp_Away',
        'Imp_1X', 'Imp_X2', 'Imp_12',
        'Home_Pts', 'Away_Pts', 'Home_Pos', 'Away_Pos'
    ]
    features_needed += [c for c in df.columns if 'Home_Avg_' in c or 'Away_Avg_' in c]
    # De-duplicate while keeping order: a set would reshuffle the columns on
    # every interpreter session and make training non-reproducible.
    features_needed = list(dict.fromkeys(features_needed))
    existing_features = [f for f in features_needed if f in df.columns]

    print("üßπ Limpeza Final (Removendo jogos sem Odds para Treino)...")
    # Training only uses matches with odds; rows without them still fed the
    # Elo and rolling-stat calculations above.
    df_clean = df.dropna(subset=['FTR']).copy()
    df_clean = df_clean[df_clean['Imp_Home'] > 0]

    df_clean[existing_features] = df_clean[existing_features].fillna(0)
    df_clean.replace([np.inf, -np.inf], 0, inplace=True)

    return df_clean, existing_features, elo_dict, le_div

In [None]:
# Run the feature engineering step on the merged match table `df`.
# Keeps the cleaned frame, the feature column list, the final Elo ratings and
# the fitted division encoder for the training and prediction cells.
df_ready, features, current_elos, le_div = feature_engineering(df)
print(f"‚úÖ Features updated. Total features: {len(features)}")

## 3. Treino do Modelo (XGBoost)
Utilizamos `GridSearchCV` com `TimeSeriesSplit` para garantir que o modelo não aprende com o futuro.
* **Modelo Multi:** Prevê Probabilidades (Home, Draw, Away).
* **Modelo Sniper:** Binário, focado apenas na vitória da casa.
* **Modelo Shield:** Binário, focado em evitar a derrota (1X).

In [None]:
# 1. Build the target
le = LabelEncoder()
df_ready['Target'] = le.fit_transform(df_ready['FTR'])
# Read the class codes from the fitted encoder instead of assuming
# 0=Away, 1=Draw, 2=Home; this raises if a result class is missing from the
# data rather than silently training on mislabelled binary targets.
away_code, draw_code, home_code = le.transform(['A', 'D', 'H'])

# 2. Chronological split (80/20): the most recent matches form the test set.
split_index = int(len(df_ready) * 0.80)
train = df_ready.iloc[:split_index]
test = df_ready.iloc[split_index:]

X_train, y_train = train[features], train['Target']
X_test, y_test = test[features], test['Target']

print(f"üèãÔ∏è A treinar em {len(X_train)} jogos...")
# Forward-chaining folds so every validation fold is later than its
# training fold.
tscv = TimeSeriesSplit(n_splits=3)

# --- MODEL 1: NORMAL (1X2) ---
print("üîç Otimizando Modelo Normal...")
xgb_multi = xgb.XGBClassifier(objective='multi:softprob', random_state=42, eval_metric='mlogloss')
# Shrink or widen this grid to trade search time against accuracy.
param_grid_multi = {'n_estimators': [200, 300], 'max_depth': [3, 4], 'learning_rate': [0.01, 0.03], 'subsample': [0.8]}
grid_multi = GridSearchCV(estimator=xgb_multi, param_grid=param_grid_multi, cv=tscv, scoring='neg_log_loss', n_jobs=-1)

# Up-weight draws slightly so the model does not ignore them.
sample_weights = np.where(y_train == draw_code, 1.15, 1.0)

grid_multi.fit(X_train, y_train, sample_weight=sample_weights)
model_multi = grid_multi.best_estimator_
print(f"‚úÖ Melhores Params (Normal): {grid_multi.best_params_}")

# --- MODEL 2: SNIPER (home win only) ---
print("üîç Otimizando Modelo Sniper...")
y_train_win = (y_train == home_code).astype(int)
xgb_sniper = xgb.XGBClassifier(objective='binary:logistic', random_state=42, eval_metric='logloss')
# Slightly smaller grid than the multi-class model to keep the search fast.
param_grid_sniper = {'n_estimators': [150, 200], 'max_depth': [3, 4], 'learning_rate': [0.01, 0.02]}
grid_sniper = GridSearchCV(estimator=xgb_sniper, param_grid=param_grid_sniper, cv=tscv, scoring='neg_log_loss', n_jobs=-1)
grid_sniper.fit(X_train, y_train_win)
model_sniper = grid_sniper.best_estimator_

# --- MODEL 3: SHIELD (double chance 1X) ---
print("üõ°Ô∏è Treinando Modelo Shield...")
# Positive class = home side does not lose (home win or draw).
y_train_1x = (y_train != away_code).astype(int)
# Reuses the Sniper's best hyper-parameters instead of running a new search.
model_shield = xgb.XGBClassifier(**grid_sniper.best_params_, objective='binary:logistic', random_state=42)
model_shield.fit(X_train, y_train_1x)

print("üèÅ Treino Conclu√≠do.")

In [None]:
# 1. Figure with the two confusion matrices
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# --- Plot 1: multi-class (1X2) matrix ---
preds_multi = model_multi.predict(X_test)
# Explicit labels keep the matrix 3x3, aligned with the tick labels, even
# when a class is absent from the test window or the predictions.
cm_multi = confusion_matrix(y_test, preds_multi, labels=[0, 1, 2])
sns.heatmap(cm_multi, annot=True, fmt='d', cmap='Blues', ax=axes[0],
            xticklabels=['Away', 'Draw', 'Home'], yticklabels=['Away', 'Draw', 'Home'])
axes[0].set_title(f'Modelo Normal (Acc: {accuracy_score(y_test, preds_multi):.1%})')

# --- Plot 2: Sniper (home win vs. rest) matrix ---
y_test_win = (y_test == 2).astype(int)
preds_sniper = model_sniper.predict(X_test)
cm_sniper = confusion_matrix(y_test_win, preds_sniper, labels=[0, 1])
sns.heatmap(cm_sniper, annot=True, fmt='d', cmap='Greens', ax=axes[1],
            xticklabels=['Not Win', 'Win'], yticklabels=['Not Win', 'Win'])
axes[1].set_title(f'Modelo Sniper (Acc: {accuracy_score(y_test_win, preds_sniper):.1%})')

plt.tight_layout()
plt.show()

# 2. Feature importance table (all features)
print("\nüìä RANKING DE IMPORT√ÇNCIA DAS FEATURES:")
feature_imp = pd.DataFrame({
    'Feature': features,
    'Importance': model_multi.feature_importances_
})

# Express as a percentage and sort from most to least important.
feature_imp['Importance %'] = (feature_imp['Importance'] * 100).round(2)
feature_imp = feature_imp.sort_values('Importance', ascending=False).reset_index(drop=True)

# Show every row; the context manager restores the display option even if
# rendering fails.
with pd.option_context('display.max_rows', None):
    display(feature_imp[['Feature', 'Importance %']])

## 4. Previsão de Jogos Futuros
Função para inserir dados de jogos futuros e obter probabilidades da IA e análise de valor.

In [None]:
# [CELL: Prediction Function v8.1 - league validation]
def predict_match_advanced(date_str, home_team, away_team, 
                           odd_h, odd_d, odd_a, 
                           division='E0',
                           odd_1x=None, odd_12=None, odd_x2=None):
    """Print model probabilities and a value-bet report for one fixture.

    Builds a single feature row for the fixture from the history in the
    notebook globals ``df_ready``, ``features``, ``current_elos`` and
    ``le_div``, scores it with ``model_multi``, ``model_sniper`` and
    ``model_shield``, and compares the probabilities with the supplied odds.

    Args:
        date_str: Match date, parseable by ``pd.to_datetime``.
        home_team: Home team name as spelled in ``df_ready``.
        away_team: Away team name as spelled in ``df_ready``.
        odd_h: Decimal odds for a home win (must be > 1).
        odd_d: Decimal odds for a draw (must be > 1).
        odd_a: Decimal odds for an away win (must be > 1).
        division: Competition code (``'E0'``, ``'D1'``, ``'SP1'``, ``'F1'``,
            ``'I1'`` or ``'CL'``).
        odd_1x: Optional decimal odds for the 1X double chance.
        odd_12: Optional decimal odds for the 12 double chance.
        odd_x2: Optional decimal odds for the X2 double chance.

    Returns:
        None. The report is printed; the function returns early, after
        printing the reason, when validation fails.
    """
    match_date = pd.to_datetime(date_str)

    # The 1X2 prices feed 1/odd features, so reject unusable values up front
    # instead of dividing by zero or feeding implied probabilities >= 1.
    for odd_label, odd_value in (('odd_h', odd_h), ('odd_d', odd_d), ('odd_a', odd_a)):
        if odd_value is None or odd_value <= 1:
            print(f"Erro: {odd_label}={odd_value!r} invalida; as odds decimais tem de ser superiores a 1.")
            return

    div_map = {
        'E0': 'Premier League üá¨üáß', 'D1': 'Bundesliga üá©üá™', 
        'SP1': 'La Liga üá™üá∏', 'F1': 'Ligue 1 üá´üá∑', 
        'I1': 'Serie A üáÆüáπ', 'CL': 'Champions League üá™üá∫'
    }
    div_name = div_map.get(division, division)

    # --- 0. SAFETY VALIDATION ---
    # A team's league is the most common division among its last 20 recorded
    # matches. Any pairing is allowed in the Champions League.
    if division != 'CL':
        print("üõ°Ô∏è A validar equipas...")
        for team in [home_team, away_team]:
            team_games = df_ready[(df_ready['HomeTeam'] == team) | (df_ready['AwayTeam'] == team)].tail(20)
            if not team_games.empty:
                leagues = team_games[team_games['Div'] != 'CL']['Div'].value_counts()
                if not leagues.empty:
                    main_league = leagues.index[0]
                    if main_league != division:
                        print(f"‚ùå ERRO CR√çTICO: {team} joga na {div_map.get(main_league, main_league)}, n√£o na {div_name}!")
                        print("   -> Corre a fun√ß√£o com a divis√£o correta ou muda a equipa.")
                        return

    print(f"\nüîÆ PREVIS√ÉO AVAN√áADA ({div_name}): {home_team} vs {away_team} ({date_str})")
    print("=" * 100)

    past_data = df_ready[df_ready['Date'] < match_date].copy()
    if past_data.empty: 
        print("‚ö†Ô∏è Erro: Sem dados hist√≥ricos suficientes.")
        return

    # Same season rule as feature_engineering: a season starts in August.
    match_season = match_date.year if match_date.month > 7 else match_date.year - 1

    # --- 1. CONTEXT & FEATURES ---
    def team_history(team):
        """Return the team's past matches (home or away), oldest first."""
        return past_data[(past_data['HomeTeam'] == team) | (past_data['AwayTeam'] == team)]

    def get_market_value(team):
        """Return the team's most recent market value, or a tier fallback."""
        team_games = team_history(team)
        if not team_games.empty:
            last = team_games.iloc[-1]
            if last['HomeTeam'] == team: return last.get('Home_Value', 150)
            return last.get('Away_Value', 150)
        tier_1 = ['Man City', 'Real Madrid', 'Bayern Munich', 'Paris Saint Germain', 'Inter']
        if team in tier_1: return 800
        return 200

    def get_context(team):
        """Return (motivation, position, rest days, points) before the match.

        Mirrors the training features: rest is capped at 15 days, and the
        game number and points come from the current season and division.
        """
        team_games = team_history(team)
        pos, rest, pts, game_num = 10, 7, 0, 1
        if not team_games.empty:
            last = team_games.iloc[-1]
            pos = last['Home_Pos'] if last['HomeTeam'] == team else last['Away_Pos']
            rest = min((match_date - last['Date']).days, 15)

            season_games = team_games[(team_games['Season'] == match_season) & (team_games['Div'] == division)]
            if not season_games.empty:
                prev = season_games.iloc[-1]
                at_home = prev['HomeTeam'] == team
                # The stored values are pre-match, so add the previous match.
                game_num = (prev['Home_Game_Num'] if at_home else prev['Away_Game_Num']) + 1
                pts = prev['Home_Pts'] if at_home else prev['Away_Pts']
                if prev['FTR'] == 'D':
                    pts += 1
                elif prev['FTR'] == ('H' if at_home else 'A'):
                    pts += 3

        if division == 'CL':
            # Training uses maximum motivation and zero points for CL matches.
            return 1.3, pos, rest, 0
        # Same thresholds as get_motivation in feature_engineering.
        if game_num < 30:
            motiv = 1.0
        elif pos <= 6:
            motiv = 1.2
        elif pos >= 16:
            motiv = 1.3
        else:
            motiv = 0.5
        return motiv, pos, rest, pts

    input_data = {}

    h_motiv, h_pos, h_rest, h_pts = get_context(home_team)
    a_motiv, a_pos, a_rest, a_pts = get_context(away_team)
    h_val = get_market_value(home_team)
    a_val = get_market_value(away_team)

    input_data['Home_Motiv'] = h_motiv
    input_data['Away_Motiv'] = a_motiv
    input_data['Rest_Home'] = h_rest
    input_data['Rest_Away'] = a_rest
    input_data['Home_Value'] = h_val
    input_data['Away_Value'] = a_val
    input_data['Value_Ratio'] = np.log1p(h_val) - np.log1p(a_val)
    input_data['Is_Cup'] = 1 if division == 'CL' else 0
    input_data['Home_Fatigue'] = 1 if (h_val > 400 and h_rest < 4) else 0
    input_data['Away_Fatigue'] = 1 if (a_val > 400 and a_rest < 4) else 0

    h_elo = current_elos.get(home_team, 1500)
    a_elo = current_elos.get(away_team, 1500)
    input_data['HomeElo'] = h_elo
    input_data['AwayElo'] = a_elo
    input_data['EloDiff'] = h_elo - a_elo
    input_data['Home_Pts'] = h_pts
    input_data['Away_Pts'] = a_pts
    input_data['Home_Pos'] = h_pos
    input_data['Away_Pos'] = a_pos

    try:
        input_data['Div_Code'] = le_div.transform([division])[0]
    except ValueError:
        # Division never seen by the encoder: fall back to code 0.
        input_data['Div_Code'] = 0

    # Odds
    input_data['Imp_Home'] = 1 / odd_h
    input_data['Imp_Draw'] = 1 / odd_d
    input_data['Imp_Away'] = 1 / odd_a
    input_data['Imp_1X'] = 1 / odd_1x if odd_1x else (1 / odd_h + 1 / odd_d)
    input_data['Imp_X2'] = 1 / odd_x2 if odd_x2 else (1 / odd_d + 1 / odd_a)
    input_data['Imp_12'] = 1 / odd_12 if odd_12 else (1 / odd_h + 1 / odd_a)

    # Rolling stats
    def fill_stats(team, side_prefix):
        """Copy the team's latest rolling averages into this fixture's side.

        Feature names are matched by prefix only. Substring matching would
        also hit the inner ``Home_xG``/``Away_xG`` part of names such as
        ``Home_Avg_Stat_Away_xG_L5`` and corrupt the lookup.
        """
        games = team_history(team)
        if games.empty:
            return
        last = games.iloc[-1]
        # Side the team occupied in its latest match.
        source_prefix = 'Home_' if last['HomeTeam'] == team else 'Away_'
        for f in features:
            if f.startswith(side_prefix + 'Avg_'):
                source_col = source_prefix + f[len(side_prefix):]
                if source_col in last.index:
                    input_data[f] = last[source_col]

    fill_stats(home_team, 'Home_')
    fill_stats(away_team, 'Away_')

    # Anything still missing gets the historical mean of that feature.
    for f in features: 
        if f not in input_data: input_data[f] = df_ready[f].mean()

    # --- 2. EXECUTION ---
    X_new = pd.DataFrame([input_data])[features]
    probs = model_multi.predict_proba(X_new)[0] 
    # LabelEncoder sorts classes alphabetically: A, D, H.
    prob_a, prob_d, prob_h = probs[0], probs[1], probs[2]
    conf_win = model_sniper.predict_proba(X_new)[0][1]
    try:
        conf_shield = model_shield.predict_proba(X_new)[0][1]
    except Exception:
        # Best effort: without the Shield model use the multi-class estimate.
        conf_shield = prob_h + prob_d

    # --- 3. VISUAL REPORT ---
    print(f"üìä PROBABILIDADES (IA):")
    print(f"   üè† Casa: {prob_h:.1%} (Sniper: {conf_win:.1%})")
    print(f"   ü§ù Empate: {prob_d:.1%}")
    print(f"   ‚úàÔ∏è Fora: {prob_a:.1%}")
    print("-" * 100)

    opportunities = []

    def analyze(name, odd, prob, bet_type="Standard"):
        """Print one market line and record it as a betting opportunity."""
        if not odd or odd <= 1: return
        implied_prob = 1 / odd 
        fair_odd = 1 / prob if prob > 0 else 99.0
        ev = (prob * odd) - 1
        status = "üíé VALOR!" if ev > 0 else ("‚úÖ JUSTO" if ev > -0.05 else "‚ùå FRACO")
        print(f"   ‚Ä¢ {name:<35} | Odd: {odd:.2f} ({implied_prob:.1%}) | IA: {fair_odd:.2f} ({prob:.1%}) | {status}")
        opportunities.append({"name": name, "odd": odd, "prob": prob, "ev": ev})

    print("üí∞ SCANNER DE MERCADO (Compara√ß√£o de Percentagens):")
    analyze(f"Vitoria {home_team}", odd_h, prob_h, "HOME")
    analyze("Empate", odd_d, prob_d, "DRAW")
    analyze(f"Vitoria {away_team}", odd_a, prob_a, "AWAY")

    # 1X estimate: blend the two models that estimate "home does not lose".
    # The Sniper is excluded because it predicts a home win only, which would
    # drag the 1X estimate down.
    prob_1x_val = ((prob_h + prob_d) + conf_shield) / 2

    if odd_1x: analyze(f"DC 1X ({home_team} ou Empate)", odd_1x, prob_1x_val, "1X")
    if odd_12: analyze(f"DC 12 ({home_team} ou {away_team})", odd_12, (prob_h + prob_a), "12")
    if odd_x2: analyze(f"DC X2 ({away_team} ou Empate)", odd_x2, (prob_a + prob_d), "X2")

    print("-" * 100)

    # --- 4. FINAL VERDICT ---
    # The three 1X2 markets are always present because their odds were
    # validated above, so `opportunities` is never empty here.
    opportunities.sort(key=lambda x: x['ev'], reverse=True)
    best = opportunities[0]

    most_likely = sorted(opportunities, key=lambda x: x['prob'], reverse=True)[0]
    final_pick = best
    reason = "Melhor valor matem√°tico dispon√≠vel (EV Positivo)."

    # Prefer the dominant outcome when no market offers meaningful value.
    if most_likely['prob'] > 0.65 and best['ev'] < 0.05:
        final_pick = most_likely
        reason = f"Probabilidade Dominante ({final_pick['prob']:.1%}). Aposta 'Banker'."

    print(f"üèÜ ESCOLHA RACIONAL (Dinheiro): üëâ {final_pick['name']} (Odd: {final_pick['odd']})")
    print(f"   üìù Motivo: {reason}")
    print(f"   üìâ Confian√ßa IA: {final_pick['prob']:.1%}")
    print("")

    if most_likely['name'] != final_pick['name']:
        print(f"üé≤ RESULTADO MAIS PROV√ÅVEL:   üëâ {most_likely['name']} ({most_likely['prob']:.1%})")
        print("   ‚ö†Ô∏è Nota: Este √© o desfecho que a IA acha que vai acontecer, mas a Odd paga pouco.")
    else:
        print("üé≤ RESULTADO MAIS PROV√ÅVEL:   (Igual √† Escolha Racional)")

In [None]:
# Example: Champions League fixture with all three double-chance odds supplied.
predict_match_advanced('2025-12-09', 'Inter', 'Liverpool', 
                       odd_h=2.02, odd_d=3.60, odd_a=3.25, 
                       division='CL',
                       odd_1x=1.30, odd_12=1.25, odd_x2=1.65)

In [None]:
# Example: division 'I1' fixture.
# NOTE(review): the tier lists elsewhere in this notebook spell the club
# 'AC Milan'; confirm 'Milan' matches the team name used in df_ready.
predict_match_advanced('2025-12-08', 'Torino', 'Milan', 
                       odd_h=5.25, odd_d=3.70, odd_a=1.61, 
                       division='I1',
                       odd_1x=2.12, odd_12=1.24, odd_x2=1.15)

In [None]:
# Example: division 'E0' fixture.
predict_match_advanced('2025-12-08', 'Wolves', 'Man United', 
                       odd_h=5.00, odd_d=4.20, odd_a=1.56, 
                       division='E0', 
                       odd_1x=2.25, odd_12=1.19, odd_x2=1.15)