In [1]:
import pickle
import warnings

import pandas as pd
from scipy.stats import poisson

# Load data
def _load_pickle(path):
    """
    Return the object stored in the pickle file at ``path``.

    NOTE: unpickling can execute arbitrary code; only point this at files
    that were produced by this project.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Full fixture list; later cells slice the knockout rounds out of it by position.
df_fixture = pd.read_csv("data/can_matches_2025.csv")
# Per-team 'GoalsScored' / 'GoalsConceded' strengths, looked up by team name.
df_team_strength = _load_pickle('data/team_strength.pkl')
# Mapping 'Group_X' -> {'1er': ..., '2eme': ...}.
group_results = _load_pickle('data/group_results.pkl')
# Mapping rank -> {'team': ..., 'group': ...} for the qualified third-placed teams.
qualified_thirds = _load_pickle('data/qualified_thirds.pkl')
# Group tables; only forwarded to replace_all_placeholders in this notebook.
dict_table = _load_pickle('data/can_group_tables.pkl')

print("✓ All data loaded")

✓ All data loaded


## Define Prediction Functions

In [2]:
def predict_points(home, away, max_goals=10):
    """
    Predict the expected points of both sides with a Poisson goal model.

    Each side's goal count is treated as an independent Poisson variable whose
    rate is that side's ``GoalsScored`` multiplied by the opponent's
    ``GoalsConceded``; both values are read from the module-level
    ``df_team_strength`` frame, which is looked up by team name.

    Parameters
    ----------
    home, away : str
        Team names to look up in ``df_team_strength.index``.
    max_goals : int, optional
        Highest goal count per side included in the scoreline grid
        (scorelines 0..max_goals are enumerated, anything above is ignored).
        Defaults to 10, the grid size this function has always used.

    Returns
    -------
    tuple
        ``(points_home, points_away)`` with points = 3 * P(win) + P(draw),
        as Python floats. If either team is missing from
        ``df_team_strength`` the fallback ``(0, 0)`` is returned.
    """
    if home not in df_team_strength.index or away not in df_team_strength.index:
        # Unknown team: no strength data to build the model from.
        return (0, 0)

    lamb_home = df_team_strength.at[home, 'GoalsScored'] * df_team_strength.at[away, 'GoalsConceded']
    lamb_away = df_team_strength.at[away, 'GoalsScored'] * df_team_strength.at[home, 'GoalsConceded']

    # The goal probabilities do not depend on the opponent's goal count, so
    # evaluate each distribution once instead of once per scoreline.
    goals = list(range(max_goals + 1))
    pmf_home = poisson.pmf(goals, lamb_home)
    pmf_away = poisson.pmf(goals, lamb_away)

    prob_home, prob_away, prob_draw = 0, 0, 0
    for x in goals:
        for y in goals:
            p = pmf_home[x] * pmf_away[y]
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return (float(points_home), float(points_away))

def get_winner(df_fixture_updated):
    """
    Fill the 'winner' column of a fixture table, one match per row.

    For every row the expected points of 'HomeTeam' and 'AwayTeam' are
    obtained from ``predict_points``. The home side is recorded as winner only
    when its expectation is strictly higher; in every other case (including
    equal expectations) the away side is recorded.

    The frame is modified in place and also returned.
    """
    fixtures = zip(df_fixture_updated.index, df_fixture_updated.to_dict('records'))
    for label, match in fixtures:
        host = match['HomeTeam']
        guest = match['AwayTeam']
        expected_host, expected_guest = predict_points(host, guest)
        df_fixture_updated.at[label, 'winner'] = host if expected_host > expected_guest else guest
    return df_fixture_updated

def update_table(df_fixture_round_1, df_fixture_round_2):
    """
    Fill the next round's fixtures with the winners of the previous round.

    For each match of ``df_fixture_round_1`` the text 'Vainqueur Match X'
    (X taken from its 'match_number') is replaced by that match's 'winner'
    wherever it is the whole value of 'HomeTeam' or 'AwayTeam' in
    ``df_fixture_round_2``.

    Returns a copy of ``df_fixture_round_2`` whose 'winner' column is reset to
    the empty string; the input frames are left untouched.
    """
    next_round = df_fixture_round_2.copy()

    for played in df_fixture_round_1.to_dict('records'):
        team = played['winner']
        number = played['match_number'].replace('Match ', '')
        token = f'Vainqueur Match {number}'

        for side in ('HomeTeam', 'AwayTeam'):
            next_round[side] = next_round[side].replace(token, team)

    next_round['winner'] = ''
    return next_round

def update_final(df_fixture_round_1, df_fixture_final):
    """
    Fill the last-round fixtures with both winners and losers of the previous round.

    For each match of ``df_fixture_round_1`` the texts 'Vainqueur Match X' and
    'Perdant Match X' (X taken from its 'match_number') are replaced in the
    'HomeTeam' and 'AwayTeam' columns of ``df_fixture_final`` by the match's
    winner and loser respectively. The loser is whichever of the two sides is
    not recorded in 'winner'.

    Returns a copy of ``df_fixture_final`` whose 'winner' column is reset to
    the empty string; the input frames are left untouched.
    """
    final_round = df_fixture_final.copy()

    for played in df_fixture_round_1.to_dict('records'):
        victor = played['winner']
        host = played['HomeTeam']
        guest = played['AwayTeam']
        beaten = host if victor != host else guest

        number = played['match_number'].replace('Match ', '')
        substitutions = (
            (f'Vainqueur Match {number}', victor),
            (f'Perdant Match {number}', beaten),
        )

        for side in ('HomeTeam', 'AwayTeam'):
            for token, team in substitutions:
                final_round[side] = final_round[side].replace(token, team)

    final_round['winner'] = ''
    return final_round

# Confirmation message printed once the helper definitions in this cell have run.
print("✓ Functions defined")

✓ Functions defined


## Replace Group Qualifiers in Round of 16

In [3]:
# Split fixtures
# Round of 16 = rows at positions 36-43 of the fixture list
# (printed as Match 37 to Match 44 in this cell's output).
df_fixture_8 = df_fixture.iloc[36:44].copy()

def _parse_third_place_groups(cell_value):
    """Return the group letters listed after '3e du groupe', e.g. ['A', 'B', 'F']."""
    options = cell_value.split('3e du groupe', 1)[1]
    return [letter.strip() for letter in options.split('/') if letter.strip()]


def _assign_thirds_greedy(slots, thirds):
    """Give each slot, in order, the first still-unused third from an allowed group."""
    assignment = {}
    used_ranks = set()
    for position, (_, _, groups) in enumerate(slots):
        for rank, info in thirds:
            if rank not in used_ranks and info['group'] in groups:
                assignment[position] = info['team']
                used_ranks.add(rank)
                break
    return assignment


def _assign_thirds_exhaustive(slots, thirds):
    """Depth-first search for an assignment filling every slot; None if there is none."""
    def search(position, used_ranks):
        if position == len(slots):
            return {}
        groups = slots[position][2]
        for rank, info in thirds:
            if rank in used_ranks or info['group'] not in groups:
                continue
            rest = search(position + 1, used_ranks | {rank})
            if rest is not None:
                rest[position] = info['team']
                return rest
        return None

    return search(0, frozenset())


def replace_all_placeholders(fixture_df, group_results, qualified_thirds, dict_table,
                             exhaustive=False):
    """
    Replace group-stage placeholders in a fixture table with team names.

    Three placeholder forms are recognised in 'HomeTeam' / 'AwayTeam':

    * '3e du groupe A/B/F' - a third-placed team from one of the listed groups,
      taken from ``qualified_thirds``; each third is used at most once.
    * '1er du groupe X'    - ``group_results['Group_X']['1er']``.
    * '2e du groupe X'     - ``group_results['Group_X']['2eme']``.

    Third-place slots are processed from the fewest allowed groups to the most.
    By default each slot greedily takes the first unused third (in the
    iteration order of ``qualified_thirds``) from an allowed group. That can
    leave a slot without any candidate even though a different distribution
    would fill every slot; such slots keep their placeholder text and a
    ``UserWarning`` is emitted so the gap is not overlooked.

    Parameters
    ----------
    fixture_df : pandas.DataFrame
        Fixtures with 'HomeTeam' and 'AwayTeam' columns. Not modified.
    group_results : dict
        Mapping 'Group_X' -> {'1er': team, '2eme': team}. Groups missing from
        the mapping leave their placeholder untouched.
    qualified_thirds : dict (or anything ``dict()`` accepts)
        Mapping rank -> {'team': name, 'group': letter}.
    dict_table : object
        Accepted for interface compatibility; not used by this function.
    exhaustive : bool, optional
        When True, first search (by backtracking) for an assignment that fills
        every third-place slot, falling back to the greedy result only if no
        complete assignment exists. When the greedy pass already fills every
        slot both modes give the same result. The search only enforces the
        allowed-group lists; NOTE(review): confirm the outcome against the
        competition's official third-place allocation table.

    Returns
    -------
    pandas.DataFrame
        A copy of ``fixture_df`` with every resolvable placeholder replaced.
    """
    df_replaced = fixture_df.copy()
    thirds = list(dict(qualified_thirds).items())

    # Handle third-place qualifiers first (constraint-first ordering)
    slots = []
    for index, row in df_replaced.iterrows():
        for col in ['HomeTeam', 'AwayTeam']:
            cell_value = str(row[col])
            if '3e du groupe' in cell_value:
                slots.append((index, col, _parse_third_place_groups(cell_value)))

    # Stable sort: slots with the fewest allowed groups are served first.
    slots.sort(key=lambda slot: len(slot[2]))

    assignment = _assign_thirds_exhaustive(slots, thirds) if exhaustive else None
    if assignment is None:
        assignment = _assign_thirds_greedy(slots, thirds)

    unresolved = []
    for position, (index, col, groups) in enumerate(slots):
        if position in assignment:
            df_replaced.at[index, col] = assignment[position]
        else:
            unresolved.append(f"{col} of row {index} (3e du groupe {'/'.join(groups)})")

    if unresolved:
        hint = '' if exhaustive else ' Pass exhaustive=True to search for a complete assignment.'
        warnings.warn(
            'No qualified third-placed team could be assigned to: '
            + ', '.join(unresolved)
            + '; the placeholder was kept.'
            + hint,
            stacklevel=2,
        )

    # Handle 1st and 2nd place qualifiers
    for index, row in df_replaced.iterrows():
        for col in ['HomeTeam', 'AwayTeam']:
            cell_value = str(row[col])
            if '1er du groupe' in cell_value and '3e' not in cell_value:
                marker, place = '1er du groupe', '1er'
            elif '2e du groupe' in cell_value:
                marker, place = '2e du groupe', '2eme'
            else:
                continue
            # strip() tolerates missing or extra whitespace around the group letter.
            group_key = f"Group_{cell_value.split(marker, 1)[1].strip()}"
            if group_key in group_results:
                df_replaced.at[index, col] = group_results[group_key][place]

    return df_replaced

# Resolve the round-of-16 placeholders and show the resulting pairings.
df_fixture_8_with_teams = replace_all_placeholders(
    fixture_df=df_fixture_8,
    group_results=group_results,
    qualified_thirds=qualified_thirds,
    dict_table=dict_table,
)
print("Round of 16 - Teams assigned:")
print(df_fixture_8_with_teams.loc[:, ['HomeTeam', 'AwayTeam', 'match_number']])

Round of 16 - Teams assigned:
              HomeTeam            AwayTeam match_number
36             Sénégal              Angola     Match 37
37              Zambie             Tunisie     Match 38
38               Maroc             Ouganda     Match 39
39      Afrique du Sud       Côte d'Ivoire     Match 40
40              Égypte                Mali     Match 41
41             Nigeria  3e du groupe A/B/F     Match 42
42  Guinée équatoriale            RD Congo     Match 43
43            Cameroun             Algérie     Match 44


In [4]:
# Manual correction if needed
df_fixture_8_with_teams.loc[df_fixture_8_with_teams['match_number'] == 'Match 42', 'AwayTeam'] = "Burkina Faso"
print("✓ Manual corrections applied")

✓ Manual corrections applied


## Predict Round of 16 Winners

In [5]:
# Start from an empty 'winner' column, then let get_winner fill it match by match.
df_fixture_8_with_teams = get_winner(df_fixture_8_with_teams.assign(winner=''))

print("Round of 16 - Winners predicted:")
print(df_fixture_8_with_teams.loc[:, ['HomeTeam', 'AwayTeam', 'match_number', 'winner']])

Round of 16 - Winners predicted:
              HomeTeam       AwayTeam match_number              winner
36             Sénégal         Angola     Match 37             Sénégal
37              Zambie        Tunisie     Match 38              Zambie
38               Maroc        Ouganda     Match 39               Maroc
39      Afrique du Sud  Côte d'Ivoire     Match 40       Côte d'Ivoire
40              Égypte           Mali     Match 41              Égypte
41             Nigeria   Burkina Faso     Match 42             Nigeria
42  Guinée équatoriale       RD Congo     Match 43  Guinée équatoriale
43            Cameroun        Algérie     Match 44            Cameroun


## Predict Quarter-Finals

In [6]:
# Quarter-finals = rows at positions 44-47; fill them with the round-of-16 winners.
df_fixture_quarter = df_fixture.iloc[44:48].copy()
df_fixture_quarter_with_teams = update_table(
    df_fixture_round_1=df_fixture_8_with_teams,
    df_fixture_round_2=df_fixture_quarter,
)
print("Round of quarter - Teams assigned:")
print(df_fixture_quarter_with_teams.loc[:, ['HomeTeam', 'AwayTeam', 'match_number']])

Round of quarter - Teams assigned:
              HomeTeam  AwayTeam match_number
44              Zambie   Sénégal     Match 45
45       Côte d'Ivoire     Maroc     Match 46
46  Guinée équatoriale   Nigeria     Match 47
47              Égypte  Cameroun     Match 48


In [7]:
# Predict each quarter-final; the 'winner' column was reset by update_table.
df_fixture_quarter_with_teams = get_winner(df_fixture_updated=df_fixture_quarter_with_teams)

print("Quarter-finals - Winners predicted:")
print(df_fixture_quarter_with_teams.loc[:, ['HomeTeam', 'AwayTeam', 'match_number', 'winner']])

Quarter-finals - Winners predicted:
              HomeTeam  AwayTeam match_number         winner
44              Zambie   Sénégal     Match 45        Sénégal
45       Côte d'Ivoire     Maroc     Match 46  Côte d'Ivoire
46  Guinée équatoriale   Nigeria     Match 47        Nigeria
47              Égypte  Cameroun     Match 48         Égypte


## Predict Semi-Finals

In [9]:
# Semi-finals = rows at positions 48-49; fill them with the quarter-final winners.
df_fixture_semi = df_fixture.iloc[48:50].copy()
df_fixture_semi_with_teams = update_table(
    df_fixture_round_1=df_fixture_quarter_with_teams,
    df_fixture_round_2=df_fixture_semi,
)
print("Round of Semi-Final - Teams assigned:")
print(df_fixture_semi_with_teams.loc[:, ['HomeTeam', 'AwayTeam', 'match_number']])

Round of Semi-Final - Teams assigned:
   HomeTeam       AwayTeam match_number
48  Sénégal         Égypte     Match 49
49  Nigeria  Côte d'Ivoire     Match 50


In [10]:
# Predict each semi-final; the 'winner' column was reset by update_table.
df_fixture_semi_with_teams = get_winner(df_fixture_updated=df_fixture_semi_with_teams)

print("Semi-finals - Winners predicted:")
print(df_fixture_semi_with_teams.loc[:, ['HomeTeam', 'AwayTeam', 'match_number', 'winner']])

Semi-finals - Winners predicted:
   HomeTeam       AwayTeam match_number   winner
48  Sénégal         Égypte     Match 49   Égypte
49  Nigeria  Côte d'Ivoire     Match 50  Nigeria


## Predict Finals

In [11]:
# Everything from position 50 onwards is the last round; update_final fills in
# both the winners and the losers of the semi-finals.
df_fixture_final = df_fixture.iloc[50:].copy()
df_fixture_final_with_teams = update_final(
    df_fixture_round_1=df_fixture_semi_with_teams,
    df_fixture_final=df_fixture_final,
)
print("Round of Final - Teams assigned:")
print(df_fixture_final_with_teams.loc[:, ['HomeTeam', 'AwayTeam', 'match_number']])



Round of Final - Teams assigned:
   HomeTeam       AwayTeam match_number
50  Sénégal  Côte d'Ivoire     Match 51
51   Égypte        Nigeria     Match 52


In [12]:
# Predict the last-round matches; the 'winner' column was reset by update_final.
df_fixture_final_with_teams = get_winner(df_fixture_updated=df_fixture_final_with_teams)

print("Finals - Winners predicted:")
print(df_fixture_final_with_teams.loc[:, ['HomeTeam', 'AwayTeam', 'match_number', 'winner']])

Finals - Winners predicted:
   HomeTeam       AwayTeam match_number   winner
50  Sénégal  Côte d'Ivoire     Match 51  Sénégal
51   Égypte        Nigeria     Match 52   Égypte


## Save Final Predictions


In [13]:
# Persist the four knockout rounds in a single pickle, keyed by round.
knockout_rounds = {
    'round_16': df_fixture_8_with_teams,
    'quarter': df_fixture_quarter_with_teams,
    'semi': df_fixture_semi_with_teams,
    'final': df_fixture_final_with_teams,
}
with open('data/knockout_results.pkl', 'wb') as f:
    pickle.dump(knockout_rounds, f)

print("✓ Saved: data/knockout_results.pkl")

✓ Saved: data/knockout_results.pkl
