In [9]:
import pandas as pd
import pickle
from scipy.stats import poisson

# Load data
df_fixture = pd.read_csv("data/can_matches_2025.csv")
with open('data/can_group_tables.pkl', 'rb') as f:
    dict_table = pickle.load(f)
with open('data/team_strength.pkl', 'rb') as f:
    df_team_strength = pickle.load(f)

print(f"Loaded {len(df_fixture)} total matches")
print(f"Loaded {len(dict_table)} groups")
print(f"Loaded {len(df_team_strength)} teams")

Loaded 52 total matches
Loaded 6 groups
Loaded 47 teams


## Divide Matches by Phase

In [10]:
# Split matches by tournament phase
df_fixture_group = df_fixture[:36].copy()  # Group stage (36 matches)
df_fixture_8 = df_fixture[36:44].copy()  # Round of 16 (8 matches)
df_fixture_quarter = df_fixture[44:48].copy()  # Quarter-finals (4 matches)
df_fixture_semi = df_fixture[48:50].copy()  # Semi-finals (2 matches)
df_fixture_final = df_fixture[50:].copy()  # Finals (2 matches)

print(f"Group stage: {len(df_fixture_group)} matches")
print(f"Round of 16: {len(df_fixture_8)} matches")
print(f"Quarter-finals: {len(df_fixture_quarter)} matches")
print(f"Semi-finals: {len(df_fixture_semi)} matches")
print(f"Finals: {len(df_fixture_final)} matches")

Group stage: 36 matches
Round of 16: 8 matches
Quarter-finals: 4 matches
Semi-finals: 2 matches
Finals: 2 matches


## Poisson Prediction Function

In [11]:
def predict_points(home, away):
    """
    Predict expected points for both teams using Poisson distribution.
    Lambda = team_strength_home * opponent_defensive_weakness
    """
    if home in df_team_strength.index and away in df_team_strength.index:
        # Calculate lambdas (expected goals)
        lamb_home = df_team_strength.at[home,'GoalsScored'] * df_team_strength.at[away,'GoalsConceded']
        lamb_away = df_team_strength.at[away,'GoalsScored'] * df_team_strength.at[home,'GoalsConceded']
        
        prob_home, prob_away, prob_draw = 0, 0, 0
        
        # Calculate probabilities for all possible scores
        for x in range(0,11):  # home team goals
            for y in range(0, 11):  # away team goals
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (float(points_home), float(points_away))
    else:
        return (0, 0)

# Test function
print("Sample predictions:")
print(f"Maroc vs Cameroun: {predict_points('Maroc', 'Cameroun')}")
print(f"Sénégal vs Égypte: {predict_points('Sénégal', 'Égypte')}")

Sample predictions:
Maroc vs Cameroun: (1.190779998921784, 1.5356067329827199)
Sénégal vs Égypte: (1.2491376362369977, 1.4527785428154851)


## Prepare Group Tables

In [12]:
# Fix team names and reset points to 0
dict_table['Group_A'].at[0,'Équipe'] = 'Maroc'  # Correct team name

for group in dict_table:
    dict_table[group]['Pts'] = pd.to_numeric(dict_table[group]['Pts'], errors='coerce').fillna(0).astype(float)

print("✓ Group tables prepared")
print(dict_table['Group_A'])

✓ Group tables prepared
  Rang   Équipe  Pts  J  G  N  P  Bp  Bc  Diff
0    1    Maroc  0.0  0  0  0  0   0   0     0
1    2     Mali  0.0  0  0  0  0   0   0     0
2    3  Comores  0.0  0  0  0  0   0   0     0
3    4   Zambie  0.0  0  0  0  0   0   0     0


## Simulate Group Stage Matches

In [13]:
# Simulate matches and allocate points
for group in dict_table:
    teams_in_group = dict_table[group]['Équipe'].values
    df_fixture_group_filtered = df_fixture_group[df_fixture_group['HomeTeam'].isin(teams_in_group)]
    
    for index, row in df_fixture_group_filtered.iterrows():
        home, away = row['HomeTeam'], row['AwayTeam']
        points_home, points_away = predict_points(home, away)
        dict_table[group].loc[dict_table[group]['Équipe'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Équipe'] == away, 'Pts'] += points_away
    
    # Sort by points
    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index(drop=True)
    dict_table[group] = dict_table[group][['Équipe', 'Pts']]
    dict_table[group]['Pts'] = dict_table[group]['Pts'].round(0)

print("✓ Group stage simulation complete!")
print("\nGroup A final standings:")
print(dict_table)

✓ Group stage simulation complete!

Group A final standings:
{'Group_A':     Équipe  Pts
0    Maroc  5.0
1   Zambie  5.0
2     Mali  4.0
3  Comores  2.0, 'Group_B':            Équipe  Pts
0          Égypte  6.0
1  Afrique du Sud  5.0
2          Angola  4.0
3        Zimbabwe  2.0, 'Group_C':      Équipe  Pts
0   Nigeria  7.0
1   Tunisie  5.0
2   Ouganda  4.0
3  Tanzanie  1.0, 'Group_D':      Équipe  Pts
0   Sénégal  7.0
1  RD Congo  6.0
2     Bénin  3.0
3  Botswana  2.0, 'Group_E':                Équipe  Pts
0  Guinée équatoriale  5.0
1             Algérie  5.0
2        Burkina Faso  4.0
3              Soudan  2.0, 'Group_F':           Équipe  Pts
0       Cameroun  6.0
1  Côte d'Ivoire  6.0
2          Gabon  4.0
3     Mozambique  1.0}


In [14]:
dict_table

{'Group_A':     Équipe  Pts
 0    Maroc  5.0
 1   Zambie  5.0
 2     Mali  4.0
 3  Comores  2.0,
 'Group_B':            Équipe  Pts
 0          Égypte  6.0
 1  Afrique du Sud  5.0
 2          Angola  4.0
 3        Zimbabwe  2.0,
 'Group_C':      Équipe  Pts
 0   Nigeria  7.0
 1   Tunisie  5.0
 2   Ouganda  4.0
 3  Tanzanie  1.0,
 'Group_D':      Équipe  Pts
 0   Sénégal  7.0
 1  RD Congo  6.0
 2     Bénin  3.0
 3  Botswana  2.0,
 'Group_E':                Équipe  Pts
 0  Guinée équatoriale  5.0
 1             Algérie  5.0
 2        Burkina Faso  4.0
 3              Soudan  2.0,
 'Group_F':           Équipe  Pts
 0       Cameroun  6.0
 1  Côte d'Ivoire  6.0
 2          Gabon  4.0
 3     Mozambique  1.0}

## Extract Group Qualifiers

In [15]:
# Create results dictionary (1st, 2nd, 3rd places)
group_results = {}
for group in dict_table:
    group_results[group] = {
        '1er': dict_table[group].loc[0, 'Équipe'],
        '2eme': dict_table[group].loc[1, 'Équipe'],
        '3eme': dict_table[group].loc[2, 'Équipe'] if len(dict_table[group]) > 2 else None
    }

print("Group Results:")
for group, results in group_results.items():
    print(f"{group}: 1er={results['1er']}, 2eme={results['2eme']}, 3eme={results['3eme']}")

Group Results:
Group_A: 1er=Maroc, 2eme=Zambie, 3eme=Mali
Group_B: 1er=Égypte, 2eme=Afrique du Sud, 3eme=Angola
Group_C: 1er=Nigeria, 2eme=Tunisie, 3eme=Ouganda
Group_D: 1er=Sénégal, 2eme=RD Congo, 3eme=Bénin
Group_E: 1er=Guinée équatoriale, 2eme=Algérie, 3eme=Burkina Faso
Group_F: 1er=Cameroun, 2eme=Côte d'Ivoire, 3eme=Gabon


## Select Best Third-Place Finishers

In [16]:
# Identify best 4 third-place finishers
third_places = {}
for group in dict_table:
    if len(dict_table[group]) > 2:
        third_team = dict_table[group].loc[2, 'Équipe']
        third_points = dict_table[group].loc[2, 'Pts']
        third_places[group] = {
            'team': third_team,
            'points': float(third_points)
        }

# Sort by points descending
sorted_third_places = sorted(third_places.items(), key=lambda x: x[1]['points'], reverse=True)

# Map qualified thirds (top 4)
qualified_thirds = {}
for idx, (group, data) in enumerate(sorted_third_places[:4]):
    qualified_thirds[idx + 1] = {
        'team': data['team'],
        'group': group.split('_')[1]
    }

print("Best 4 Third-Place Finishers:")
for rank, info in qualified_thirds.items():
    print(f"{rank}. {info['team']} (Group {info['group']})")

Best 4 Third-Place Finishers:
1. Mali (Group A)
2. Angola (Group B)
3. Ouganda (Group C)
4. Burkina Faso (Group E)


## Save Group Simulation Results

In [17]:
# Save results for use in knockout stage
with open('data/group_results.pkl', 'wb') as f:
    pickle.dump(group_results, f)

with open('data/qualified_thirds.pkl', 'wb') as f:
    pickle.dump(qualified_thirds, f)

print("✓ Saved: data/group_results.pkl")
print("✓ Saved: data/qualified_thirds.pkl")

✓ Saved: data/group_results.pkl
✓ Saved: data/qualified_thirds.pkl
