In [2]:
import pandas as pd
import pickle
from scipy.stats import poisson
from string import ascii_uppercase as alphabet

In [3]:
# Load the group tables (pickled dict of DataFrames) and the two cleaned CSVs.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a 'dict_table' file that was produced by a trusted notebook.
with open('dict_table', 'rb') as table_file:  # context manager closes the handle
    dict_table = pickle.load(table_file)
df_historical_data = pd.read_csv('clean_fifa_euro_matches.csv')
df_fixture = pd.read_csv('clean_fifa_euro_fixture.csv')

In [5]:
# Build one row per (team, match): once from the home side's point of view
# and once from the away side's, with goals relabelled as scored / conceded.
df_home = df_historical_data[['HomeTeam', 'HomeGoals', 'AwayGoals']].rename(
    columns={'HomeTeam': 'Team', 'HomeGoals': 'GoalsScored', 'AwayGoals': 'GoalsConceded'}
)
df_away = df_historical_data[['AwayTeam', 'HomeGoals', 'AwayGoals']].rename(
    columns={'AwayTeam': 'Team', 'HomeGoals': 'GoalsConceded', 'AwayGoals': 'GoalsScored'}
)

# Stack both views and average per team: mean goals scored and conceded.
df_all_team_matches = pd.concat([df_home, df_away], ignore_index=True)
df_team_strength = df_all_team_matches.groupby(['Team']).mean()

In [6]:
def predict_points(home, away, team_strength=None, max_goals=10):
    """Estimate the expected points of both sides of a match with a Poisson model.

    Each side's goal count is treated as an independent Poisson variable whose
    rate is (its average goals scored) * (the opponent's average goals conceded).
    Every scoreline from 0-0 up to max_goals-max_goals is weighted by its
    probability and classified as home win, draw or away win.

    Args:
        home: Name of the home team (an index label of the strength table).
        away: Name of the away team (an index label of the strength table).
        team_strength: DataFrame indexed by team name with 'GoalsScored' and
            'GoalsConceded' columns. Defaults to the notebook-level
            df_team_strength when omitted.
        max_goals: Largest goal count per side included in the score grid.
            The default of 10 reproduces the original range(0, 11) grid.

    Returns:
        Tuple (points_home, points_away), each 3 * P(win) + P(draw).
        Returns (0, 0) when either team is absent from the strength table.
    """
    if team_strength is None:
        team_strength = df_team_strength

    if home not in team_strength.index or away not in team_strength.index:
        # No history for at least one side: no prediction is possible.
        return (0, 0)

    # goals_scored * goals_conceded
    lamb_home = team_strength.at[home, 'GoalsScored'] * team_strength.at[away, 'GoalsConceded']
    lamb_away = team_strength.at[away, 'GoalsScored'] * team_strength.at[home, 'GoalsConceded']

    # Evaluate each pmf once per goal count instead of once per scoreline.
    goal_counts = list(range(max_goals + 1))
    pmf_home = poisson.pmf(goal_counts, lamb_home)
    pmf_away = poisson.pmf(goal_counts, lamb_away)

    prob_home, prob_away, prob_draw = 0, 0, 0
    for x, p_x in enumerate(pmf_home):  # number of goals home team
        for y, p_y in enumerate(pmf_away):  # number of goals away team
            p = p_x * p_y
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return (points_home, points_away)

In [7]:
# Split the fixture list into tournament stages by row position.
# Each slice is copied so later edits do not touch df_fixture itself.
df_fixture_group_36 = df_fixture.iloc[:36].copy()      # rows 0-35
df_fixture_knockout = df_fixture.iloc[36:44].copy()    # rows 36-43
df_fixture_quarter = df_fixture.iloc[44:48].copy()     # rows 44-47
df_fixture_semi = df_fixture.iloc[48:50].copy()        # rows 48-49
df_fixture_final = df_fixture.iloc[50:].copy()         # row 50 onwards

In [8]:
# Run every group-stage match and add each side's expected points to its group table.
# NOTE: dict_table is updated in place, so re-running this cell without reloading
# the pickle adds the points of every match a second time.
for group in dict_table:
    group_table = dict_table[group]
    # Expected points are fractional. Cast first so the += below never has to
    # upcast an integer column while writing (newer pandas versions reject that).
    group_table['Pts'] = group_table['Pts'].astype(float)

    teams_in_group = group_table['Équipe'].values
    # Fixtures whose home side belongs to this group (the 6 matches of 1 group).
    df_fixture_group_6 = df_fixture_group_36[df_fixture_group_36['home'].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        group_table.loc[group_table['Équipe'] == home, 'Pts'] += points_home
        group_table.loc[group_table['Équipe'] == away, 'Pts'] += points_away

    # Rank by points, renumber rows from 0 (position 0 = group winner),
    # keep only team and points, and round points to whole numbers.
    dict_table[group] = (
        group_table.sort_values('Pts', ascending=False)
        .reset_index(drop=True)[['Équipe', 'Pts']]
        .round(0)
    )

In [9]:
# Replace the '1X' / '2X' placeholders of the first knockout round with the
# winner and runner-up of group X.
for group in dict_table:
    # Fixture placeholders use the bare group letter ('1A', '2B'), while the
    # table keys may carry a prefix such as 'Group A'. Taking the last
    # whitespace-separated token matches both 'A' and 'Group A' style keys.
    group_letter = str(group).split()[-1]
    group_winner = dict_table[group].loc[0, 'Équipe']
    runners_up = dict_table[group].loc[1, 'Équipe']
    df_fixture_knockout.replace(
        {f'1{group_letter}': group_winner, f'2{group_letter}': runners_up},
        inplace=True,
    )

# Winners are not known yet; '?' marks a fixture that has not been predicted.
df_fixture_knockout['winner'] = '?'

In [10]:
# Helper that swaps group-position placeholders for the matching group winner / runner-up.
def replace_teams(row):
    """Resolve the '1X' / '2X' placeholders of one fixture row.

    A label starting with '1' is replaced by the team at position 0 of
    dict_table['Group X'], a label starting with '2' by the team at
    position 1, where X is everything after the first character of the
    label. Any other label is returned unchanged.

    Args:
        row: Fixture row exposing 'home' and 'away' string labels.

    Returns:
        Tuple (home, away) with placeholders resolved.
    """
    home, away = row['home'], row['away']

    # Group identifier = the label minus its leading position digit.
    # Taken from the original labels, before any substitution.
    group_home, group_away = home[1:], away[1:]

    # Same check order as a plain if-sequence: home/away for '1', then home/away for '2'.
    for position_prefix, table_position in (('1', 0), ('2', 1)):
        if home.startswith(position_prefix):
            home = dict_table[f'Group {group_home}'].loc[table_position, 'Équipe']
        if away.startswith(position_prefix):
            away = dict_table[f'Group {group_away}'].loc[table_position, 'Équipe']

    # Hand back the (possibly substituted) team names.
    return home, away

# Apply the placeholder substitution to every row of the knockout fixtures.
df_fixture_knockout[['home', 'away']] = df_fixture_knockout[['home', 'away']].apply(replace_teams, axis=1, result_type='expand')

# Third-placed qualifiers are filled in by hand.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on df['away'] is chained assignment, which does not update the parent
# DataFrame under pandas Copy-on-Write.
df_fixture_knockout['away'] = df_fixture_knockout['away'].replace({
    '3A/D/E/F': 'Hongrie',
    '3D/E/F': 'Ukraine',
    '3A/B/C': 'Croatie',
    '3A/B/C/D': 'Pologne',
})

In [11]:
def get_winner(df_fixture_updated):
    """Fill the 'winner' column of a fixture table using predict_points.

    For each row the home side wins only when its expected points are
    strictly greater than the away side's; otherwise (including equal
    points) the away side is recorded as the winner.

    Args:
        df_fixture_updated: Fixture DataFrame with 'home' and 'away' columns.
            It is modified in place.

    Returns:
        The same DataFrame object, with 'winner' set on every row.
    """
    for match_idx, fixture in df_fixture_updated.iterrows():
        home_team = fixture['home']
        away_team = fixture['away']
        home_pts, away_pts = predict_points(home_team, away_team)
        df_fixture_updated.loc[match_idx, 'winner'] = home_team if home_pts > away_pts else away_team
    return df_fixture_updated

In [12]:
# Predict a winner for every fixture in df_fixture_knockout (updated in place) and display the table.
get_winner(df_fixture_knockout)

Unnamed: 0,home,score,away,year,winner
36,Allemagne,Match 37,Danemark,2024,Allemagne
37,Suisse,Match 38,Italie,2024,Italie
38,Espagne,Match 39,Hongrie,2024,Espagne
39,Angleterre,Match 40,Ukraine,2024,Angleterre
40,Portugal,Match 41,Croatie,2024,Portugal
41,France,Match 42,Roumanie,2024,France
42,Belgique,Match 43,Pologne,2024,Belgique
43,Pays-Bas,Match 44,Tchéquie,2024,Pays-Bas


In [13]:
def update_table(df_fixture_round_1, df_fixture_round_2):
    """Carry the winners of one round into the fixtures of the next round.

    For every row of the finished round, the match number is read from the
    second word of its 'score' label (e.g. 'Match 37' -> 37) and every cell
    of the next round equal to 'W<number>' is replaced by that row's winner.

    Args:
        df_fixture_round_1: Finished round with 'score' and 'winner' columns.
        df_fixture_round_2: Next round's fixtures; modified in place.

    Returns:
        df_fixture_round_2 (same object), with its 'winner' column reset to '?'.
    """
    for _, played in df_fixture_round_1.iterrows():
        advancing_team = played['winner']
        match_number = int(played['score'].split()[1])
        placeholder = f'W{match_number}'
        df_fixture_round_2.replace({placeholder: advancing_team}, inplace=True)

    # The next round has not been predicted yet.
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2

In [14]:
# Replace the 'W<match>' placeholders in df_fixture_quarter with the winners from df_fixture_knockout; 'winner' is reset to '?'.
update_table(df_fixture_knockout, df_fixture_quarter)

Unnamed: 0,home,score,away,year,winner
44,Espagne,Match 45,Allemagne,2024,?
45,Portugal,Match 46,France,2024,?
46,Belgique,Match 47,Pays-Bas,2024,?
47,Angleterre,Match 48,Italie,2024,?


In [15]:
# Predict a winner for every fixture in df_fixture_quarter (updated in place) and display the table.
get_winner(df_fixture_quarter)

Unnamed: 0,home,score,away,year,winner
44,Espagne,Match 45,Allemagne,2024,Espagne
45,Portugal,Match 46,France,2024,Portugal
46,Belgique,Match 47,Pays-Bas,2024,Pays-Bas
47,Angleterre,Match 48,Italie,2024,Italie


In [17]:
# Replace the 'W<match>' placeholders in df_fixture_semi with the winners from df_fixture_quarter; 'winner' is reset to '?'.
update_table(df_fixture_quarter, df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
48,Espagne,Match 49,Portugal,2024,?
49,Pays-Bas,Match 50,Italie,2024,?


In [18]:
# Predict a winner for every fixture in df_fixture_semi (updated in place) and display the table.
get_winner(df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
48,Espagne,Match 49,Portugal,2024,Espagne
49,Pays-Bas,Match 50,Italie,2024,Italie


In [19]:
# Replace the 'W<match>' placeholders in df_fixture_final with the winners from df_fixture_semi; 'winner' is reset to '?'.
update_table(df_fixture_semi, df_fixture_final)

Unnamed: 0,home,score,away,year,winner
50,Espagne,Match 51,Italie,2024,?


In [20]:
# Predict the winner of the fixture(s) in df_fixture_final (updated in place) and display the table.
get_winner(df_fixture_final)

Unnamed: 0,home,score,away,year,winner
50,Espagne,Match 51,Italie,2024,Italie
