In [542]:
#imports
import pandas as pd
import pickle
from scipy.stats import poisson

In [543]:
# Load the per-group tables (dict: group key -> DataFrame with 'Team' and 'Pts')
# plus the cleaned historical results and the 2024 fixture list.
# NOTE: pickle.load can execute arbitrary code; only load 'dict_table' from a trusted source.
with open('dict_table', 'rb') as table_file:  # context manager closes the handle
    dict_table = pickle.load(table_file)
df_historical_data = pd.read_csv('clean_fifa_euro_matches.csv')
df_fixture = pd.read_csv('clean_fifa_euro_fixtures.csv')

Calculate Team Strength

In [544]:
# Re-express every match from each side's point of view: one row per team
# appearance, holding that team's goals scored and goals conceded.
df_home = df_historical_data[['HomeTeam', 'HomeGoals', 'AwayGoals']].rename(
    columns={'HomeTeam': 'Team', 'HomeGoals': 'GoalsScored', 'AwayGoals': 'GoalsConceded'}
)
df_away = df_historical_data[['AwayTeam', 'HomeGoals', 'AwayGoals']].rename(
    columns={'AwayTeam': 'Team', 'HomeGoals': 'GoalsConceded', 'AwayGoals': 'GoalsScored'}
)

# Stack home and away appearances, then average per team.
df_all_appearances = pd.concat([df_home, df_away], ignore_index=True)
df_team_strength = df_all_appearances.groupby(['Team']).mean()
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Albania,0.333333,1.0
Austria,0.7,1.2
Belgium,1.409091,1.272727
Bulgaria,0.666667,2.166667
CIS,0.333333,1.333333
Croatia,1.363636,1.272727
Czech Republic,1.241379,1.275862
Czechoslovakia,1.5,1.25
Denmark,1.272727,1.515152
England,1.342105,0.973684


Function PredictPoints

In [545]:
def PredictPoints(home, away, max_goals=10, team_strength=None):
    """Estimate the expected points each side takes from a single match.

    Each side's goal count is treated as an independent Poisson variable whose
    rate is that side's average goals scored multiplied by the opponent's
    average goals conceded. Scoreline probabilities are summed into win, draw
    and loss probabilities and converted to points (3 per win, 1 per draw).

    Parameters
    ----------
    home, away : str
        Team names, looked up in the index of the strength table.
    max_goals : int, default 10
        Highest goal count considered per side; scorelines 0..max_goals
        (inclusive) are summed.
    team_strength : pandas.DataFrame, optional
        Table indexed by team with 'GoalsScored' and 'GoalsConceded' columns.
        Defaults to the notebook-level ``df_team_strength``.

    Returns
    -------
    tuple
        ``(points_home, points_away)``; ``(0, 0)`` when either team is missing
        from the strength table.
    """
    if team_strength is None:
        team_strength = df_team_strength

    # Unknown teams cannot be modelled; keep the original neutral fallback.
    if home not in team_strength.index or away not in team_strength.index:
        return (0, 0)

    # Goals scored * Goals conceded
    lamb_home = team_strength.at[home, 'GoalsScored'] * team_strength.at[away, 'GoalsConceded']
    lamb_away = team_strength.at[away, 'GoalsScored'] * team_strength.at[home, 'GoalsConceded']

    # Evaluate each side's goal distribution once instead of once per scoreline.
    goal_counts = range(max_goals + 1)
    pmf_home = [poisson.pmf(goals, lamb_home) for goals in goal_counts]
    pmf_away = [poisson.pmf(goals, lamb_away) for goals in goal_counts]

    prob_home, prob_away, prob_draw = 0, 0, 0
    for x, p_x in enumerate(pmf_home):  # Number of goals for home team
        for y, p_y in enumerate(pmf_away):  # Number of goals for away team
            p = p_x * p_y
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return (points_home, points_away)

        

Predicting Euro 2024

    Group Stage

In [546]:
# Split the fixture list by row position into one independent copy per stage.
df_fixture_group_36 = df_fixture.iloc[:36].copy()    # rows 0-35: group stage
df_fixture_knockout = df_fixture.iloc[36:44].copy()  # rows 36-43: first knockout round
df_fixture_quarter = df_fixture.iloc[44:48].copy()   # rows 44-47: quarter-finals
df_fixture_semi = df_fixture.iloc[48:50].copy()      # rows 48-49: semi-finals
df_fixture_final = df_fixture.iloc[50:].copy()       # row 50 onwards: final

In [None]:
# Play every group fixture, accumulate expected points, then rank each group.
for group in dict_table:
    standings = dict_table[group]
    teams_in_group = standings['Team'].values

    # A group's fixtures are those whose home side belongs to the group.
    home_in_group = df_fixture_group_36['home'].isin(teams_in_group)
    df_fixture_group_6 = df_fixture_group_36[home_in_group]

    for home, away in zip(df_fixture_group_6['home'], df_fixture_group_6['away']):
        points_home, points_away = PredictPoints(home, away)
        standings.loc[standings['Team'] == home, 'Pts'] += points_home
        standings.loc[standings['Team'] == away, 'Pts'] += points_away

    # Best team first, keep only name and points, round points to whole numbers.
    dict_table[group] = (
        standings.sort_values('Pts', ascending=False)
        .reset_index()[['Team', 'Pts']]
        .round(0)
    )

    Knockout Stage



In [549]:
# Display the first knockout-round fixtures (rows 36-43 of the fixture list).
df_fixture_knockout

Unnamed: 0,home,score,away,year
36,Runner-up Group A,Match 38,Runner-up Group B,2024
37,Winner Group A,Match 37,Runner-up Group C,2024
38,Winner Group C,Match 40,3rd Group D/E/F,2024
39,Winner Group B,Match 39,3rd Group A/D/E/F,2024
40,Runner-up Group D,Match 42,Runner-up Group E,2024
41,Winner Group F,Match 41,3rd Group A/B/C,2024
42,Winner Group E,Match 43,3rd Group A/B/C/D,2024
43,Winner Group D,Match 44,Runner-up Group F,2024


In [550]:
# Swap the "Winner <group>" / "Runner-up <group>" placeholders for the teams
# sitting at index 0 and 1 of each group's table.
for group, standings in dict_table.items():
    group_winner, runners_up = standings.loc[[0, 1], 'Team']
    placeholder_to_team = {
        f'Winner {group}': group_winner,
        f'Runner-up {group}': runners_up,
    }
    df_fixture_knockout.replace(placeholder_to_team, inplace=True)

# Winner of each match is unknown until get_winner fills it in.
df_fixture_knockout['winner'] = '?'

In [551]:
# Extract third-placed teams: the row at position 2 of every table in dict_table.
third_placed_teams = [dict_table[group].iloc[2] for group in dict_table]

# Rank the third-placed teams by points, best first, with a fresh 0..n-1 index.
df_third_placed = (
    pd.DataFrame(third_placed_teams)
    .sort_values(by='Pts', ascending=False)
    .reset_index(drop=True)
)

# Discard the row labelled 6 (seventh in the ranking).
# NOTE(review): DataFrame.drop raises KeyError when label 6 is absent, i.e. when
# dict_table holds fewer than seven tables -- confirm this matches the pickle.
df_third_placed = df_third_placed.drop(index=6)


def _third_placed_among(teams):
    """Return the ranked third-placed rows whose 'Team' is in ``teams``, re-indexed from 0."""
    return df_third_placed[df_third_placed['Team'].isin(teams)].reset_index(inplace=False)


def _first_unused_team(candidates, used):
    """Return the highest-ranked 'Team' in ``candidates`` that is not in ``used``."""
    for team in candidates['Team']:
        if team not in used:
            return team
    raise ValueError(f"No unused third-placed team among {candidates['Team'].tolist()}")


# Candidate pools for each "3rd Group ..." placeholder.
# NOTE(review): the team names are hard-coded; presumably they are the third-placed
# teams of the named groups from one particular run -- verify if the inputs change.
group_D_E_F_teams = _third_placed_among(['Poland', 'Ukraine', 'Turkey'])
group_A_D_E_F_teams = _third_placed_among(['Scotland', 'Poland', 'Ukraine', 'Turkey'])
group_A_B_C_teams = _third_placed_among(['Scotland', 'Croatia', 'Slovenia'])
group_A_B_C_D_teams = _third_placed_among(['Scotland', 'Croatia', 'Slovenia', 'Poland'])

# Teams already placed in a fixture; each slot takes the best-ranked candidate
# that is not in this set, so no team can be assigned twice.
used_teams = set()

#To replace 3rd Group D/E/F in df_fixture_knockout
match_40 = _first_unused_team(group_D_E_F_teams, used_teams)
used_teams.add(match_40)
df_fixture_knockout.replace({'3rd Group D/E/F': match_40}, inplace=True)

#To replace 3rd Group A/D/E/F in df_fixture_knockout
match_39 = _first_unused_team(group_A_D_E_F_teams, used_teams)
used_teams.add(match_39)
df_fixture_knockout.replace({'3rd Group A/D/E/F': match_39}, inplace=True)

#To replace 3rd Group A/B/C in df_fixture_knockout
match_41 = _first_unused_team(group_A_B_C_teams, used_teams)
used_teams.add(match_41)
df_fixture_knockout.replace({'3rd Group A/B/C': match_41}, inplace=True)

#To replace 3rd Group A/B/C/D in df_fixture_knockout
# Previously hard-coded to position 2; now chosen by the same "first unused" rule.
match_43 = _first_unused_team(group_A_B_C_D_teams, used_teams)
df_fixture_knockout.replace({'3rd Group A/B/C/D': match_43}, inplace=True)

print(df_fixture_knockout)




           home     score            away  year winner
36      Hungary  Match 38           Italy  2024      ?
37  Switzerland  Match 37         Denmark  2024      ?
38      England  Match 40          Poland  2024      ?
39        Spain  Match 39         Ukraine  2024      ?
40       France  Match 42         Romania  2024      ?
41     Portugal  Match 41         Croatia  2024      ?
42      Belgium  Match 43        Scotland  2024      ?
43  Netherlands  Match 44  Czech Republic  2024      ?


In [562]:
def get_winner(df_fixture_updated):
    """Fill the 'winner' column of a fixture table in place and return the table.

    For every row, PredictPoints is evaluated for the 'home' and 'away' teams;
    the home team is recorded as winner only when its points are strictly
    higher, otherwise the away team is recorded.
    """
    fixtures = zip(
        df_fixture_updated.index,
        df_fixture_updated['home'],
        df_fixture_updated['away'],
    )
    for row_label, home, away in fixtures:
        points_home, points_away = PredictPoints(home, away)
        # Equal points fall through to the away side.
        df_fixture_updated.loc[row_label, 'winner'] = home if points_home > points_away else away
    return df_fixture_updated

In [563]:
# Predict and record the winner of each first knockout-round match (updates the frame in place).
get_winner(df_fixture_knockout)

Unnamed: 0,home,score,away,year,winner
36,Hungary,Match 38,Italy,2024,Italy
37,Switzerland,Match 37,Denmark,2024,Denmark
38,England,Match 40,Poland,2024,England
39,Spain,Match 39,Ukraine,2024,Spain
40,France,Match 42,Romania,2024,France
41,Portugal,Match 41,Croatia,2024,Portugal
42,Belgium,Match 43,Scotland,2024,Belgium
43,Netherlands,Match 44,Czech Republic,2024,Netherlands


    Quarter Final Stage

In [564]:
def update_table(df_fixture_round_1, df_fixture_round_2):
    """Carry one round's winners into the next round's fixture table.

    Each 'Winner <score>' placeholder in ``df_fixture_round_2`` is replaced, in
    place, by the matching row's 'winner' from ``df_fixture_round_1`` (the
    'score' column holds the match label). The next round's 'winner' column is
    then reset to '?' and the updated ``df_fixture_round_2`` is returned.
    """
    results = zip(df_fixture_round_1['score'], df_fixture_round_1['winner'])
    for match, winner in results:
        placeholder = f'Winner {match}'
        df_fixture_round_2.replace({placeholder: winner}, inplace=True)
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2

In [565]:
# Replace the 'Winner Match N' placeholders in the quarter-final fixtures with the knockout-round winners.
update_table(df_fixture_knockout, df_fixture_quarter)

Unnamed: 0,home,score,away,year,winner
44,Spain,Match 45,Denmark,2024,?
45,Portugal,Match 46,France,2024,?
46,England,Match 48,Italy,2024,?
47,Belgium,Match 47,Netherlands,2024,?


In [566]:
# Predict and record the winner of each quarter-final (updates the frame in place).
get_winner(df_fixture_quarter)

Unnamed: 0,home,score,away,year,winner
44,Spain,Match 45,Denmark,2024,Spain
45,Portugal,Match 46,France,2024,Portugal
46,England,Match 48,Italy,2024,Italy
47,Belgium,Match 47,Netherlands,2024,Netherlands


    Semi Final Stage

In [567]:
# Replace the 'Winner Match N' placeholders in the semi-final fixtures with the quarter-final winners.
update_table(df_fixture_quarter, df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
48,Spain,Match 49,Portugal,2024,?
49,Netherlands,Match 50,Italy,2024,?


In [568]:
# Predict and record the winner of each semi-final (updates the frame in place).
get_winner(df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
48,Spain,Match 49,Portugal,2024,Spain
49,Netherlands,Match 50,Italy,2024,Italy


    Finals

In [569]:
# Replace the 'Winner Match N' placeholders in the final fixture with the semi-final winners.
update_table(df_fixture_semi, df_fixture_final)

Unnamed: 0,home,score,away,year,winner
50,Spain,Match 51,Italy,2024,?


In [570]:
# Predict and record the winner of the final (updates the frame in place).
get_winner(df_fixture_final)

Unnamed: 0,home,score,away,year,winner
50,Spain,Match 51,Italy,2024,Italy
