In [338]:
import pandas as pd
import pickle
from scipy.stats import poisson


In [339]:
dict_table = pickle.load(open('dict_table','rb'))
df_historical_data = pd.read_csv('clean_uefa_euro_historical_data.csv')
df_fixture = pd.read_csv('clean_uefa_euro_fixture.csv')

In [340]:
df_fixture

Unnamed: 0,home,score,away,year
0,Germany,Match 1,Scotland,2024
1,Hungary,Match 2,Switzerland,2024
2,Germany,Match 14,Hungary,2024
3,Scotland,Match 13,Switzerland,2024
4,Switzerland,Match 25,Germany,2024
5,Scotland,Match 26,Hungary,2024
6,Spain,Match 3,Croatia,2024
7,Italy,Match 4,Albania,2024
8,Croatia,Match 15,Albania,2024
9,Spain,Match 16,Italy,2024


**1. Calculate Team Strength**

In [341]:
# split df into df_home and df_away
df_home = df_historical_data[['HomeTeam', 'HomeGoals', 'AwayGoals']]
df_away = df_historical_data[['AwayTeam', 'HomeGoals', 'AwayGoals']]

In [342]:
# rename columns
df_home = df_home.rename(columns={'HomeTeam': 'Team', 'HomeGoals': 'GoalsScored','AwayGoals':'GoalsConceded'})
df_away = df_away.rename(columns={'AwayTeam': 'Team', 'HomeGoals': 'GoalsConceded','AwayGoals':'GoalsScored'})

In [343]:
#concat df_home and df_away, group by team and clculate the mean
df_team_strength = pd.concat([df_home, df_away], ignore_index=True).groupby('Team').mean()
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Albania,0.333333,1.0
Austria,0.7,1.2
Belgium,1.409091,1.272727
Bulgaria,0.666667,2.166667
CIS,0.333333,1.333333
Croatia,1.363636,1.272727
Czech Republic,1.241379,1.275862
Czechoslovakia,1.5,1.25
Denmark,1.272727,1.515152
England,1.342105,0.973684


**2 Function predict_points**

In [344]:
def predict_points(home, away):
        if home in df_team_strength.index and away in df_team_strength.index:
            # goals_scored * goals_conceded
            lamb_home = df_team_strength.at[home,'GoalsScored'] * df_team_strength.at[away,'GoalsConceded'] 
            lamb_away = df_team_strength.at[away,'GoalsScored'] * df_team_strength.at[home,'GoalsConceded'] 
            prob_home, prob_away, prob_draw = 0, 0, 0
            for x in range(0,11): #number of goals home team
                for y in range(0,11): #number of goals away team
                    p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                    if x == y:
                        prob_draw += p
                    elif x > y:
                        prob_home += p
                    else:
                        prob_away += p
            points_home = 3 * prob_home + prob_draw
            points_away = 3 * prob_away + prob_draw
            return(points_home, points_away)
        else:
            return(0, 0)

**2.1 Testing Function**

In [345]:
# Test with matches: Slovenia - Denmark,  Scotland - Switzerland
predict_points('Slovenia', 'Denmark')
predict_points('Scotland','Switzerland')

(1.1283055958059556, 1.5384337764862321)

**3 Predicting the Uefa Euro Cup**

**3.1 Group Stage**

In [346]:
# splitting fixture into group, knockout, quarter, ...
df_fixture_group_36 = df_fixture[:36].copy()
df_fixture_knockout = df_fixture[36:44].copy()
df_fixture_quarter = df_fixture[44:48].copy()
df_fixture_semi = df_fixture[48:50].copy()
df_fixture_final = df_fixture[50:].copy()

In [347]:
# run all the matches in the group stage and update group tables
# Iterate through each group in the dictionary
for group in dict_table:
    # Get the teams in the current group
    teams_in_group = dict_table[group]['Team'].values
    
    # Filter the fixtures for the current group
    df_fixture_group_6 = df_fixture_group_36[df_fixture_group_36['home'].isin(teams_in_group) & df_fixture_group_36['away'].isin(teams_in_group)]
    
    # Iterate through each match in the filtered fixtures
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['home'], row['away']
        
        # Predict points for the home and away teams
        points_home, points_away = predict_points(home, away)
        
        # Update the points for the home team
        dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
        
        # Update the points for the away team
        dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away
    
    # Sort the group table by points in descending order
    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index(drop=True)
    
    # Select only the 'Team' and 'Pts' columns
    dict_table[group] = dict_table[group][['Team', 'Pts']]
    
    # Round the points to the nearest integer
    dict_table[group]['Pts'] = dict_table[group]['Pts'].round(0)


  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home


In [362]:
# show updated table
dict_table['Group A']

Unnamed: 0,Team,Pts
0,Hungary,3.0
1,Switzerland,3.0
2,Scotland,2.0
3,Germany (H),0.0


**3.2 Knockout**

In [349]:
# df_fixture_knockout
df_fixture_knockout

Unnamed: 0,home,score,away,year
36,Runner-up Group A,Match 38,Runner-up Group B,2024
37,Winner Group A,Match 37,Runner-up Group C,2024
38,Winner Group C,Match 40,3rd Group D/E/F,2024
39,Winner Group B,Match 39,3rd Group A/D/E/F,2024
40,Runner-up Group D,Match 42,Runner-up Group E,2024
41,Winner Group F,Match 41,3rd Group A/B/C,2024
42,Winner Group E,Match 43,3rd Group A/B/C/D,2024
43,Winner Group D,Match 44,Runner-up Group F,2024


In [350]:
for group in dict_table:
    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index(drop=True)
    
    group_winner = dict_table[group].loc[0, 'Team']
    runners_up = dict_table[group].loc[1, 'Team']
    
    print(f'Updating group: {group}, Winner: {group_winner}, Runner-up: {runners_up}')
    
    df_fixture_knockout.replace({
        f'Winner {group}': group_winner,
        f'Runner-up {group}': runners_up
    }, inplace=True)

# Collect all 3rd place teams
third_place_teams = []
for group in dict_table:
    if len(dict_table[group]) > 2:
        third_place_team = dict_table[group].loc[2].copy()  # Make a copy
        third_place_team['Group'] = group  # Add group information for tie-breaking
        third_place_teams.append(third_place_team)

# Ensure all necessary columns are present in the third_place_teams DataFrame
if third_place_teams:
    df_third_place_teams = pd.DataFrame(third_place_teams)

    # Check for missing columns and fill them with default values
    for col in ['Pts', 'GD', 'GF']:
        if col not in df_third_place_teams.columns:
            df_third_place_teams[col] = 0  # Assuming default value as 0, adjust if needed

    # Create a DataFrame for 3rd place teams and sort by points, goal difference, and goals scored
    df_third_place_teams = df_third_place_teams.sort_values(by=['Pts', 'GD', 'GF'], ascending=False).reset_index(drop=True)

    # Debugging output for top four 3rd place teams
    print("Top four 3rd place teams:")
    print(df_third_place_teams.head(4))

    # Select the top 4 3rd place teams
    top_four_3rd_place_teams = df_third_place_teams.head(4)

    # Create a mapping for the 3rd place teams to the fixture placeholders
    third_place_mapping = {
        '3rd Group D/E/F': top_four_3rd_place_teams.iloc[0]['Team'],
        '3rd Group A/D/E/F': top_four_3rd_place_teams.iloc[1]['Team'],
        '3rd Group A/B/C': top_four_3rd_place_teams.iloc[2]['Team'],
        '3rd Group A/B/C/D': top_four_3rd_place_teams.iloc[3]['Team']
    }

    # Update the knockout fixture with the top 4 3rd place teams
    df_fixture_knockout.replace(third_place_mapping, inplace=True)

# Ensure 'winner' column is present and initialized
if 'winner' not in df_fixture_knockout.columns:
    df_fixture_knockout['winner'] = '?'

# Final debugging output to verify the updated knockout fixtures
print(df_fixture_knockout)

Updating group: Group A, Winner: Hungary, Runner-up: Switzerland
Updating group: Group B, Winner: Spain, Runner-up: Italy
Updating group: Group C, Winner: England, Runner-up: Denmark
Updating group: Group D, Winner: Netherlands, Runner-up: France
Updating group: Group E, Winner: Belgium, Runner-up: Romania
Updating group: Group F, Winner: Portugal, Runner-up: Czech Republic
Top four 3rd place teams:
       Team  Pts    Group  GD  GF
0   Croatia  4.0  Group B   0   0
1    Poland  3.0  Group D   0   0
2   Ukraine  3.0  Group E   0   0
3  Scotland  2.0  Group A   0   0
           home     score            away  year winner
36  Switzerland  Match 38           Italy  2024      ?
37      Hungary  Match 37         Denmark  2024      ?
38      England  Match 40         Croatia  2024      ?
39        Spain  Match 39          Poland  2024      ?
40       France  Match 42         Romania  2024      ?
41     Portugal  Match 41         Ukraine  2024      ?
42      Belgium  Match 43        Scotland 

In [351]:
#create get_winner function
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, 'winner'] = winner
    return df_fixture_updated

In [352]:
get_winner(df_fixture_knockout)

Unnamed: 0,home,score,away,year,winner
36,Switzerland,Match 38,Italy,2024,Italy
37,Hungary,Match 37,Denmark,2024,Denmark
38,England,Match 40,Croatia,2024,England
39,Spain,Match 39,Poland,2024,Spain
40,France,Match 42,Romania,2024,France
41,Portugal,Match 41,Ukraine,2024,Portugal
42,Belgium,Match 43,Scotland,2024,Belgium
43,Netherlands,Match 44,Czech Republic,2024,Netherlands


In [353]:
df_fixture_quarter

Unnamed: 0,home,score,away,year
44,Winner Match 39,Match 45,Winner Match 37,2024
45,Winner Match 41,Match 46,Winner Match 42,2024
46,Winner Match 40,Match 48,Winner Match 38,2024
47,Winner Match 43,Match 47,Winner Match 44,2024


**3.3 Quarter Final**

In [373]:
def update_table(df_fixture_current, df_fixture_next):
    # Get winners from the current round
    df_fixture_current = get_winner(df_fixture_current)
    
    # Create replacements dictionary
    replacements = {}
    for index, row in df_fixture_current.iterrows():
        winner = row['winner']
        match = row['score']
        replacements[f'Winner Match {match}'] = winner
    
    # Debug: Print replacements dictionary
    print("Replacements dictionary:", replacements)
    
    # Replace placeholders in the next round with actual winners
    df_fixture_next['home'] = df_fixture_next['home'].replace(replacements)
    df_fixture_next['away'] = df_fixture_next['away'].replace(replacements)
    
    return df_fixture_next

# Define the sample data for knockout round fixtures
data_knockout = {
    'home': ['Scotland', 'Germany (H)', 'Slovenia', 'Spain', 'Netherlands', 'Turkey', 'Belgium', 'Poland'],
    'score': [38, 37, 40, 39, 42, 41, 43, 44],
    'away': ['Croatia', 'Denmark', 'Hungary', 'Italy', 'Slovakia', 'Serbia', 'Austria', 'Georgia'],
    'year': [2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024],
    'winner': ['Croatia', 'Denmark', 'Slovenia', 'Italy', 'Netherlands', 'Serbia', 'Belgium', 'Georgia']
}
df_fixture_knockout = pd.DataFrame(data_knockout)

# Define the sample data for quarter-final fixtures
data_quarter = {
    'home': ['Winner Match 39', 'Winner Match 41', 'Winner Match 40', 'Winner Match 43'],
    'score': [45, 46, 48, 47],  # Assuming these are placeholders for match identifiers
    'away': ['Winner Match 37', 'Winner Match 42', 'Winner Match 38', 'Winner Match 44'],
    'year': [2024, 2024, 2024, 2024],
    'winner': ['?', '?', '?', '?']
}
df_fixture_quarter = pd.DataFrame(data_quarter)

# Define the sample data for semi-final fixtures
data_semi = {
    'home': ['Winner Match 45', 'Winner Match 47'],
    'score': [49, 50],
    'away': ['Winner Match 46', 'Winner Match 48'],
    'year': [2024, 2024],
    'winner': ['?', '?']
}
df_fixture_semi = pd.DataFrame(data_semi)

# Define the sample data for final fixtures
data_final = {
    'home': ['Winner Match 49'],
    'score': [51],
    'away': ['Winner Match 50'],
    'year': [2024],
    'winner': ['?']
}
df_fixture_final = pd.DataFrame(data_final)

In [367]:
df_updated_fixture_quarter

Unnamed: 0,home,score,away,year,winner
0,Italy,45,Denmark,2024,Italy
1,Serbia,46,Netherlands,2024,Netherlands
2,Slovenia,48,Croatia,2024,Croatia
3,Belgium,47,Georgia,2024,Georgia


**3.4 Semifinal**

In [369]:
update_table(df_updated_fixture_quarter, df_fixture_semi)

Replacements dictionary: {'Winner Match 45': 'Italy', 'Winner Match 46': 'Netherlands', 'Winner Match 48': 'Croatia', 'Winner Match 47': 'Georgia'}


Unnamed: 0,home,score,away,year,winner
48,Italy,Match 49,Netherlands,2024,Italy
49,Georgia,Match 50,Croatia,2024,Croatia


In [365]:
get_winner(df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
48,Italy,Match 49,Netherlands,2024,Italy
49,Georgia,Match 50,Croatia,2024,Croatia


**3.5 Final**

In [375]:
get_winner(df_updated_fixture_final)

Unnamed: 0,home,score,away,year,winner
0,Italy,51,Croatia,2024,Italy
