In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson,skellam

In [2]:
df = pd.read_csv('final_game_results.csv', decimal=',', encoding='utf-8')
df.shape

(625, 6)

In [3]:
df

Unnamed: 0,Date,Score,Home_team,Away_team,Home_goals,Away_goals
0,08 Sep 2018,6-0,Switzerland,Iceland,6,0
1,11 Sep 2018,1-0,England,Switzerland,1,0
2,12 Oct 2018,2-1,Belgium,Switzerland,2,1
3,15 Oct 2018,1-2,Iceland,Switzerland,1,2
4,14 Nov 2018,0-1,Switzerland,Qatar,0,1
...,...,...,...,...,...,...
620,12 Nov 2020,2-1,Hungary,Iceland,2,1
621,15 Nov 2020,1-1,Hungary,Serbia,1,1
622,18 Nov 2020,2-0,Hungary,Turkey,2,0
623,28 Mar 2021,0-3,San Marino,Hungary,0,3


In [6]:
df_rank = pd.read_csv('fifa_rankings_all.csv', decimal=',', encoding='utf-8')
df_rank.shape

(5260, 4)

In [7]:
df_rank

Unnamed: 0,date,rank,team,points
0,01 July 2018,1,Germany,2172
1,01 July 2018,2,France,2164
2,01 July 2018,3,Portugal,2163
3,01 July 2018,4,Brazil,2160
4,01 July 2018,5,Belgium,2124
...,...,...,...,...
5255,27 May 2021,206,Turks and Caicos Islands,843.65
5256,27 May 2021,207,US Virgin Islands,829.13
5257,27 May 2021,208,British Virgin Islands,826.27
5258,27 May 2021,209,Anguilla,805.42


In [8]:
df_rank['date'] = pd.to_datetime(df_rank['date'])
df['Date'] = pd.to_datetime(df['Date'])

In [9]:
df_rank

Unnamed: 0,date,rank,team,points
0,2018-07-01,1,Germany,2172
1,2018-07-01,2,France,2164
2,2018-07-01,3,Portugal,2163
3,2018-07-01,4,Brazil,2160
4,2018-07-01,5,Belgium,2124
...,...,...,...,...
5255,2021-05-27,206,Turks and Caicos Islands,843.65
5256,2021-05-27,207,US Virgin Islands,829.13
5257,2021-05-27,208,British Virgin Islands,826.27
5258,2021-05-27,209,Anguilla,805.42


In [10]:
df

Unnamed: 0,Date,Score,Home_team,Away_team,Home_goals,Away_goals
0,2018-09-08,6-0,Switzerland,Iceland,6,0
1,2018-09-11,1-0,England,Switzerland,1,0
2,2018-10-12,2-1,Belgium,Switzerland,2,1
3,2018-10-15,1-2,Iceland,Switzerland,1,2
4,2018-11-14,0-1,Switzerland,Qatar,0,1
...,...,...,...,...,...,...
620,2020-11-12,2-1,Hungary,Iceland,2,1
621,2020-11-15,1-1,Hungary,Serbia,1,1
622,2020-11-18,2-0,Hungary,Turkey,2,0
623,2021-03-28,0-3,San Marino,Hungary,0,3


In [11]:
df['Home_rank'] = -1
df['Away_rank'] = -1

In [12]:
for index, row in df.iterrows():
    home_team_rank_rows = df_rank[df_rank['team'] == row['Home_team']]
    dates = home_team_rank_rows['date'].values
    home_team_rank_rows = home_team_rank_rows.sort_values(by='date',ascending=True)
    
    previous = False
    previous_rank_row = None
    for index_rank, row_rank in home_team_rank_rows.iterrows():
        date = row_rank['date']
        if date >= row['Date'] and previous:
            df.at[index, 'Home_rank'] = previous_rank_row['rank']
            break;
        elif date < row['Date']: 
            previous = True
            previous_rank_row = row_rank

In [13]:
for index, row in df.iterrows():
    away_team_rank_rows = df_rank[df_rank['team'] == row['Away_team']]
    dates = away_team_rank_rows['date'].values
    away_team_rank_rows = away_team_rank_rows.sort_values(by='date',ascending=True)
    
    previous = False
    previous_rank_row = None
    for index_rank, row_rank in away_team_rank_rows.iterrows():
        date = row_rank['date']
        if date >= row['Date'] and previous:
            df.at[index, 'Away_rank'] = previous_rank_row['rank']
            break;
        elif date < row['Date']: 
            previous = True
            previous_rank_row = row_rank

In [14]:
df

Unnamed: 0,Date,Score,Home_team,Away_team,Home_goals,Away_goals,Home_rank,Away_rank
0,2018-09-08,6-0,Switzerland,Iceland,6,0,8,32
1,2018-09-11,1-0,England,Switzerland,1,0,6,8
2,2018-10-12,2-1,Belgium,Switzerland,2,1,1,8
3,2018-10-15,1-2,Iceland,Switzerland,1,2,36,8
4,2018-11-14,0-1,Switzerland,Qatar,0,1,8,96
...,...,...,...,...,...,...,...,...
620,2020-11-12,2-1,Hungary,Iceland,2,1,47,39
621,2020-11-15,1-1,Hungary,Serbia,1,1,47,30
622,2020-11-18,2-0,Hungary,Turkey,2,0,47,33
623,2021-03-28,0-3,San Marino,Hungary,0,3,210,40


In [15]:
df.mean()

  df.mean()


Home_goals     1.8080
Away_goals     1.2032
Home_rank     36.9440
Away_rank     40.0928
dtype: float64

In [16]:
skellam.pmf(0.0,  df.mean()[0],  df.mean()[1])

  skellam.pmf(0.0,  df.mean()[0],  df.mean()[1])


0.23074803045586798

In [17]:
skellam.pmf(1,  df.mean()[0],  df.mean()[1])

  skellam.pmf(1,  df.mean()[0],  df.mean()[1])


0.22804210068609898

In [18]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [19]:
goal_model_data = pd.concat([df[['Home_team','Away_team','Home_goals']].assign(home=1).rename(
            columns={'Home_team':'team', 'Away_team':'opponent','Home_goals':'goals'}),
           df[['Away_team','Home_team','Away_goals']].assign(home=0).rename(
            columns={'Away_team':'team', 'Home_team':'opponent','Away_goals':'goals'})])

In [20]:
goal_model_data = pd.concat([df[['Home_team','Home_rank','Away_team','Away_rank','Home_goals']].assign(home=1).rename(
            columns={'Home_team':'team', 'Home_rank':'home_rank', 'Away_team':'opponent', 'Away_rank':'opponent_rank','Home_goals':'goals'}),
           df[['Away_team','Away_rank','Home_team','Home_rank','Away_goals']].assign(home=0).rename(
            columns={'Away_team':'team', 'Away_rank':'home_rank', 'Home_team':'opponent', 'Home_rank':'opponent_rank','Away_goals':'goals'})])

In [21]:
goal_model_data

Unnamed: 0,team,home_rank,opponent,opponent_rank,goals,home
0,Switzerland,8,Iceland,32,6,1
1,England,6,Switzerland,8,1,1
2,Belgium,1,Switzerland,8,2,1
3,Iceland,36,Switzerland,8,1,1
4,Switzerland,8,Qatar,96,0,1
...,...,...,...,...,...,...
620,Iceland,39,Hungary,47,1,0
621,Serbia,30,Hungary,47,1,0
622,Turkey,33,Hungary,47,0,0
623,Hungary,40,San Marino,210,3,0


In [22]:
poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=goal_model_data, 
                        family=sm.families.Poisson()).fit()
poisson_model.summary()

0,1,2,3
Dep. Variable:,goals,No. Observations:,1250.0
Model:,GLM,Df Residuals:,1108.0
Model Family:,Poisson,Df Model:,141.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-1691.6
Date:,"Wed, 02 Jun 2021",Deviance:,1152.8
Time:,16:54:12,Pearson chi2:,1040.0
No. Iterations:,23,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.1982,0.754,-1.588,0.112,-2.677,0.280
team[T.Andorra],-0.3430,1.227,-0.280,0.780,-2.748,2.062
team[T.Argentina],2.2334,1.013,2.204,0.028,0.247,4.220
team[T.Armenia],1.4054,0.805,1.747,0.081,-0.172,2.983
team[T.Austria],1.6805,0.728,2.310,0.021,0.254,3.107
team[T.Azerbaijan],0.9265,0.821,1.128,0.259,-0.683,2.536
team[T.Bahrain],1.5526,1.235,1.258,0.209,-0.867,3.972
team[T.Belarus],-0.1452,1.230,-0.118,0.906,-2.555,2.265
team[T.Belgium],2.3706,0.717,3.308,0.001,0.966,3.775


In [23]:
poisson_model = smf.glm(formula="goals ~ home + home_rank + team + opponent + opponent_rank", data=goal_model_data, 
                        family=sm.families.Poisson()).fit()
poisson_model.summary()

0,1,2,3
Dep. Variable:,goals,No. Observations:,1250.0
Model:,GLM,Df Residuals:,1106.0
Model Family:,Poisson,Df Model:,143.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-1687.8
Date:,"Wed, 02 Jun 2021",Deviance:,1145.3
Time:,16:54:14,Pearson chi2:,1030.0
No. Iterations:,23,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.4802,0.792,-1.869,0.062,-3.032,0.072
team[T.Andorra],-1.0161,1.266,-0.803,0.422,-3.498,1.465
team[T.Argentina],2.6634,1.033,2.577,0.010,0.638,4.689
team[T.Armenia],1.0495,0.819,1.281,0.200,-0.556,2.655
team[T.Austria],1.9663,0.741,2.653,0.008,0.514,3.419
team[T.Azerbaijan],0.5353,0.841,0.636,0.525,-1.114,2.184
team[T.Bahrain],1.2326,1.243,0.992,0.321,-1.204,3.669
team[T.Belarus],-0.3255,1.232,-0.264,0.792,-2.741,2.090
team[T.Belgium],2.8873,0.754,3.828,0.000,1.409,4.366


In [24]:
#poisson_model = smf.glm(formula="goals ~ team + opponent", data=goal_model_data, family=sm.families.Poisson()).fit()
#poisson_model.summary()

In [25]:
#27 May 2021
fifa_ranking_dict = {
    'Turkey': 29, 
    'Italy': 7,
    'Wales': 17,
    'Switzerland': 13,
    'Denmark': 10,
    'Finland': 54,
    'Belgium': 1,
    'Russia': 38,
    'England': 4,
    'Croatia': 14,
    'Austria': 23,
    'North Macedonia': 62,
    'Netherlands': 16,
    'Ukraine': 24,
    'Scotland': 44,
    'Czech Republic': 40,
    'Poland': 21,
    'Slovakia': 36,
    'Spain': 6,
    'Sweden': 18,
    'Hungary': 37,
    'Portugal': 5,
    'France': 2,
    'Germany': 12
}

In [26]:
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    home_goals_avg = foot_model.predict(pd.DataFrame(data={'team': homeTeam,
                                                           'home_rank': fifa_ranking_dict.get(homeTeam),
                                                           'opponent': awayTeam,
                                                           'opponent_rank': fifa_ranking_dict.get(awayTeam),
                                                           'home':1},
                                                      index=[1])).values[0]
    away_goals_avg = foot_model.predict(pd.DataFrame(data={'team': awayTeam, 
                                                           'home_rank': fifa_ranking_dict.get(awayTeam),
                                                           'opponent': homeTeam,
                                                           'opponent_rank': fifa_ranking_dict.get(homeTeam),
                                                           'home':0},
                                                      index=[1])).values[0]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in [home_goals_avg, away_goals_avg]]
    return(np.outer(np.array(team_pred[0]), np.array(team_pred[1])))

In [27]:
simulate_match(poisson_model, 'Germany', 'Belgium', max_goals=4)

array([[0.02431612, 0.05728652, 0.06748088, 0.0529929 , 0.0312116 ],
       [0.03308714, 0.07795025, 0.09182179, 0.07210789, 0.04246988],
       [0.02251097, 0.05303378, 0.06247134, 0.0490589 , 0.02889456],
       [0.01021029, 0.0240545 , 0.02833509, 0.02225162, 0.01310569],
       [0.00347331, 0.00818279, 0.00963895, 0.00756949, 0.00445826]])

In [28]:
sun = simulate_match(poisson_model, 'Germany', 'Belgium', max_goals=10)

In [29]:
np.sum(np.tril(sun, -1))

0.21187331284596525

In [30]:
np.sum(np.diag(sun))

0.19207365969797288

In [31]:
np.sum(np.tril(sun, 1))

0.6175631519616525

In [32]:
def predict_game_result(home_team, away_team):
    home_goals = poisson_model.predict(pd.DataFrame(data={
        'team': home_team, 'home_rank': fifa_ranking_dict.get(home_team),'opponent': away_team, 'opponent_rank': fifa_ranking_dict.get(away_team), 'home': 1
    },index=[1]))
    away_goals = poisson_model.predict(pd.DataFrame(data={
        'team': away_team, 'home_rank': fifa_ranking_dict.get(away_team), 'opponent': home_team, 'opponent_rank': fifa_ranking_dict.get(home_team), 'home': 0
    },index=[1]))
    print(str(home_team) + " " + str(int(round(home_goals))) + " - " + str(int(round(away_goals))) + " " + str(away_team))

EM-KISAT 2021 PREDICTIONS

In [33]:
predict_game_result('Turkey', 'Italy')

Turkey 0 - 1 Italy


In [34]:
predict_game_result('Wales', 'Switzerland')

Wales 1 - 1 Switzerland


In [35]:
predict_game_result('Denmark', 'Finland')

Denmark 2 - 1 Finland


In [36]:
predict_game_result('Belgium', 'Russia')

Belgium 4 - 1 Russia


In [37]:
predict_game_result('England', 'Croatia')

England 3 - 1 Croatia


In [38]:
predict_game_result('Austria', 'North Macedonia')

Austria 2 - 1 North Macedonia


In [39]:
predict_game_result('Netherlands', 'Ukraine')

Netherlands 3 - 1 Ukraine


In [40]:
predict_game_result('Scotland', 'Czech Republic')

Scotland 1 - 1 Czech Republic


In [41]:
predict_game_result('Poland', 'Slovakia')

Poland 2 - 1 Slovakia


In [42]:
predict_game_result('Spain', 'Sweden')

Spain 2 - 1 Sweden


In [43]:
predict_game_result('Hungary', 'Portugal')

Hungary 1 - 2 Portugal


In [44]:
predict_game_result('France', 'Germany')

France 2 - 1 Germany


In [45]:
predict_game_result('Finland', 'Russia')

Finland 1 - 2 Russia


In [46]:
predict_game_result('Turkey', 'Wales')

Turkey 1 - 1 Wales


In [47]:
predict_game_result('Italy', 'Switzerland')

Italy 2 - 0 Switzerland


In [48]:
predict_game_result('Ukraine', 'North Macedonia')

Ukraine 2 - 1 North Macedonia


In [49]:
predict_game_result('Denmark', 'Belgium')

Denmark 1 - 1 Belgium


In [50]:
predict_game_result('Netherlands', 'Austria')

Netherlands 2 - 1 Austria


In [51]:
predict_game_result('Sweden', 'Slovakia')

Sweden 2 - 1 Slovakia


In [52]:
predict_game_result('Croatia', 'Czech Republic')

Croatia 2 - 1 Czech Republic


In [53]:
predict_game_result('England', 'Scotland')

England 4 - 1 Scotland


In [54]:
predict_game_result('Hungary', 'France')

Hungary 1 - 2 France


In [55]:
predict_game_result('Portugal', 'Germany')

Portugal 3 - 1 Germany


In [56]:
predict_game_result('Spain', 'Poland')

Spain 2 - 1 Poland


In [57]:
predict_game_result('Italy', 'Wales')

Italy 2 - 0 Wales


In [58]:
predict_game_result('Switzerland', 'Turkey')

Switzerland 2 - 1 Turkey


In [59]:
predict_game_result('Ukraine', 'Austria')

Ukraine 1 - 1 Austria


In [60]:
predict_game_result('North Macedonia', 'Netherlands')

North Macedonia 1 - 2 Netherlands


In [61]:
predict_game_result('Finland', 'Belgium')

Finland 1 - 2 Belgium


In [62]:
predict_game_result('Russia', 'Denmark')

Russia 1 - 2 Denmark


In [63]:
predict_game_result('Czech Republic', 'England')

Czech Republic 1 - 2 England


In [64]:
predict_game_result('Croatia', 'Scotland')

Croatia 3 - 1 Scotland


In [65]:
predict_game_result('Sweden', 'Poland')

Sweden 1 - 1 Poland


In [66]:
predict_game_result('Slovakia', 'Spain')

Slovakia 1 - 2 Spain


In [67]:
predict_game_result('Germany', 'Hungary')

Germany 2 - 1 Hungary


In [68]:
predict_game_result('Portugal', 'France')

Portugal 2 - 1 France
