In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson,skellam

In [2]:
df = pd.read_csv('final_game_results.csv', decimal=',', encoding='utf-8')
df.shape

(625, 6)

In [3]:
df

Unnamed: 0,Date,Score,Home_team,Away_team,Home_goals,Away_goals
0,08 Sep 2018,6-0,Switzerland,Iceland,6,0
1,11 Sep 2018,1-0,England,Switzerland,1,0
2,12 Oct 2018,2-1,Belgium,Switzerland,2,1
3,15 Oct 2018,1-2,Iceland,Switzerland,1,2
4,14 Nov 2018,0-1,Switzerland,Qatar,0,1
...,...,...,...,...,...,...
620,12 Nov 2020,2-1,Hungary,Iceland,2,1
621,15 Nov 2020,1-1,Hungary,Serbia,1,1
622,18 Nov 2020,2-0,Hungary,Turkey,2,0
623,28 Mar 2021,0-3,San Marino,Hungary,0,3


In [4]:
df.mean()

Home_goals    1.8080
Away_goals    1.2032
dtype: float64

In [5]:
skellam.pmf(0.0,  df.mean()[0],  df.mean()[1])

0.23074803045586798

In [6]:
skellam.pmf(1,  df.mean()[0],  df.mean()[1])

0.22804210068609898

In [7]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [92]:
goal_model_data = pd.concat([df[['Home_team','Away_team','Home_goals']].assign(home=1).rename(
            columns={'Home_team':'team', 'Away_team':'opponent','Home_goals':'goals'}),
           df[['Away_team','Home_team','Away_goals']].assign(home=0).rename(
            columns={'Away_team':'team', 'Home_team':'opponent','Away_goals':'goals'})])

In [93]:
goal_model_data

Unnamed: 0,team,opponent,goals,home
0,Switzerland,Iceland,6,1
1,England,Switzerland,1,1
2,Belgium,Switzerland,2,1
3,Iceland,Switzerland,1,1
4,Switzerland,Qatar,0,1
...,...,...,...,...
620,Iceland,Hungary,1,0
621,Serbia,Hungary,1,0
622,Turkey,Hungary,0,0
623,Hungary,San Marino,3,0


In [94]:
#poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=goal_model_data, family=sm.families.Poisson()).fit()
#poisson_model.summary()

In [95]:
poisson_model = smf.glm(formula="goals ~ team + opponent", data=goal_model_data, family=sm.families.Poisson()).fit()
#poisson_model.summary()

In [96]:
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    home_goals_avg = foot_model.predict(pd.DataFrame(data={'team': homeTeam, 
                                                            'opponent': awayTeam},
                                                      index=[1])).values[0]
    away_goals_avg = foot_model.predict(pd.DataFrame(data={'team': awayTeam, 
                                                            'opponent': homeTeam},
                                                      index=[1])).values[0]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in [home_goals_avg, away_goals_avg]]
    return(np.outer(np.array(team_pred[0]), np.array(team_pred[1])))

In [97]:
simulate_match(poisson_model, 'Germany', 'Belgium', max_goals=4)

array([[0.01938464, 0.05386133, 0.07482837, 0.06930496, 0.04814194],
       [0.02257763, 0.06273323, 0.08715392, 0.0807207 , 0.05607176],
       [0.01314828, 0.03653324, 0.05075484, 0.0470084 , 0.03265388],
       [0.00510468, 0.01418364, 0.01970502, 0.0182505 , 0.01267751],
       [0.00148638, 0.00412998, 0.00573769, 0.00531417, 0.00369143]])

In [98]:
sun = simulate_match(poisson_model, 'Germany', 'Belgium', max_goals=100)

In [99]:
np.sum(np.tril(sun, -1))

0.13388235429004552

In [100]:
np.sum(np.diag(sun))

0.15533844508936834

In [101]:
np.sum(np.tril(sun, 1))

0.4922127039078146

In [120]:
def predict_game_result(home_team, away_team, more=False):
    home_goals = poisson_model.predict(pd.DataFrame(data={'team': home_team, 'opponent': away_team,
                                       'home':1},index=[1]))
    away_goals = poisson_model.predict(pd.DataFrame(data={'team': away_team, 'opponent': home_team,
                                       'home':0},index=[1]))
    
    int_home_goals = int(round(home_goals))
    int_away_goals = int(round(away_goals))
    
    print(str(home_team) + " " + str(int_home_goals) + " - " + str(int_away_goals) + " " + str(away_team))
    
    if(more):
        print(str(home_team) + ": " + str(home_goals))
        print(str(away_team) + ": " + str(away_goals))
        sun = simulate_match(poisson_model, home_team, away_team, max_goals=10)
        print(home_team + " wins probability of:" + str(np.sum(np.tril(sun, -1))))
        print("Draw is probability of:" + str(np.sum(np.diag(sun))))
        print(away_team + " wins probability of:" + str(np.sum(np.tril(sun, 1))))

EM-KISAT 2021 PREDICTIONS

Group stage

In [19]:
predict_game_result('Turkey', 'Italy')

Turkey 0 - 2 Italy


In [20]:
predict_game_result('Wales', 'Switzerland')

Wales 1 - 1 Switzerland


In [21]:
predict_game_result('Denmark', 'Finland')

Denmark 2 - 1 Finland


In [22]:
predict_game_result('Belgium', 'Russia')

Belgium 4 - 1 Russia


In [23]:
predict_game_result('England', 'Croatia')

England 3 - 1 Croatia


In [24]:
predict_game_result('Austria', 'North Macedonia')

Austria 2 - 1 North Macedonia


In [25]:
predict_game_result('Netherlands', 'Ukraine')

Netherlands 3 - 1 Ukraine


In [26]:
predict_game_result('Scotland', 'Czech Republic')

Scotland 1 - 1 Czech Republic


In [27]:
predict_game_result('Poland', 'Slovakia')

Poland 2 - 1 Slovakia


In [28]:
predict_game_result('Spain', 'Sweden')

Spain 2 - 1 Sweden


In [29]:
predict_game_result('Hungary', 'Portugal')

Hungary 1 - 2 Portugal


In [30]:
predict_game_result('France', 'Germany')

France 2 - 1 Germany


In [31]:
predict_game_result('Finland', 'Russia')

Finland 1 - 2 Russia


In [32]:
predict_game_result('Turkey', 'Wales')

Turkey 1 - 1 Wales


In [33]:
predict_game_result('Italy', 'Switzerland')

Italy 2 - 0 Switzerland


In [34]:
predict_game_result('Ukraine', 'North Macedonia')

Ukraine 2 - 1 North Macedonia


In [35]:
predict_game_result('Denmark', 'Belgium')

Denmark 1 - 1 Belgium


In [36]:
predict_game_result('Netherlands', 'Austria')

Netherlands 2 - 1 Austria


In [37]:
predict_game_result('Sweden', 'Slovakia')

Sweden 2 - 1 Slovakia


In [38]:
predict_game_result('Croatia', 'Czech Republic')

Croatia 2 - 1 Czech Republic


In [39]:
predict_game_result('England', 'Scotland')

England 4 - 1 Scotland


In [40]:
predict_game_result('Hungary', 'France')

Hungary 1 - 2 France


In [41]:
predict_game_result('Portugal', 'Germany')

Portugal 3 - 1 Germany


In [42]:
predict_game_result('Spain', 'Poland')

Spain 2 - 1 Poland


In [43]:
predict_game_result('Italy', 'Wales')

Italy 2 - 0 Wales


In [44]:
predict_game_result('Switzerland', 'Turkey')

Switzerland 2 - 1 Turkey


In [45]:
predict_game_result('Ukraine', 'Austria')

Ukraine 1 - 1 Austria


In [46]:
predict_game_result('North Macedonia', 'Netherlands')

North Macedonia 1 - 2 Netherlands


In [47]:
predict_game_result('Finland', 'Belgium')

Finland 1 - 2 Belgium


In [48]:
predict_game_result('Russia', 'Denmark')

Russia 1 - 2 Denmark


In [49]:
predict_game_result('Czech Republic', 'England')

Czech Republic 1 - 2 England


In [50]:
predict_game_result('Croatia', 'Scotland')

Croatia 3 - 1 Scotland


In [51]:
predict_game_result('Sweden', 'Poland')

Sweden 1 - 1 Poland


In [52]:
predict_game_result('Slovakia', 'Spain')

Slovakia 1 - 2 Spain


In [53]:
predict_game_result('Germany', 'Hungary')

Germany 2 - 1 Hungary


In [54]:
predict_game_result('Portugal', 'France')

Portugal 2 - 1 France


Round of 16

In [110]:
predict_game_result('Portugal', 'Russia')

Portugal 2 - 1 Russia


In [109]:
predict_game_result('Croatia', 'Poland')

Croatia 1 - 2 Poland


In [108]:
predict_game_result('Italy', 'Austria')

Italy 2 - 0 Austria


In [107]:
predict_game_result('Belgium', 'Germany')

Belgium 3 - 1 Germany


In [106]:
predict_game_result('Netherlands', 'Sweden')

Netherlands 2 - 1 Sweden


In [105]:
predict_game_result('Switzerland', 'Denmark', more=True)

Switzerland 1 - 2 Denmark
Switzerland: 1
Denmark: 2


In [111]:
predict_game_result('Spain', 'Ukraine')

Spain 2 - 1 Ukraine


In [112]:
predict_game_result('England', 'France')

England 2 - 1 France


Quarterfinals

In [113]:
predict_game_result('Portugal', 'Croatia')

Portugal 2 - 1 Croatia


In [121]:
predict_game_result('Italy', 'Belgium', more=True)

Italy 1 - 1 Belgium
Italy: 1    1.117723
dtype: float64
Belgium: 1    0.787529
dtype: float64
Italy wins probability of:0.4319464644526446
Draw is probability of:0.31155161918878616
Belgium wins probability of:0.9203865510843819


In [123]:
predict_game_result('Netherlands', 'Denmark', more=True)

Netherlands 1 - 1 Denmark
Netherlands: 1    1.24171
dtype: float64
Denmark: 1    1.308281
dtype: float64
Netherlands wins probability of:0.3508990380898434
Draw is probability of:0.2667496334676036
Denmark wins probability of:0.8285446151373641


In [117]:
predict_game_result('Spain', 'England')

Spain 2 - 1 England
