In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson,skellam

In [6]:
# Read the match results and keep only the two team columns and the full-time
# goal counts, renaming the goal columns to more readable labels.
epl_1920 = (
    pd.read_csv('E0.csv')
    .loc[:, ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]
    .rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
)
epl_1920.head()

Unnamed: 0,HomeTeam,AwayTeam,HomeGoals,AwayGoals
0,Liverpool,Norwich,4,1
1,West Ham,Man City,0,5
2,Bournemouth,Sheffield United,1,1
3,Burnley,Southampton,3,0
4,Crystal Palace,Everton,0,0


In [7]:
# Drop the final 10 rows (presumably the last round of fixtures, held out so
# they can be predicted later — TODO confirm).
# NOTE: this reassigns `epl_1920`, so re-running this cell removes a further
# 10 rows each time; re-run the loading cell first if you need to repeat it.
epl_1920 = epl_1920[:-10]
# numeric_only=True restricts the average to the goal columns. The team-name
# columns hold strings, and pandas >= 2.0 raises TypeError on them instead of
# silently skipping them as older versions did.
epl_1920.mean(numeric_only=True)

HomeGoals    1.508108
AwayGoals    1.197297
dtype: float64

In [8]:
# Probability of a draw (goal difference of exactly 0) when home and away goals
# are treated as independent Poisson variables with the league-average rates.
# The column means are taken explicitly by label: `epl_1920.mean()[0]` relies on
# positional `[]` lookup on a label-indexed Series (deprecated) and on averaging
# a frame that also contains string columns (TypeError on pandas >= 2.0).
skellam.pmf(0.0, epl_1920['HomeGoals'].mean(), epl_1920['AwayGoals'].mean())

0.25428088854445496

In [9]:
# Probability that the home side wins by exactly one goal (goal difference of
# +1) under the same league-average Skellam model.
# The column means are taken explicitly by label: `epl_1920.mean()[0]` relies on
# positional `[]` lookup on a label-indexed Series (deprecated) and on averaging
# a frame that also contains string columns (TypeError on pandas >= 2.0).
skellam.pmf(1, epl_1920['HomeGoals'].mean(), epl_1920['AwayGoals'].mean())

0.22372371409045236

In [10]:
# Inspect the home-team column of the trimmed frame (its length confirms how
# many matches remain).
epl_1920.loc[:, 'HomeTeam']

0           Liverpool
1            West Ham
2         Bournemouth
3             Burnley
4      Crystal Palace
            ...      
365            Wolves
366           Watford
367       Aston Villa
368        Man United
369         Liverpool
Name: HomeTeam, Length: 370, dtype: object

In [11]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Reshape the results to one row per team per match: every fixture contributes
# a home-side row (home=1) and an away-side row (home=0), each with the columns
# team / opponent / goals / home.
home_rows = (
    epl_1920[['HomeTeam', 'AwayTeam', 'HomeGoals']]
    .rename(columns={'HomeTeam': 'team', 'AwayTeam': 'opponent', 'HomeGoals': 'goals'})
    .assign(home=1)
)
away_rows = (
    epl_1920[['AwayTeam', 'HomeTeam', 'AwayGoals']]
    .rename(columns={'AwayTeam': 'team', 'HomeTeam': 'opponent', 'AwayGoals': 'goals'})
    .assign(home=0)
)
goal_model_data = pd.concat([home_rows, away_rows])

# Poisson regression of goals scored on a home-advantage indicator plus
# categorical effects for the scoring team and for the opponent.
poisson_family = sm.families.Poisson()
poisson_model = smf.glm(
    formula="goals ~ home + team + opponent",
    data=goal_model_data,
    family=poisson_family,
).fit()
poisson_model.summary()

0,1,2,3
Dep. Variable:,goals,No. Observations:,740.0
Model:,GLM,Df Residuals:,700.0
Model Family:,Poisson,Df Model:,39.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-1028.6
Date:,"Tue, 08 Sep 2020",Deviance:,748.82
Time:,08:20:12,Pearson chi2:,642.0
No. Iterations:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1498,0.207,0.722,0.470,-0.257,0.556
team[T.Aston Villa],-0.2673,0.210,-1.274,0.203,-0.679,0.144
team[T.Bournemouth],-0.3520,0.215,-1.640,0.101,-0.773,0.069
team[T.Brighton],-0.3663,0.215,-1.707,0.088,-0.787,0.054
team[T.Burnley],-0.2359,0.207,-1.140,0.254,-0.642,0.170
team[T.Chelsea],0.2312,0.184,1.255,0.210,-0.130,0.592
team[T.Crystal Palace],-0.5752,0.229,-2.514,0.012,-1.024,-0.127
team[T.Everton],-0.2003,0.206,-0.974,0.330,-0.603,0.203
team[T.Leicester],0.2141,0.184,1.162,0.245,-0.147,0.575


In [13]:
# Expected number of goals for Liverpool when playing Norwich at home.
liverpool_home_fixture = pd.DataFrame(
    data={'team': 'Liverpool', 'opponent': 'Norwich', 'home': 1},
    index=[1],
)
poisson_model.predict(liverpool_home_fixture)

1    3.423612
dtype: float64

In [14]:
# Expected number of goals for Norwich when playing away at Liverpool.
norwich_away_fixture = pd.DataFrame(
    data={'team': 'Norwich', 'opponent': 'Liverpool', 'home': 0},
    index=[1],
)
poisson_model.predict(norwich_away_fixture)

1    0.414053
dtype: float64

In [15]:
def _expected_goals(foot_model, team, opponent, home):
    """Return the goal count `foot_model` predicts for `team` against `opponent`."""
    fixture = pd.DataFrame(
        data={'team': team, 'opponent': opponent, 'home': home},
        index=[1],
    )
    return foot_model.predict(fixture).values[0]


def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    """Build the score-line probability matrix for a single fixture.

    Each side's goal count is modelled as an independent Poisson variable whose
    mean is the value `foot_model` predicts for that side.

    Parameters
    ----------
    foot_model : object
        Fitted model exposing ``predict(DataFrame)``; it is called with the
        columns ``team``, ``opponent`` and ``home`` and its result must offer
        ``.values``.
    homeTeam, awayTeam : str
        Team labels passed to the model as ``team`` / ``opponent``.
    max_goals : int, default 10
        Largest goal count considered for each side.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max_goals + 1, max_goals + 1)`` where entry
        ``[i, j]`` is P(home scores i) * P(away scores j). Score lines above
        ``max_goals`` are left out, so the entries sum to slightly less than 1.
    """
    home_goals_avg = _expected_goals(foot_model, homeTeam, awayTeam, home=1)
    away_goals_avg = _expected_goals(foot_model, awayTeam, homeTeam, home=0)

    # poisson.pmf is vectorised, so evaluate every goal count in one call per
    # side rather than one scalar call per count.
    goal_counts = np.arange(max_goals + 1)
    home_pmf = poisson.pmf(goal_counts, home_goals_avg)
    away_pmf = poisson.pmf(goal_counts, away_goals_avg)

    # Rows index home goals, columns index away goals.
    return np.outer(home_pmf, away_pmf)
# Small demonstration: score-line probabilities for Liverpool (home) against
# Norwich (away), limited to 0-3 goals per side.
simulate_match(poisson_model, homeTeam='Liverpool', awayTeam='Norwich', max_goals=3)

array([[0.02154386, 0.00892029, 0.00184673, 0.00025488],
       [0.07375783, 0.03053961, 0.0063225 , 0.00087262],
       [0.12625908, 0.05227789, 0.0108229 , 0.00149375],
       [0.14408735, 0.05965973, 0.01235113, 0.00170467]])

In [16]:
liv_nor = simulate_match(poisson_model, homeTeam="Liverpool", awayTeam="Norwich", max_goals=10)
# Liverpool win: rows are home goals and columns are away goals, so the cells
# strictly below the main diagonal are the score lines where the home side
# scores more.
np.tril(liv_nor, k=-1).sum()

0.9141605417246046

In [17]:
# Draw: the main diagonal holds the score lines where both sides score the
# same number of goals.
liv_nor.diagonal().sum()

0.06477098410083065

In [18]:
# Norwich win: the cells strictly above the main diagonal are the score lines
# where the away side scores more.
np.triu(liv_nor, k=1).sum()

0.020212436081052984