In [5]:
from scipy.stats import poisson 
import numpy as np
import matplotlib
import matplotlib.pyplot as plt 
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [6]:
def get_premier_league_data(start_year):
    """Download the E0.csv results file for the season starting in ``start_year``.

    The season code in the URL is the last two digits of the start year
    followed by the last two digits of the following year
    (e.g. ``2021`` -> ``"2122"``).

    Parameters
    ----------
    start_year : int
        Calendar year in which the season starts.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` parses from the remote CSV file.
    """
    end_year = start_year + 1
    season = str(start_year)[-2:] + str(end_year)[-2:]
    url = "http://www.football-data.co.uk/mmz4281/" + season + "/E0.csv"
    return pd.read_csv(url)

In [7]:
# Load the 2021/22 season. The loader defined in this notebook is
# `get_premier_league_data`; the previous call to `getdata` referenced a name
# that is never defined and raised NameError on a fresh kernel.
data = get_premier_league_data(2021)

In [5]:
# Keep only the team, goals and result columns, and give the goal/result
# columns readable names.
#
# Rename first, then select by the *new* names: `data` is overwritten here, so
# the earlier select-then-rename order raised KeyError when the cell was run a
# second time (the raw FTHG/FTAG/FTR names were already gone). Renaming keys
# that are absent is a no-op, which makes this cell safe to re-run.
columns = ["HomeTeam", "AwayTeam", "FTHG", "FTAG", "FTR"]
column_names = {"FTHG": "HomeGoals", "FTAG": "AwayGoals", "FTR": "Result"}

data = data.rename(columns=column_names)
data = data[[column_names.get(column, column) for column in columns]]

In [8]:
# Reshape the match table into one row per (team, match): the scoring side is
# "team", the other side is "opponent", and "home" flags whether the scoring
# side played at home (1) or away (0).
home_goals = (
    data[["HomeTeam", "AwayTeam", "HomeGoals"]]
    .assign(home=1)
    .rename(columns={"HomeTeam": "team", "AwayTeam": "opponent", "HomeGoals": "goals"})
)

away_goals = (
    data[["AwayTeam", "HomeTeam", "AwayGoals"]]
    .assign(home=0)
    .rename(columns={"AwayTeam": "team", "HomeTeam": "opponent", "AwayGoals": "goals"})
)

In [9]:
# Stack the home-side and away-side rows into one training table.
# ignore_index=True gives every row its own label; without it each label
# appears twice (once from home_goals, once from away_goals), which makes
# label-based lookups such as training_data.loc[0] ambiguous.
training_data = pd.concat([home_goals, away_goals], ignore_index=True)
training_data.head()

Unnamed: 0,team,opponent,goals,home
0,Brentford,Arsenal,2,1
1,Man United,Leeds,5,1
2,Burnley,Brighton,1,1
3,Chelsea,Crystal Palace,3,1
4,Everton,Southampton,3,1


In [10]:

# Fit a Poisson GLM for goals scored: a home indicator (0/1) plus one
# categorical effect for the scoring team and one for the opponent.
poisson_model = smf.glm(
    "goals ~ home + team + opponent",
    training_data,
    family=sm.families.Poisson(),
).fit()

In [11]:
# Display the fitted GLM's summary (fit statistics and the coefficient table).
poisson_model.summary()

0,1,2,3
Dep. Variable:,goals,No. Observations:,408.0
Model:,GLM,Df Residuals:,368.0
Model Family:,Poisson,Df Model:,39.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-562.64
Date:,"Fri, 21 Jan 2022",Deviance:,382.96
Time:,16:54:29,Pearson chi2:,330.0
No. Iterations:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.2030,0.274,0.742,0.458,-0.333,0.739
team[T.Aston Villa],-0.1341,0.261,-0.513,0.608,-0.646,0.378
team[T.Brentford],-0.3169,0.266,-1.191,0.234,-0.838,0.205
team[T.Brighton],-0.4563,0.277,-1.649,0.099,-0.999,0.086
team[T.Burnley],-0.5290,0.306,-1.727,0.084,-1.129,0.071
team[T.Chelsea],0.2479,0.229,1.082,0.279,-0.201,0.697
team[T.Crystal Palace],-0.0963,0.254,-0.380,0.704,-0.594,0.401
team[T.Everton],-0.1933,0.270,-0.717,0.473,-0.722,0.335
team[T.Leeds],-0.2790,0.270,-1.034,0.301,-0.808,0.250


In [12]:
def create_X(home_team, away_team):
    """Build the two single-row predictor frames for one fixture.

    Parameters
    ----------
    home_team, away_team
        Team names, as they appear in the model's ``team`` / ``opponent``
        columns.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(X_home, X_away)``: the first row describes the home side scoring
        against the away side (``home == 1``); the second describes the away
        side scoring against the home side (``home == 0``). Both frames have
        the columns ``team``, ``opponent``, ``home`` and the index ``[1]``.
    """

    def _single_row(team, opponent, home_flag):
        # One-row frame; the explicit index lets scalar values be used as columns.
        row = {"team": team, "opponent": opponent, "home": home_flag}
        return pd.DataFrame(data=row, index=[1])

    return _single_row(home_team, away_team, 1), _single_row(away_team, home_team, 0)

In [13]:
def predict_avg_goals(X_home, X_away, model):
    """Return the model's predictions for the home and away predictor frames.

    Parameters
    ----------
    X_home, X_away
        Predictor inputs for the home side and the away side.
    model
        Any object exposing ``predict(X)``.

    Returns
    -------
    tuple
        ``(model.predict(X_home), model.predict(X_away))``; the home
        prediction is computed first.
    """
    return tuple(model.predict(frame) for frame in (X_home, X_away))

In [14]:
# Expected goals for Chelsea (home) against Man City (away).
X_home, X_away = create_X("Chelsea", "Man City")
avg_goals = predict_avg_goals(X_home, X_away, poisson_model)
# Each prediction holds a single value; take it by position for printing.
print('Average Home Goals: ', avg_goals[0].iloc[0])
print('Average Away Goals: ', avg_goals[1].iloc[0])

Average Home Goals:  0.9299023272771175
Average Away Goals:  1.1219573696488538


In [15]:

def _expected_goals_scalar(predicted, label):
    """Collapse a one-element prediction (Series, array or scalar) to a float."""
    values = np.asarray(predicted, dtype=float).ravel()
    if values.size != 1:
        raise ValueError(
            f"expected exactly one predicted {label} goal average, got {values.size}"
        )
    return float(values[0])


def predict_score_pmf(X_home, X_away, model, max_goals):
    """Joint probability of every scoreline from 0-0 up to max_goals-max_goals.

    Home and away goals are treated as independent Poisson variables whose
    means are the model's predictions for ``X_home`` and ``X_away``.

    Parameters
    ----------
    X_home, X_away
        Predictor inputs for a single fixture (one row each).
    model
        Fitted model, passed through to ``predict_avg_goals``.
    max_goals : int
        Largest goal count included for each side.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max_goals + 1, max_goals + 1)``; entry ``[h, a]`` is
        ``P(home goals = h) * P(away goals = a)``. The distribution is
        truncated at ``max_goals`` and is not renormalised.

    Raises
    ------
    ValueError
        If either prediction does not contain exactly one value.
    """
    home_goals_avg, away_goals_avg = predict_avg_goals(X_home, X_away, model)

    # The predictions normally arrive as length-1 Series. Reduce them to plain
    # floats so the pmf vectors are 1-D, rather than relying on np.outer to
    # flatten (n, 1) arrays -- which silently produced an oversized, meaningless
    # matrix when more than one row was passed.
    home_rate = _expected_goals_scalar(home_goals_avg, "home")
    away_rate = _expected_goals_scalar(away_goals_avg, "away")

    goal_counts = np.arange(max_goals + 1)
    home_goals_pmf = poisson.pmf(goal_counts, home_rate)
    away_goals_pmf = poisson.pmf(goal_counts, away_rate)

    return np.outer(home_goals_pmf, away_goals_pmf)

In [17]:
def predict_score(X_home, X_away, model, max_goals=16):
    """Return the single most probable scoreline for a fixture.

    Parameters
    ----------
    X_home, X_away
        Predictor inputs for the fixture, passed to ``predict_score_pmf``.
    model
        Fitted model, passed to ``predict_score_pmf``.
    max_goals : int, optional
        Largest goal count per side to consider (default 16, the value that
        was previously hard-coded).

    Returns
    -------
    tuple of int
        ``(home_goals, away_goals)`` at the maximum of the joint pmf. If
        several scorelines are exactly tied, the first one in row-major order
        is returned.
    """
    score_pmf = np.asarray(predict_score_pmf(X_home, X_away, model, max_goals))

    # Take the argmax of the unrounded probabilities: rounding to 3 decimals
    # first created artificial ties, and argmax then returned the lower
    # scoreline even when a later one was strictly more likely.
    # unravel_index uses the pmf's own shape, so the grid size is no longer
    # duplicated as a magic number.
    home_goals_mode, away_goals_mode = np.unravel_index(
        np.argmax(score_pmf), score_pmf.shape
    )

    # Plain ints so the tuple prints as e.g. (1, 0) on every NumPy version.
    return int(home_goals_mode), int(away_goals_mode)

In [18]:
# Predictor frames for the fixture: `Watford` is the home-side frame and
# `Norwich` the away-side frame (both are DataFrames, not team names).
Watford, Norwich =  create_X('Watford', 'Norwich')
# Most likely (home goals, away goals) scoreline according to the fitted model.
Match =  predict_score(Watford, Norwich, poisson_model)
# NOTE(review): the recorded output under this cell lists ten fixtures, but this
# cell prints only Watford v Norwich -- the saved output looks stale; re-run the
# cell or add the remaining fixtures so code and output agree.
print('Watford v Norwich:', Match)


Watford v Norwich: (1, 0)
Everton v Aston Villa: (1, 1)
Brentford v Wolves: (0, 0)
Leeds v Newcastle: (1, 1)
Man United v Westham: (1, 1)
Southampton v Man City: (0, 2)
Arsenal v Burnley: (1, 0)
Crystal Palace v Liverpool: (0, 2)
Leicester v Brighton: (1, 1)
Chelsea v Spurs: (1, 0)
