In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson,skellam
df = pd.concat(
    map(pd.read_csv, ['https://www.football-data.co.uk/mmz4281/0506/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/0607/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/0708/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/0809/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/0910/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1011/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1112/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1213/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1314/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1415/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1516/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1617/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1718/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/1920/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/2021/E0.csv',
                      'https://www.football-data.co.uk/mmz4281/2122/E0.csv'
                     ]), ignore_index=True)

df = df[['HomeTeam','AwayTeam','FTHG','FTAG']]
df = df.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
df.head()

Unnamed: 0,HomeTeam,AwayTeam,HomeGoals,AwayGoals
0,Aston Villa,Bolton,2.0,2.0
1,Everton,Man United,0.0,2.0
2,Fulham,Birmingham,0.0,0.0
3,Man City,West Brom,0.0,0.0
4,Middlesbrough,Liverpool,0.0,0.0


In [2]:
df = df[:-10]
df.mean()

  df.mean()


HomeGoals    1.523633
AwayGoals    1.158170
dtype: float64

In [3]:
# probability of draw between home and away team
skellam.pmf(0.0,  df.mean()[0],  df.mean()[1])

  skellam.pmf(0.0,  df.mean()[0],  df.mean()[1])


0.25417257953857963

In [4]:
# probability of home team winning by one goal
skellam.pmf(1,  df.mean()[0],  df.mean()[1])

  skellam.pmf(1,  df.mean()[0],  df.mean()[1])


0.22769249724974064

In [5]:
# importing the tools required for the Poisson regression model
import statsmodels.api as sm
import statsmodels.formula.api as smf

goal_model_data = pd.concat([df[['HomeTeam','AwayTeam','HomeGoals']].assign(home=1).rename(
            columns={'HomeTeam':'team', 'AwayTeam':'opponent','HomeGoals':'goals'}),
           df[['AwayTeam','HomeTeam','AwayGoals']].assign(home=0).rename(
            columns={'AwayTeam':'team', 'HomeTeam':'opponent','AwayGoals':'goals'})])

poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=goal_model_data, 
                        family=sm.families.Poisson()).fit()
poisson_model.summary()

0,1,2,3
Dep. Variable:,goals,No. Observations:,11848.0
Model:,GLM,Df Residuals:,11766.0
Model Family:,Poisson,Df Model:,81.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-17047.0
Date:,"Fri, 18 Feb 2022",Deviance:,13486.0
Time:,22:01:42,Pearson chi2:,11800.0
No. Iterations:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.2179,0.052,4.204,0.000,0.116,0.320
team[T.Aston Villa],-0.4008,0.051,-7.926,0.000,-0.500,-0.302
team[T.Birmingham],-0.6101,0.088,-6.962,0.000,-0.782,-0.438
team[T.Blackburn],-0.3757,0.063,-5.935,0.000,-0.500,-0.252
team[T.Blackpool],-0.2028,0.138,-1.465,0.143,-0.474,0.069
team[T.Bolton],-0.4221,0.064,-6.548,0.000,-0.548,-0.296
team[T.Bournemouth],-0.3654,0.080,-4.577,0.000,-0.522,-0.209
team[T.Brentford],-0.4727,0.199,-2.379,0.017,-0.862,-0.083
team[T.Brighton],-0.5718,0.091,-6.260,0.000,-0.751,-0.393


In [35]:
ht = str(input('Enter Home Team Name:'))
at = str(input('Enter Away Team Name:'))

#check to see how many goals will be scored by team
htg = poisson_model.predict(pd.DataFrame(data={'team': ht, 'opponent': at,
                                       'home':1},index=[1]))
#check to see how many goals will be scored by opponent
atg = poisson_model.predict(pd.DataFrame(data={'team': ht, 'opponent': at,
                                       'home':0},index=[1]))

def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    home_goals_avg = foot_model.predict(pd.DataFrame(data={'team': homeTeam, 
                                                            'opponent': awayTeam,'home':1},
                                                      index=[1])).values[0]
    away_goals_avg = foot_model.predict(pd.DataFrame(data={'team': awayTeam, 
                                                            'opponent': homeTeam,'home':0},
                                                      index=[1])).values[0]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in [home_goals_avg, away_goals_avg]]
    return(np.outer(np.array(team_pred[0]), np.array(team_pred[1])))
simulate_match(poisson_model, ht, at, max_goals=3)

chel_sun = simulate_match(poisson_model, ht, at, max_goals=10)
hw = np.sum(np.tril(chel_sun, -1))
dr = np.sum(np.diag(chel_sun))
aw = np.sum(np.triu(chel_sun, 1))
hw = "{:.2f}".format(hw*100)
aw = "{:.2f}".format(aw*100)
dr = "{:.2f}".format(dr*100)

print(ht, "has a {}% chance of winning.".format(hw))
print('')
print("Draw has a {}% chance of occuring.".format(dr))
print('')
print(at, "has a {}% chance of winning.".format(aw))
print('')
print(ht, "is predicted to score","{:.2f}".format(htg[1]), "goals.")
print('')
print(at, "is predicted to score", "{:.2f}".format(atg[1]), "goals.")

Enter Home Team Name:Man City
Enter Away Team Name:Tottenham
Man City has a 56.87% chance of winning.

Draw has a 22.02% chance of occuring.

Tottenham has a 21.11% chance of winning.

Man City is predicted to score 1.93 goals.

Tottenham is predicted to score 1.46 goals.
