In [1]:
# One-time environment setup, kept commented out so a normal run does not reinstall anything.
# Uncomment both lines to install rpy2 and pymer4 with conda.
#!conda install -c conda-forge rpy2
#!conda install -c ejolly -c defaults -c conda-forge pymer4

In [2]:
import pandas as pd
import numpy as np
import os
from win_draw_loss import get_elo
from pymer4.models import Lmer
from sklearn.preprocessing import StandardScaler
from statsmodels.formula.api import glm
import statsmodels.api as sm

In [3]:
# Discover which leagues have a data file in the local "Data" directory.
# - os.path.splitext() strips only the final extension, so a file name that
#   contains extra dots keeps its full stem (split(".")[0] would truncate it and
#   the resulting code would not match the file on disk).
# - A set avoids listing a league twice when both an .xlsx and a .csv exist.
# - sorted() makes the listing reproducible; os.listdir() order is arbitrary.
DATA_EXTENSIONS = (".xlsx", ".csv")

leagues = sorted(
    {
        os.path.splitext(file_name)[0]
        for file_name in os.listdir("Data")
        if file_name.endswith(DATA_EXTENSIONS)
    }
)
print(f"Available data: {', '.join(leagues)}")

Available data: N1, F1, SC0, D1, EC, FIN, T1, SWE, ARG, MEX, POL, IRL, G1, SC3, SP1, E2, E3, USA, P1, E0, B1, JPN, SWZ, I1, CHN, AUT, SC2, SP2, RUS, DNK, ROU, I2, BRA, SC1, D2, NOR, E1, F2


In [4]:
league = "IRL"

# Prefer the Excel file and fall back to CSV only when the Excel file is absent.
# Catching FileNotFoundError (rather than a bare `except:`) keeps genuine read
# failures -- corrupt workbook, missing Excel engine, KeyboardInterrupt -- visible
# instead of masking them behind an unrelated CSV error.
try:
    df = pd.read_excel(f"Data/{league}.xlsx")
except FileNotFoundError:
    df = pd.read_csv(f"Data/{league}.csv")

# Columns needed downstream: the two team names and the full-time goals of each side.
MATCH_COLUMNS = ["HomeTeam", "AwayTeam", "FTHG", "FTAG"]
# Alternative headers used by some league files, mapped to the standard names.
COLUMN_ALIASES = {"HG": "FTHG", "AG": "FTAG", "Home": "HomeTeam", "Away": "AwayTeam"}

try:
    df = df[MATCH_COLUMNS]
except KeyError:  # some of the leagues have mismatched names
    # Non-mutating rename: builds a new frame instead of editing the loaded one in place.
    df = df.rename(columns=COLUMN_ALIASES)[MATCH_COLUMNS]

# get_elo returns the per-match history (including an EloDiff column) and a
# mapping of team name -> rating.
elo_history, elo_ratings = get_elo(df)

# Skip the first matches of the history.
# NOTE(review): presumably a warm-up period while ratings are still close to
# their starting values -- confirm against get_elo.
ELO_WARMUP_MATCHES = 10
elo_history = elo_history[ELO_WARMUP_MATCHES:].reset_index(drop=True)

# Standardise EloDiff to zero mean / unit variance. The fitted scaler is kept in
# `scaler` so the same transform can be applied to new EloDiff values.
scaler = StandardScaler()
elo_history["EloDiff"] = scaler.fit_transform(elo_history["EloDiff"].values.reshape(-1, 1))

elo_history.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,EloDiff
0,Shamrock Rovers,Monaghan,3,1,0.082928
1,Sligo Rovers,UC Dublin,2,1,-0.064281
2,Cork City,Shamrock Rovers,1,1,-0.282976
3,Drogheda,Dundalk,0,0,0.08081
4,Sligo Rovers,Bohemians,1,0,0.230137


In [5]:
# Distinct team names found across the home and away columns of the match data.
fixture_sides = df[["HomeTeam", "AwayTeam"]].values
teams = pd.unique(fixture_sides.ravel("K"))
teams

array(['Drogheda', 'Derry City', 'St. Patricks', 'UC Dublin', 'Monaghan',
       'Shelbourne', 'Bohemians', 'Bray', 'Cork City', 'Dundalk',
       'Shamrock Rovers', 'Sligo Rovers', 'Longford', 'Waterford',
       'Limerick', 'Mervue', 'Athlone', 'Galway', 'Finn Harps', 'Wexford',
       'Cobh Ramblers', 'Cabinteely', 'Treaty United'], dtype=object)

In [6]:
# Example fixture: look up the Elo rating of each side and print both.
home_team, away_team = "Drogheda", "Shamrock Rovers"
home_elo, away_elo = (elo_ratings[side] for side in (home_team, away_team))

print(
    f"{home_team} ELO: {home_elo:.2f}",
    f"{away_team} ELO: {away_elo:.2f}",
    sep="\n",
)

Drogheda ELO: 1576.80
Shamrock Rovers ELO: 1708.71


In [7]:
# Model 1 -- full Poisson mixed model, one per goal count.
#   responses:      FTHG (home model), FTAG (away model)
#   fixed effect:   EloDiff
#   random effects: intercept and EloDiff slope, grouped by HomeTeam and by AwayTeam
lmer_kwargs = dict(data=elo_history, family="poisson")

home_model = Lmer(
    "FTHG ~ 1 + EloDiff + (1 + EloDiff | HomeTeam) + (1 + EloDiff | AwayTeam)",
    **lmer_kwargs,
)
away_model = Lmer(
    "FTAG ~ 1 + EloDiff + (1 + EloDiff | AwayTeam) + (1 + EloDiff | HomeTeam)",
    **lmer_kwargs,
)

In [8]:
# Summary statistics of the numeric columns. Left as the cell's last expression
# (not wrapped in print) so the notebook renders the DataFrame as a rich table.
elo_history.describe()

              FTHG         FTAG       EloDiff
count  2465.000000  2465.000000  2.465000e+03
mean      1.431237     1.147262 -1.459279e-17
std       1.354795     1.113222  1.000203e+00
min       0.000000     0.000000 -3.361460e+00
25%       0.000000     0.000000 -6.770601e-01
50%       1.000000     1.000000  9.323737e-03
75%       2.000000     2.000000  6.725055e-01
max      10.000000     7.000000  3.360138e+00


In [9]:
# Sanity check: list the columns available to the model formulas.
elo_history.columns

Index(['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'EloDiff'], dtype='object')

In [10]:
# Fit both mixed models; each fit() call prints a model summary.
# NOTE(review): the recorded output shows a "boundary (singular) fit" warning for
# the home model and a "Model failed to converge" warning for the away model, so
# treat the random-effect estimates with caution.
home_fit = home_model.fit()
away_fit = away_model.fit()

boundary (singular) fit: see help('isSingular') 

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTHG~1+EloDiff+(1+EloDiff|HomeTeam)+(1+EloDiff|AwayTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'HomeTeam': 23.0, 'AwayTeam': 23.0}

Log-likelihood: -3690.093 	 AIC: 7396.186

Random effects:

                 Name    Var    Std
HomeTeam  (Intercept)  0.000  0.000
HomeTeam      EloDiff  0.000  0.000
AwayTeam  (Intercept)  0.012  0.112
AwayTeam      EloDiff  0.000  0.017

                  IV1      IV2 Corr
HomeTeam  (Intercept)  EloDiff     
AwayTeam  (Intercept)  EloDiff  1.0

Fixed effects:



  ran_vars = ran_vars.applymap(
  ran_corrs = ran_corrs.applymap(


Model failed to converge with max|grad| = 0.00779359 (tol = 0.002, component 1) 

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTAG~1+EloDiff+(1+EloDiff|AwayTeam)+(1+EloDiff|HomeTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'AwayTeam': 23.0, 'HomeTeam': 23.0}

Log-likelihood: -3348.480 	 AIC: 6712.960

Random effects:

                 Name    Var    Std
AwayTeam  (Intercept)  0.000  0.002
AwayTeam      EloDiff  0.003  0.055
HomeTeam  (Intercept)  0.015  0.123
HomeTeam      EloDiff  0.002  0.043

                  IV1      IV2  Corr
AwayTeam  (Intercept)  EloDiff  -1.0
HomeTeam  (Intercept)  EloDiff   1.0

Fixed effects:



  ran_vars = ran_vars.applymap(
  ran_corrs = ran_corrs.applymap(


In [11]:
# Several random-effect variances in the full model are estimated at (or near) zero,
# so fit simpler alternatives to compare against it.

# Model 0: plain Poisson GLM with fixed effects only (no team-level terms).
home_model_0 = glm("FTHG ~ 1 + EloDiff", data=elo_history, family=sm.families.Poisson())
away_model_0 = glm("FTAG ~ 1 + EloDiff", data=elo_history, family=sm.families.Poisson())
home_fit_0, away_fit_0 = home_model_0.fit(), away_model_0.fit()

# Model 2: Poisson mixed model with a random intercept for the scoring team only.
lmer_kwargs = dict(data=elo_history, family="poisson")
home_model_2 = Lmer("FTHG ~ 1 + EloDiff + (1 | HomeTeam)", **lmer_kwargs)
away_model_2 = Lmer("FTAG ~ 1 + EloDiff + (1 | AwayTeam)", **lmer_kwargs)

# Home is fitted before away so the printed summaries keep that order.
home_fit_2 = home_model_2.fit()
away_fit_2 = away_model_2.fit()

boundary (singular) fit: see help('isSingular') 

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTHG~1+EloDiff+(1|HomeTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'HomeTeam': 23.0}

Log-likelihood: -3699.701 	 AIC: 7405.402

Random effects:

                 Name  Var  Std
HomeTeam  (Intercept)  0.0  0.0

No random effect correlations specified

Fixed effects:



  ran_vars = ran_vars.applymap(


boundary (singular) fit: see help('isSingular') 

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTAG~1+EloDiff+(1|AwayTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'AwayTeam': 23.0}

Log-likelihood: -3355.059 	 AIC: 6716.118

Random effects:

                 Name  Var  Std
AwayTeam  (Intercept)  0.0  0.0

No random effect correlations specified

Fixed effects:



  ran_vars = ran_vars.applymap(


In [12]:
# Model 3: Poisson mixed model with a random EloDiff slope (no random intercept)
# for the scoring team.
lmer_kwargs = dict(data=elo_history, family="poisson")
home_model_3 = Lmer("FTHG ~ 1 + EloDiff + (0 + EloDiff | HomeTeam)", **lmer_kwargs)
away_model_3 = Lmer("FTAG ~ 1 + EloDiff + (0 + EloDiff | AwayTeam)", **lmer_kwargs)

# Home is fitted before away so the printed summaries keep that order.
home_fit_3 = home_model_3.fit()
away_fit_3 = away_model_3.fit()

boundary (singular) fit: see help('isSingular') 

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTHG~1+EloDiff+(0+EloDiff|HomeTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'HomeTeam': 23.0}

Log-likelihood: -3699.701 	 AIC: 7405.402

Random effects:

             Name  Var  Std
HomeTeam  EloDiff  0.0  0.0

No random effect correlations specified

Fixed effects:



  ran_vars = ran_vars.applymap(


Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTAG~1+EloDiff+(0+EloDiff|AwayTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'AwayTeam': 23.0}

Log-likelihood: -3353.771 	 AIC: 6713.543

Random effects:

             Name    Var   Std
AwayTeam  EloDiff  0.003  0.05

No random effect correlations specified

Fixed effects:



  ran_vars = ran_vars.applymap(


In [13]:
# Model 4: Model 1 plus a random intercept for each HomeTeam:AwayTeam pairing.
lmer_kwargs = dict(data=elo_history, family="poisson")
home_model_4 = Lmer(
    "FTHG ~ 1 + EloDiff + (1 | HomeTeam:AwayTeam) + (1 + EloDiff | HomeTeam) + (1 + EloDiff | AwayTeam)",
    **lmer_kwargs,
)
away_model_4 = Lmer(
    "FTAG ~ 1 + EloDiff + (1 | HomeTeam:AwayTeam) + (1 + EloDiff | AwayTeam) + (1 + EloDiff | HomeTeam)",
    **lmer_kwargs,
)

# Home is fitted before away so the printed summaries keep that order.
home_fit_4 = home_model_4.fit()
away_fit_4 = away_model_4.fit()

boundary (singular) fit: see help('isSingular') 

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTHG~1+EloDiff+(1|HomeTeam:AwayTeam)+(1+EloDiff|HomeTeam)+(1+EloDiff|AwayTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'HomeTeam:AwayTeam': 307.0, 'HomeTeam': 23.0, 'AwayTeam': 23.0}

Log-likelihood: -3690.093 	 AIC: 7398.186

Random effects:

                          Name    Var    Std
HomeTeam:AwayTeam  (Intercept)  0.000  0.000
HomeTeam           (Intercept)  0.000  0.000
HomeTeam               EloDiff  0.000  0.000
AwayTeam           (Intercept)  0.012  0.112
AwayTeam               EloDiff  0.000  0.017

                  IV1      IV2 Corr
HomeTeam  (Intercept)  EloDiff     
AwayTeam  (Intercept)  EloDiff  1.0

Fixed effects:



  ran_vars = ran_vars.applymap(
  ran_corrs = ran_corrs.applymap(


boundary (singular) fit: see help('isSingular') 

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTAG~1+EloDiff+(1|HomeTeam:AwayTeam)+(1+EloDiff|AwayTeam)+(1+EloDiff|HomeTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'HomeTeam:AwayTeam': 307.0, 'AwayTeam': 23.0, 'HomeTeam': 23.0}

Log-likelihood: -3348.445 	 AIC: 6714.890

Random effects:

                          Name    Var    Std
HomeTeam:AwayTeam  (Intercept)  0.002  0.045
AwayTeam           (Intercept)  0.000  0.000
AwayTeam               EloDiff  0.003  0.056
HomeTeam           (Intercept)  0.015  0.122
HomeTeam               EloDiff  0.002  0.043

                  IV1      IV2 Corr
AwayTeam  (Intercept)  EloDiff     
HomeTeam  (Intercept)  EloDiff  1.0

Fixed effects:



  ran_vars = ran_vars.applymap(
  ran_corrs = ran_corrs.applymap(


In [20]:
# AIC of the fixed-effects-only home GLM. The statsmodels result exposes it as
# lowercase `.aic`; the pymer4 models are read via `.AIC` in the comparison cell.
home_fit_0.aic

np.float64(7403.401716008719)

In [22]:
# Side-by-side AIC report for every candidate model (lower is better).
# Model 0 is the statsmodels GLM result (.aic); models 1-4 are pymer4 models (.AIC).
# A leading "\n" on each later "Home" line separates the model pairs with a blank line.
aic_report = [
    f"Home Model 0 AIC: {home_fit_0.aic:.2f}",
    f"Away Model 0 AIC: {away_fit_0.aic:.2f}",
    f"\nHome Model 1 AIC: {home_model.AIC:.2f}",
    f"Away Model 1 AIC: {away_model.AIC:.2f}",
    f"\nHome Model 2 AIC: {home_model_2.AIC:.2f}",
    f"Away Model 2 AIC: {away_model_2.AIC:.2f}",
    f"\nHome Model 3 AIC: {home_model_3.AIC:.2f}",
    f"Away Model 3 AIC: {away_model_3.AIC:.2f}",
    f"\nHome Model 4 AIC: {home_model_4.AIC:.2f}",
    f"Away Model 4 AIC: {away_model_4.AIC:.2f}",
]
print("\n".join(aic_report))


Home Model 0 AIC: 7403.40
Away Model 0 AIC: 6714.12

Home Model 1 AIC: 7396.19
Away Model 1 AIC: 6712.96

Home Model 2 AIC: 7405.40
Away Model 2 AIC: 6716.12

Home Model 3 AIC: 7405.40
Away Model 3 AIC: 6713.54

Home Model 4 AIC: 7398.19
Away Model 4 AIC: 6714.89


In [23]:
# Model 1 (random intercept and EloDiff slope for both teams) has the lowest AIC
# for both home and away goals, so we keep the more complex model.
# NOTE(review): the AIC gaps to the simpler models are small and the Model 1 fits
# reported singular-fit / convergence warnings -- confirm this choice.
home_model.summary()

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTHG~1+EloDiff+(1+EloDiff|HomeTeam)+(1+EloDiff|AwayTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'HomeTeam': 23.0, 'AwayTeam': 23.0}

Log-likelihood: -3690.093 	 AIC: 7396.186

Random effects:

                 Name    Var    Std
HomeTeam  (Intercept)  0.000  0.000
HomeTeam      EloDiff  0.000  0.000
AwayTeam  (Intercept)  0.012  0.112
AwayTeam      EloDiff  0.000  0.017

                  IV1      IV2 Corr
HomeTeam  (Intercept)  EloDiff     
AwayTeam  (Intercept)  EloDiff  1.0

Fixed effects:



Unnamed: 0,Estimate,2.5_ci,97.5_ci,SE,Z-stat,P-val,Sig
(Intercept),0.315,0.247,0.382,0.034,9.156,0.0,***
EloDiff,0.333,0.294,0.373,0.02,16.553,0.0,***


In [24]:
# Summary of the away-goals counterpart (FTAG) of the full random intercept + slope model.
away_model.summary()

Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: FTAG~1+EloDiff+(1+EloDiff|AwayTeam)+(1+EloDiff|HomeTeam)

Family: poisson	 Inference: parametric

Number of observations: 2465	 Groups: {'AwayTeam': 23.0, 'HomeTeam': 23.0}

Log-likelihood: -3348.480 	 AIC: 6712.960

Random effects:

                 Name    Var    Std
AwayTeam  (Intercept)  0.000  0.002
AwayTeam      EloDiff  0.003  0.055
HomeTeam  (Intercept)  0.015  0.123
HomeTeam      EloDiff  0.002  0.043

                  IV1      IV2  Corr
AwayTeam  (Intercept)  EloDiff  -1.0
HomeTeam  (Intercept)  EloDiff   1.0

Fixed effects:



Unnamed: 0,Estimate,2.5_ci,97.5_ci,SE,Z-stat,P-val,Sig
(Intercept),0.141,0.065,0.217,0.039,3.636,0.0,***
EloDiff,-0.225,-0.287,-0.162,0.032,-6.996,0.0,***
