In [1]:
import pandas as pd
import numpy as np
import os
import bambi as bmb
from win_draw_loss import get_elo

In [2]:
# Collect the league codes that have a data file in ./Data.
# - os.listdir() returns entries in arbitrary order, so sort for a stable,
#   reproducible listing.
# - os.path.splitext() strips only the final extension, so the code still
#   round-trips to f"Data/{league}.xlsx" / f"Data/{league}.csv" when the file
#   name itself contains a dot.
# - The set comprehension lists a league once even if both an .xlsx and a
#   .csv file exist for it.
leagues = sorted(
    {
        os.path.splitext(file)[0]
        for file in os.listdir("Data")
        if file.endswith((".xlsx", ".csv"))
    }
)
print(f"Available data: {', '.join(leagues)}")

Available data: ARG, AUT, B1, BRA, CHN, D1, D2, DNK, E0, E1, E2, E3, EC, F1, F2, FIN, G1, I1, I2, IRL, JPN, MEX, N1, NOR, P1, POL, ROU, RUS, SC0, SC1, SC2, SC3, SP1, SP2, SWE, SWZ, T1, USA


In [3]:
league = "E0"

# Load the match data for the chosen league, preferring the Excel file and
# falling back to CSV only when the Excel file does not exist. Any other
# failure (missing Excel engine, corrupt workbook, user interrupt, ...) is
# allowed to surface instead of being masked by a second, unrelated error.
try:
    df = pd.read_excel(f"Data/{league}.xlsx")
except FileNotFoundError:
    df = pd.read_csv(f"Data/{league}.csv")

# Columns the rest of the notebook relies on.
required_columns = ["HomeTeam", "AwayTeam", "FTHG", "FTAG"]

if not set(required_columns).issubset(df.columns):
    # some of the leagues have mismatched names
    df = df.rename(
        columns={
            "HG": "FTHG",
            "AG": "FTAG",
            "Home": "HomeTeam",
            "Away": "AwayTeam",
        }
    )

# Raises KeyError if a required column is still missing after renaming.
df = df[required_columns]

elo_history, elo_ratings = get_elo(df)

# Drop the first rows of the history before modelling.
# NOTE(review): presumably a warm-up period while the ratings are still
# uninformative — confirm against get_elo in win_draw_loss.
N_WARMUP_ROWS = 10
elo_history = elo_history[N_WARMUP_ROWS:].reset_index(drop=True)

elo_history.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,EloDiff
0,Brighton,Man United,2,1,0.0
1,Crystal Palace,West Ham,0,2,0.0
2,Fulham,Leicester,2,1,-10.0
3,Man City,Ipswich,4,1,20.0
4,Southampton,Nott'm Forest,0,1,-10.0


In [4]:
# Unique team names in order of first appearance, scanning the whole HomeTeam
# column first and then the AwayTeam column.
# order="F" makes that column-major scan explicit; order="K" follows the
# array's memory layout, which is an internal detail of how pandas happens to
# store the frame and is not guaranteed to stay the same.
teams = pd.unique(df[["HomeTeam", "AwayTeam"]].values.ravel(order="F"))
teams

array(['Man United', 'Ipswich', 'Arsenal', 'Everton', 'Newcastle',
       "Nott'm Forest", 'West Ham', 'Brentford', 'Chelsea', 'Leicester',
       'Brighton', 'Crystal Palace', 'Fulham', 'Man City', 'Southampton',
       'Tottenham', 'Aston Villa', 'Bournemouth', 'Wolves', 'Liverpool'],
      dtype=object)

In [5]:
# Fixture to inspect: look up both sides in elo_ratings (the second value
# returned by get_elo) and print each rating to two decimal places.
home_team, away_team = "Man City", "Wolves"

home_elo = elo_ratings[home_team]
away_elo = elo_ratings[away_team]

print(
    f"{home_team} ELO: {home_elo:.2f}",
    f"{away_team} ELO: {away_elo:.2f}",
    sep="\n",
)

Man City ELO: 1583.53
Wolves ELO: 1467.14


In [6]:
# Two Poisson models fitted on the same data, one per response column:
#   responses:       FTHG (home_model), FTAG (away_model)
#   fixed effects:   intercept, EloDiff
#   random effects:  per-group intercepts for HomeTeam and AwayTeam
shared_settings = {"data": elo_history, "family": "poisson"}

home_model = bmb.Model(
    "FTHG ~ 1 + EloDiff + (1 | HomeTeam) + (1 | AwayTeam)",
    **shared_settings,
)
away_model = bmb.Model(
    "FTAG ~ 1 + EloDiff + (1 | HomeTeam) + (1 | AwayTeam)",
    **shared_settings,
)

In [7]:
# Sanity check: show the column labels of elo_history, which must include every
# name used in the model formulas (FTHG, FTAG, EloDiff, HomeTeam, AwayTeam).
elo_history.columns

Index(['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'EloDiff'], dtype='object')

In [8]:
# Fit the home-goals model.
# - A fixed random_seed makes the sampling reproducible across
#   Restart & Run All.
# - The fit result is bound to a name so later cells can use it instead of
#   relying on the notebook's `_` / Out[n] history.
RANDOM_SEED = 42
home_idata = home_model.fit(random_seed=RANDOM_SEED)
home_idata

Initializing NUTS using jitter+adapt_diag...
This usually happens when PyTensor is installed via pip. We recommend it be installed via conda/mamba/pixi instead.
Alternatively, you can use an experimental backend such as Numba or JAX that perform their own BLAS optimizations, by setting `pytensor.config.mode == 'NUMBA'` or passing `mode='NUMBA'` when compiling a PyTensor function.
For more options and details see https://pytensor.readthedocs.io/en/latest/troubleshooting.html#how-do-i-configure-test-my-blas-library


KeyboardInterrupt: 