# Base model


In [2]:
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
import xarray as xr

In [3]:
# Load one row per match: home_team, away_team, home_goals, away_goals.
df = pd.read_csv("scores_22-23.csv")

# Encode both sides against ONE shared, sorted team list. Factorizing the two
# columns independently only produces matching codes when both columns contain
# exactly the same set of teams; if a team is missing from either column the
# same integer would point at different teams (and `team_names` could be too
# short for the away codes). Factorizing the concatenation avoids that while
# giving identical results whenever the two sets already coincide.
n_matches = len(df)
all_team_idxs, team_names = pd.factorize(
    pd.concat([df.home_team, df.away_team], ignore_index=True), sort=True
)
home_team_idxs = all_team_idxs[:n_matches]  # codes for the home side, one per match
away_team_idxs = all_team_idxs[n_matches:]  # codes for the away side, one per match
num_teams = len(team_names)
df

Unnamed: 0,home_team,away_team,home_goals,away_goals
0,Athletic Club,Almería,4,0
1,Atlético de Madrid,Almería,2,1
2,Osasuna,Almería,3,1
3,Cádiz,Almería,1,1
4,Elche,Almería,1,1
...,...,...,...,...
375,Mallorca,Villarreal,4,2
376,Real Sociedad,Villarreal,1,0
377,Sevilla,Villarreal,2,1
378,Valencia,Villarreal,1,1


In [4]:
# Poisson model for the goals scored by each side of every match. On the log
# scale, a side's scoring rate is a shared intercept plus its own attack effect
# plus the opponent's defence effect, with an extra `home` term for the home side.
# Named dimensions: "team" is labelled by team_names, "match" by the row position in df.
coords = {"team": team_names, "match": np.arange(len(df))}
with pm.Model(coords=coords) as m_base:
    # constant data: integer team code per match, used below to index the
    # per-team effect vectors (registered through pm.MutableData)
    home_team = pm.MutableData("home_team", home_team_idxs, dims="match")
    away_team = pm.MutableData("away_team", away_team_idxs, dims="match")
    
    # global model parameters
    # `home` is added only to the home side's log-rate (see home_theta below).
    home = pm.Normal('home', mu=0, sigma=5)
    # Scales of the team-level attack / defence effects; they are learned from
    # the data and shared by all teams.
    sd_att = pm.HalfStudentT('sd_att', nu=3, sigma=2.5)
    sd_def = pm.HalfStudentT('sd_def', nu=3, sigma=2.5)
    # Baseline log-rate common to both sides of every match.
    intercept = pm.Normal('intercept', mu=0, sigma=5)

    # team-specific model parameters (raw, un-centred effects, one per team)
    atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, dims="team")
    defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, dims="team")

    # Centre the raw effects so each vector has mean zero across teams.
    # These centred tensors are plain expressions (not pm.Deterministic), so only
    # the *_star variables are recorded by name in the model.
    atts = atts_star - pt.mean(atts_star)
    defs = defs_star - pt.mean(defs_star)
    # Expected goals per side. The opponent's `defs` entry is ADDED to the
    # log-rate, so a larger defs value means that team concedes more.
    home_theta = pt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta = pt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data: goal counts are Poisson with the rates above
    home_goals = pm.Poisson('home_goals', mu=home_theta, observed=df.home_goals, dims="match")
    away_goals = pm.Poisson('away_goals', mu=away_theta, observed=df.away_goals, dims="match")

In [5]:
# Draw posterior samples for the base model. The seed is fixed so the run is
# repeatable, and pointwise log-likelihood values are stored in the result
# (presumably for later model comparison -- confirm against downstream cells).
with m_base:
    idata = pm.sample(
        draws=2000,
        random_seed=1375,
        idata_kwargs=dict(log_likelihood=True),
    )

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [home, sd_att, sd_def, intercept, atts_star, defs_star]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 8 seconds.


In [8]:
# Persist the sampling result to disk; the call returns the path it wrote to.
idata.to_netcdf(filename="base_model.nc")

'base_model.nc'