# Budget model

An extension of the base model in which each team's attack and defense strength has two components:
a "team inherent" component, as in the base model, and a second component directly related to the team's budget.

In [1]:
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
import xarray as xr

In [2]:
# Match results for the season: one row per match.
df = pd.read_csv("scores_22-23.csv")

# Factorize home and away teams together so both index arrays share a single
# coding. Factorizing each column separately only yields consistent indices
# when both columns contain exactly the same set of teams; otherwise the away
# indices would silently refer to different teams than `team_names`.
num_matches = len(df)
all_team_idxs, team_names = pd.factorize(
    pd.concat([df.home_team, df.away_team], ignore_index=True), sort=True
)
home_team_idxs = all_team_idxs[:num_matches]
away_team_idxs = all_team_idxs[num_matches:]
num_teams = len(team_names)
df

Unnamed: 0,home_team,away_team,home_goals,away_goals
0,Athletic Club,Almería,4,0
1,Atlético de Madrid,Almería,2,1
2,Osasuna,Almería,3,1
3,Cádiz,Almería,1,1
4,Elche,Almería,1,1
...,...,...,...,...
375,Mallorca,Villarreal,4,2
376,Real Sociedad,Villarreal,1,0
377,Sevilla,Villarreal,2,1
378,Valencia,Villarreal,1,1


In [3]:
# Team budgets, sorted by team name so that row i is meant to line up with
# team_names[i] (team_names is the sorted set of teams from the scores file).
budget_df = pd.read_csv("budgets_22-23.csv").sort_values("team")

# The model attaches budgets to teams purely by position, so fail loudly if
# the two files disagree instead of silently giving teams the wrong budget.
budget_teams = budget_df["team"].tolist()
if budget_teams != list(team_names):
    mismatched_teams = sorted(set(budget_teams) ^ set(team_names), key=str)
    raise ValueError(
        "budgets_22-23.csv does not line up with the teams in the scores file; "
        f"mismatched or duplicated teams: {mismatched_teams}"
    )

In [4]:
# Poisson goals model with a log link. Each team's attack/defense term is the
# sum of a zero-centred team-specific effect and a coefficient times the log
# of the team's budget.
coords = {"team": team_names, "match": np.arange(len(df))}
with pm.Model(coords=coords) as m_budget:
    # Input data registered on the model. `pm.Data` is used instead of
    # `pm.MutableData`, which is deprecated in recent PyMC releases.
    home_team = pm.Data("home_team", home_team_idxs, dims="match")
    away_team = pm.Data("away_team", away_team_idxs, dims="match")
    # One budget per team; must be ordered like the "team" coordinate.
    budget = pm.Data("budget", budget_df["budget"], dims="team")

    # global model parameters
    home = pm.Normal('home', mu=0, sigma=5)  # only added to the home side's rate
    sd_att = pm.HalfStudentT('sd_att', nu=3, sigma=2.5)
    sd_def = pm.HalfStudentT('sd_def', nu=3, sigma=2.5)
    intercept = pm.Normal('intercept', mu=0, sigma=5)
    # coefficients on log(budget) for attack and defense
    budget_att = pm.Normal("budget_att", mu=0, sigma=5)
    budget_def = pm.Normal("budget_def", mu=0, sigma=5)

    # team-specific model parameters
    atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, dims="team")
    defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, dims="team")

    # Subtract the mean so the team-specific effects sum to zero, then add the
    # budget-driven component.
    atts = atts_star - pt.mean(atts_star) + budget_att * pt.log(budget)
    defs = defs_star - pt.mean(defs_star) + budget_def * pt.log(budget)
    # Expected goals: the scoring side's attack term plus the opponent's
    # defense term, on the log scale.
    home_theta = pt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta = pt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data
    home_goals = pm.Poisson('home_goals', mu=home_theta, observed=df.home_goals, dims="match")
    away_goals = pm.Poisson('away_goals', mu=away_theta, observed=df.away_goals, dims="match")



In [5]:
# Sampler settings for the budget model. The fixed seed makes the run
# repeatable, and the log-likelihood is requested in the returned
# InferenceData (presumably for later model comparison).
# NOTE: the recorded run reported 1 divergence at target_accept=0.9.
nuts_settings = {
    "draws": 4000,
    "random_seed": 1375,
    "target_accept": 0.9,
    "idata_kwargs": {"log_likelihood": True},
}

with m_budget:
    idata = pm.sample(**nuts_settings)

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [home, sd_att, sd_def, intercept, budget_att, budget_def, atts_star, defs_star]


Output()

Sampling 4 chains for 1_000 tune and 4_000 draw iterations (4_000 + 16_000 draws total) took 50 seconds.
There were 1 divergences after tuning. Increase `target_accept` or reparameterize.


In [6]:
# Persist the sampling results to disk; the call is left as the last
# expression so the cell still displays the written path.
netcdf_path = "budget_model.nc"
idata.to_netcdf(netcdf_path)

'budget_model.nc'