In [1]:
# Loading libraries
import arviz as az
import pymc as pm
import pytensor.tensor as pt
import pandas as pd
import numpy as np

In [2]:
# Load the match results table from the CSV file that sits next to this
# notebook's directory (path is relative to the working directory).
rugby_csv_path = '../rugby/rugby.csv'
df = pd.read_csv(rugby_csv_path)

In [3]:
# Observed points per match, one entry per row of `df`.
observed_home_goals = df.home_score.values
observed_away_goals = df.away_score.values

# Integer team codes per match; the model uses them to index the "team" axis
# of its team-level parameters.
home_team = df.i_home.values
away_team = df.i_away.values

# Build the code -> name table from the data itself instead of hard-coding the
# team names, so the "team" coordinate labels always line up with the integer
# codes used for indexing.
team_codes = (
    pd.DataFrame(
        {
            "code": np.concatenate([home_team, away_team]),
            "team": np.concatenate([df.home_team.values, df.away_team.values]),
        }
    )
    .drop_duplicates()
    .sort_values("code")
)
# The codes are used as positional indices, so they must be exactly 0..n-1 with
# a single name each; fail loudly rather than mislabel parameters.
if not np.array_equal(team_codes["code"].to_numpy(), np.arange(len(team_codes))):
    raise ValueError(
        "team codes must map one-to-one onto 0..n_teams-1 "
        "so they can index the 'team' dimension"
    )
teams = team_codes["team"].to_numpy(dtype=str)

num_teams = len(teams)
num_games = len(home_team)

# One "<home> <away>" label per row, used as the "match" coordinate.
# NOTE(review): labels repeat if the same home/away pairing occurs in more than
# one row -- confirm duplicate coordinate values are acceptable downstream.
matches = [f"{home} {away}" for home, away in zip(df.home_team, df.away_team)]

In [4]:
# Building the model: Poisson likelihoods for home and away points, with
# attack/defence effects that vary by team and by field (home/away).
coords = {
    "team": teams,
    "match": matches,
    "field": ["home", "away"],
}
with pm.Model(coords=coords) as model:
    # global model parameters
    sd_att = pm.HalfNormal('sd_att', sigma=2)
    sd_def = pm.HalfNormal('sd_def', sigma=2)
    sd_att_field = pm.HalfNormal('sd_att_field', sigma=2)
    sd_def_field = pm.HalfNormal('sd_def_field', sigma=2)
    # one intercept per field: index 0 is "home", index 1 is "away"
    intercept = pm.Normal('intercept', mu=3, sigma=1, dims="field")

    # team-specific model parameters (standard-normal draws scaled by their sd)
    atts_team = pm.Normal("atts_team", mu=0, sigma=1, dims="team") * sd_att
    defs_team = pm.Normal("defs_team", mu=0, sigma=1, dims="team") * sd_def

    # team-field specific parameters: field-level deviations added to the
    # team-level effect; resulting shape is (field, team)
    atts = pm.Normal("atts", mu=0, sigma=1, dims=("field", "team")) * sd_att_field + atts_team
    defs = pm.Normal("defs", mu=0, sigma=1, dims=("field", "team")) * sd_def_field + defs_team

    # Centre the effects across teams (axis=1) separately for each field.
    # Averaging over axis=0 (the field axis) instead would subtract every
    # team's own home/away average, which cancels `atts_team` / `defs_team`
    # out of the likelihood and forces home and away effects to be exact
    # negatives of each other. keepdims=True keeps shape (field, 1) so the
    # subtraction broadcasts over teams.
    atts_star = atts - pt.mean(atts, axis=1, keepdims=True)
    defs_star = defs - pt.mean(defs, axis=1, keepdims=True)

    # Expected points on the log scale: the scoring side's attack effect plus
    # the opponent's defence effect, each taken at the field they play on.
    home_theta = pt.exp(intercept[0] + atts_star[0, home_team] + defs_star[1, away_team])
    away_theta = pt.exp(intercept[1] + atts_star[1, away_team] + defs_star[0, home_team])

    # likelihood of observed data
    pm.Poisson('home_points', mu=home_theta, observed=observed_home_goals, dims="match")
    pm.Poisson('away_points', mu=away_theta, observed=observed_away_goals, dims="match")

In [5]:
# One seed shared by every sampling step so a fresh "Restart & Run All"
# reproduces the prior-predictive, posterior and posterior-predictive groups.
RANDOM_SEED = 11

with model:
    # Prior predictive draws create the InferenceData container.
    idata = pm.sample_prior_predictive(random_seed=RANDOM_SEED)
    # Posterior draws; additionally store the log-likelihood and log-prior groups.
    idata.extend(
        pm.sample(
            500,
            tune=1000,
            target_accept=0.9,
            random_seed=RANDOM_SEED,
            idata_kwargs={"log_likelihood": True, "log_prior": True},
        )
    )
    # Posterior predictive draws are appended to `idata` in place.
    pm.sample_posterior_predictive(
        idata,
        extend_inferencedata=True,
        random_seed=RANDOM_SEED,
    )

Sampling: [atts, atts_team, away_points, defs, defs_team, home_points, intercept, sd_att, sd_att_field, sd_def, sd_def_field]
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sd_att, sd_def, sd_att_field, sd_def_field, intercept, atts_team, defs_team, atts, defs]


Sampling 4 chains for 1_000 tune and 500 draw iterations (4_000 + 2_000 draws total) took 21 seconds.
Sampling: [away_points, home_points]


In [7]:
# Persist the full InferenceData object to a netCDF file in the working
# directory; the call is the cell's last expression so its return value is shown.
netcdf_path = 'rugby_field.nc'
idata.to_netcdf(netcdf_path)

'rugby_field.nc'