In [1]:
# Loading libraries
import arviz as az
import pymc as pm
import pytensor.tensor as pt
import pandas as pd
import numpy as np

In [2]:
# Load the match table from a CSV file, resolved relative to the working directory
csv_path = 'rugby.csv'
df = pd.read_csv(csv_path)

In [3]:
# Observed scores, one entry per row of `df`
observed_home_goals = df["home_score"].values
observed_away_goals = df["away_score"].values

# Integer codes used below to index the per-team parameters
# NOTE(review): assumes the codes in i_home / i_away follow the order of `teams` — confirm against the CSV
home_team = df["i_home"].values
away_team = df["i_away"].values

teams = np.array(['Wales', 'France', 'Ireland', 'Scotland', 'Italy', 'England'])

# One "<home> <away>" label per row, used as the coordinate of the "match" dimension
matches = [
    f"{home_name} {away_name}"
    for home_name, away_name in zip(df["home_team"], df["away_team"])
]
coords = dict(team=teams, match=matches)

In [4]:
# Poisson model of the scores with per-team attack/defence effects
with pm.Model(coords=coords) as model:
    # Parameters shared across all teams
    home = pm.Normal("home", mu=0, sigma=1)
    sd_att = pm.HalfNormal("sd_att", sigma=2)
    sd_def = pm.HalfNormal("sd_def", sigma=2)
    intercept = pm.Normal("intercept", mu=3, sigma=1)

    # Raw per-team effects; their spread is governed by sd_att / sd_def
    atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, dims="team")
    defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, dims="team")

    # Centre the raw effects on their across-team mean before storing them
    att_centre = pt.mean(atts_star)
    def_centre = pt.mean(defs_star)
    atts = pm.Deterministic("atts", atts_star - att_centre, dims="team")
    defs = pm.Deterministic("defs", defs_star - def_centre, dims="team")

    # Log-rates per match; only the home side receives the extra `home` term
    log_home_rate = intercept + home + atts[home_team] + defs[away_team]
    log_away_rate = intercept + atts[away_team] + defs[home_team]
    home_theta = pt.exp(log_home_rate)
    away_theta = pt.exp(log_away_rate)

    # Observed scores enter as Poisson counts with the rates above
    pm.Poisson("home_points", mu=home_theta, observed=observed_home_goals, dims="match")
    pm.Poisson("away_points", mu=away_theta, observed=observed_away_goals, dims="match")

In [5]:
# Names of the model's free random variables; log-prior values are requested for these.
var_names = [v.name for v in model.free_RVs]

# One seed for every stochastic step. Previously only pm.sample was seeded, so the
# prior and predictive groups written to disk differed on every Restart & Run All.
SEED = 5

with model:
    # Prior predictive draws create the InferenceData that the later steps extend.
    idata = pm.sample_prior_predictive(random_seed=SEED)

    # Posterior draws; also store pointwise log-likelihood, log-prior and the
    # transformed (sampler-space) variables alongside them.
    idata.extend(
        pm.sample(
            500,
            tune=1000,
            idata_kwargs={
                "log_likelihood": True,
                "log_prior": var_names,
                "include_transformed": True
            },
            random_seed=SEED
        )
    )

    # Posterior predictive draws are appended to `idata` in place.
    pm.sample_posterior_predictive(idata, extend_inferencedata=True, random_seed=SEED)

Sampling: [atts_star, away_points, defs_star, home, home_points, intercept, sd_att, sd_def]
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [home, sd_att, sd_def, intercept, atts_star, defs_star]


Sampling 4 chains for 1_000 tune and 500 draw iterations (4_000 + 2_000 draws total) took 17 seconds.
Sampling: [away_points, home_points]


In [6]:
# Copy the `*_log__` variables (stored in the posterior because sampling was run
# with include_transformed=True) into a separate "unconstrained_posterior" group,
# renamed to the names of the random variables they belong to.
#
# The guard makes the cell safe to re-run: add_groups rejects a group that already
# exists, and the source variables may no longer be in `idata.posterior`.
if "unconstrained_posterior" not in idata.groups():
    transformed_vars = idata.posterior[["sd_att_log__", "sd_def_log__"]].rename(
        sd_att_log__="sd_att", sd_def_log__="sd_def"
    )
    # Record, per random variable that has a transform, the transform's
    # fully qualified class name as a dataset attribute.
    transformed_vars.attrs = {
        k.name: f"{v.__module__}.{v.__class__.__name__}"
        for k, v in model.rvs_to_transforms.items()
        if v is not None
    }
    idata.add_groups(unconstrained_posterior=transformed_vars)



In [7]:
# Remove the transformed variables from the posterior group.
# errors="ignore" keeps the cell idempotent: running it a second time, when the
# variables are already gone, no longer raises.
idata.posterior = idata.posterior.drop_vars(
    ["sd_att_log__", "sd_def_log__"], errors="ignore"
)

In [8]:
# Show the InferenceData object as this cell's output for a visual check of its groups.
idata

In [9]:
# Write the InferenceData object to a NetCDF (.nc) file
netcdf_path = 'rugby.nc'
idata.to_netcdf(netcdf_path)

'rugby.nc'