In [25]:
# Loading libraries
import arviz as az
import pymc3 as pm
import theano.tensor as tt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [26]:
# Reading the data
df = pd.read_csv('rugby.csv')

In [27]:
observed_home_goals = df.home_score.values
observed_away_goals = df.away_score.values

home_team = df.i_home.values
away_team = df.i_away.values

num_teams = len(df.i_home.drop_duplicates())
num_games = len(home_team)

In [28]:
# building the model
teams = np.array(
    [
        "England",
        "Ireland",
        "France",
        "Wales",
        "Scotland",
        "Italy"
    ]
)

with pm.Model() as model:
    # global model parameters
    home = pm.Flat('home')
    sd_att = pm.HalfStudentT('sd_att', nu=3, sigma=2.5)
    sd_def = pm.HalfStudentT('sd_def', nu=3, sigma=2.5)
    intercept = pm.Flat('intercept')
    
    # team-specific model parameters
    atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, shape=num_teams)
    defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, shape=num_teams)
 
    atts = pm.Deterministic('atts', atts_star - tt.mean(atts_star))
    defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star))
    home_theta = tt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta = tt.exp(intercept + atts[away_team] + defs[home_team])
    
    # likelihood of observed data
    home_points = pm.Poisson('home_points', mu=home_theta, observed=observed_home_goals)
    away_points = pm.Poisson('away_points', mu=away_theta, observed=observed_away_goals)
    
    prior = pm.Normal("prior", mu = 0, sd = 5)
    trace = pm.sample(1000, tune=1000, cores=3)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (3 chains in 3 jobs)
NUTS: [prior, defs_star, atts_star, intercept, sd_def, sd_att, home]
Sampling 3 chains, 0 divergences: 100%|████████████████████████████████████████| 6000/6000 [00:25<00:00, 237.00draws/s]


In [29]:
with model:
    prior = {"prior": trace["prior"], 
             "home_points": home_points.tag.test_value,
             "away_points": away_points.tag.test_value}
    trace = pm.sample(1000, tune=1000, cores=3)
    posterior_predictive = pm.sample_posterior_predictive(trace)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (3 chains in 3 jobs)
NUTS: [prior, defs_star, atts_star, intercept, sd_def, sd_att, home]
Sampling 3 chains, 0 divergences: 100%|████████████████████████████████████████| 6000/6000 [00:21<00:00, 278.18draws/s]
100%|█████████████████████████████████████████████████████████████████████████████| 3000/3000 [00:05<00:00, 567.54it/s]


In [30]:
# Generating the arviz data object
data = az.from_pymc3(
    trace=trace,
    prior=prior,
    posterior_predictive=posterior_predictive,
    model=model,
    coords={"team": teams},
    dims={"theta": ["team"], "obs": ["team"]},    
)

data

In [31]:
# Storing the model to .nc format
data.to_netcdf('rugby.nc')

'rugby.nc'