In [1]:
import numpy as np
import torch
import pyro
from pyro import poutine
import pyro.distributions as dist
from pyro.infer import SVI, TraceEnum_ELBO
from pyro.optim import Adam
import pandas as pd
import tqdm

In [2]:
# Read the hull table from disk and, in the same expression, discard every row
# that contains at least one missing value; then preview the first rows.
data = pd.read_csv("data/hulls_df_matchday1.csv").dropna()
data.head()

Unnamed: 0,Period,Frame,Time [s],HomeHull,AwayHull
0,1.0,0.0,2.0,587.639501,1232.944696
1,1.0,1.0,2.04,585.816727,1230.338673
2,1.0,2.0,2.08,583.760092,1227.828173
3,1.0,3.0,2.12,581.673753,1225.107437
4,1.0,4.0,2.16,579.483073,1222.165286


In [3]:
# Observation sequence fed to the model: the "HomeHull" column as a 1-D tensor.
# The dtype is set explicitly to float32: pandas hands back float64, whereas the
# model's priors/parameters are built from Python floats and torch.eye and are
# therefore float32. Keeping one dtype avoids mixed-precision arithmetic.
sequence = torch.tensor(data["HomeHull"].to_numpy(), dtype=torch.float32)

# The model scores each observation under a Gamma likelihood, whose support is
# strictly positive. Check that here so bad data fails with a clear message
# instead of surfacing deep inside inference.
assert bool((sequence > 0).all()), "HomeHull values must be strictly positive for a Gamma emission model"

hidden_dim = 2  # number of discrete hidden states
sequence.shape


torch.Size([95946])

In [4]:
def model(sequence, hidden_dim, include_prior=True):
    """Discrete-state hidden Markov model with Gamma-distributed emissions.

    Global sample sites (their prior terms are masked out when
    ``include_prior`` is false):

    * ``probs_x``     -- one transition distribution per hidden state, drawn
      from a Dirichlet whose concentration is 1.0 on the diagonal and 0.1
      everywhere else.
    * ``probs_alpha`` -- per-state concentration of the Gamma emission.
    * ``probs_beta``  -- per-state rate of the Gamma emission.

    Per-step sample sites, created in a Python loop with one pair per element
    of ``sequence``:

    * ``x_{t}`` -- hidden state at step ``t``, marked for parallel enumeration.
    * ``y_{t}`` -- observation at step ``t``, conditioned on ``sequence[t]``.

    Args:
        sequence: indexable collection of observations; ``len(sequence)`` sets
            the number of time steps.
        hidden_dim: number of hidden states.
        include_prior: mask passed to ``poutine.mask`` around the global sites.
    """
    num_steps = len(sequence)

    with poutine.mask(mask=include_prior):
        # Transition matrix: row i is the distribution over the next state
        # given that the current state is i.
        transition_concentration = 0.9 * torch.eye(hidden_dim) + 0.1
        probs_x = pyro.sample(
            "probs_x",
            dist.Dirichlet(transition_concentration).to_event(1),
        )

        # Both Gamma emission parameters share the same Gamma(1, 1) prior,
        # independently for each hidden state.
        emission_prior = dist.Gamma(1.0, 1.0).expand([hidden_dim]).to_event(1)
        probs_alpha = pyro.sample("probs_alpha", emission_prior)
        probs_beta = pyro.sample("probs_beta", emission_prior)

    # The chain starts from state 0, so the first hidden state is drawn from
    # row 0 of the transition matrix.
    state = 0
    for t in pyro.markov(range(num_steps)):
        state = pyro.sample(
            f"x_{t}",
            dist.Categorical(probs_x[state]),
            infer={"enumerate": "parallel"},
        )
        emission = dist.Gamma(probs_alpha[state], probs_beta[state])
        pyro.sample(f"y_{t}", emission, obs=sequence[t])

from pyro.infer.autoguide import AutoDelta

# Variational family: a point estimate (AutoDelta) for the three global sites.
# Every other site of the model is hidden from the guide by poutine.block.
exposed_sites = ["probs_x", "probs_alpha", "probs_beta"]
guide = AutoDelta(poutine.block(model, expose=exposed_sites))

In [None]:
# Seed Pyro's random number generators so that any randomness during the fit
# (e.g. in parameter initialisation) is repeatable across reruns.
pyro.set_rng_seed(0)
pyro.clear_param_store()
# NOTE(review): `guide` is built in an earlier cell; clearing the param store
# may not reset values the guide object already holds. Re-create the guide
# before rerunning this cell if a fresh fit is wanted -- confirm.

# Optimizer
optimizer = Adam({"lr": 0.01})

# Inference algorithm: the hidden states are enumerated out by TraceEnum_ELBO.
elbo = TraceEnum_ELBO(max_plate_nesting=1)
svi = SVI(model, guide, optimizer, loss=elbo)

# Training
num_steps = 1000
losses = []  # loss of every step, kept so convergence can be inspected later
tqdm_bar = tqdm.tqdm(range(num_steps))
for step in tqdm_bar:
    loss = svi.step(sequence, hidden_dim)
    losses.append(loss)
    # Refresh the progress-bar postfix only every 100 steps.
    if step % 100 == 0:
        tqdm_bar.set_postfix({'LOSS': loss})

In [6]:
from pyro.infer.mcmc import MCMC, NUTS

# Sample the posterior with NUTS, using only the first 3000 observations.
mcmc_observations = sequence[:3000]
nuts_kernel = NUTS(model)
mcmc = MCMC(nuts_kernel, num_samples=1000, warmup_steps=100)
mcmc.run(mcmc_observations, hidden_dim)


Warmup:   1%|          | 9/1100 [01:41, 10.58s/it, step size=1.19e-03, acc. prob=0.702]

KeyboardInterrupt: 