# Variational Inference

- [Univariate (separate) models](#univariate-separate-models)
- [Bivariate Copula model](#bivariate-copula-model)

Libraries:

In [None]:
import numpy as np
import torch
import pyro
from pyro import poutine
import pyro.distributions as dist
from pyro.infer import SVI, TraceEnum_ELBO
from pyro.infer.autoguide import AutoDelta
from pyro.optim import Adam,ClippedAdam
import pandas as pd
import tqdm
# from models.UnivariateHMM import UnivariateHMM
# from models.CopulaHMM import CopulaHMM
from utils.CopulaHelpers import *

Global variables:

In [None]:
# Directory (with trailing slash) that is prefixed to the input CSV file name.
DATA_DIR="data/"
# Number of hidden states passed to the HMM models and their guides.
HIDDEN_STATES = 2
# Number of SVI optimisation steps run by every training loop.
TRAINING_STEPS= 500

Import data:

In [None]:
# Load the hull areas, discard incomplete rows and rescale both area columns
# from m^2 to dam^2 (1 dam^2 = 100 m^2) for computational reasons.
data = (
    pd.read_csv(f"{DATA_DIR}hulls_df_matchday2_reduced.csv")
    .dropna()
    .assign(
        HomeHull=lambda frame: frame["HomeHull"] / 100,
        AwayHull=lambda frame: frame["AwayHull"] / 100,
    )
)
data.head()

## Univariate (separate) models

**NB: TODO — replace these with a single bivariate model with independent emissions, so that the two time series share the same hidden state.**

In [None]:
def UnivariateHMM(sequence: torch.Tensor,
                  hidden_dim: int,
                  include_prior: bool = True) -> None:
    '''
    Pyro model for a Hidden Markov Model with a single univariate observation
    per time step and a Gamma emission distribution.
    Structure of the model taken from the Pyro documentation:
    https://pyro.ai/examples/hmm.html

    The hidden chain starts from the fixed state 0, so the first state
    ``x_0`` is drawn from row 0 of the transition matrix. The discrete states
    are marked for parallel enumeration.

    INPUTS:
    - sequence (torch.Tensor): A 1-dimensional tensor of observations.
    - hidden_dim (int): The number of hidden states.
    - include_prior (bool): If True, include priors for the parameters of the
      model; if False, the prior sites are masked out via ``poutine.mask``.

    SAMPLE SITES:
    - "probs_x": (hidden_dim, hidden_dim) transition matrix, one Dirichlet
      row per source state.
    - "probs_alpha", "probs_beta": per-state Gamma emission parameters,
      passed to ``dist.Gamma`` as concentration and rate respectively.
    - "x_{t}": hidden state at time t; "y_{t}": observation at time t.
    '''
    length = len(sequence)
    with poutine.mask(mask=include_prior):
        # Transition probabilities. The concentration is 1.0 on the diagonal
        # and 0.1 elsewhere, which favours staying in the current state.
        # to_event(1) declares the row dimension as part of the event
        # explicitly, matching CopulaHMM.
        probs_x = pyro.sample(
            "probs_x",
            dist.Dirichlet(0.9 * torch.eye(hidden_dim) + 0.1).to_event(1),
        )
        # Emission parameters (1-dimensional for the area), one per state
        probs_alpha = pyro.sample(
            "probs_alpha",
            dist.Gamma(1.0, 1.0).expand([hidden_dim]).to_event(1)
        )

        probs_beta = pyro.sample(
            "probs_beta",
            dist.Gamma(1.0, 1.0).expand([hidden_dim]).to_event(1)
        )

    x = 0  # Initial hidden state
    for t in pyro.markov(range(length)):
        # Transition from the previous state, then emit the observation.
        x = pyro.sample(
            f"x_{t}",
            dist.Categorical(probs_x[x]),
            infer={"enumerate": "parallel"},
        )
        pyro.sample(
            f"y_{t}",
            dist.Gamma(probs_alpha[x], probs_beta[x]),
            obs=sequence[t],
        )

In [None]:
# One observation tensor per team, built from the rescaled hull-area columns.
sequence_X = torch.tensor(data["HomeHull"].to_numpy())
sequence_Y = torch.tensor(data["AwayHull"].to_numpy())

#### Home Team

In [None]:
# Reset Pyro's global parameter store so the home-team fit starts from scratch.
pyro.clear_param_store()

In [None]:
# Point-estimate guide over the continuous parameters only; the discrete
# states x_t are hidden from the guide and enumerated by the ELBO.
guide = AutoDelta(
    poutine.block(
        UnivariateHMM,
        expose=["probs_x", "probs_alpha", "probs_beta"],
    )
)
optimizer = Adam({"lr": 0.01})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
svi = SVI(UnivariateHMM, guide, optimizer, loss=elbo)

# Fit on the home-team series, refreshing the displayed loss every 100 steps.
tqdm_bar = tqdm.tqdm(range(TRAINING_STEPS))
for step in tqdm_bar:
    loss = svi.step(sequence_X, HIDDEN_STATES)
    if step % 100 != 0:
        continue
    tqdm_bar.set_postfix({'LOSS': loss})

In [None]:
# Call the trained guide to collect its values for the exposed sites
# (point estimates, since the guide is an AutoDelta), then display them.
posterior_HomeTeam = guide(sequence_X,HIDDEN_STATES)
posterior_HomeTeam

In [None]:
# Persist the home-team estimates; the file name encodes the number of hidden states.
# NOTE(review): torch.save does not create directories — "parameters/" must already exist.
torch.save(posterior_HomeTeam,f"parameters/univariateHMM_matchday2_HomeTeam_{HIDDEN_STATES}states.pt")

#### Away Team

In [None]:
# Reset Pyro's global parameter store so the away-team fit does not reuse earlier parameters.
pyro.clear_param_store()

In [None]:
# Point-estimate guide over the continuous parameters only; the discrete
# states x_t are hidden from the guide and enumerated by the ELBO.
guide = AutoDelta(
    poutine.block(
        UnivariateHMM,
        expose=["probs_x", "probs_alpha", "probs_beta"],
    )
)
optimizer = Adam({"lr": 0.01})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
svi = SVI(UnivariateHMM, guide, optimizer, loss=elbo)

# Fit on the away-team series, refreshing the displayed loss every 100 steps.
tqdm_bar = tqdm.tqdm(range(TRAINING_STEPS))
for step in tqdm_bar:
    loss = svi.step(sequence_Y, HIDDEN_STATES)
    if step % 100 != 0:
        continue
    tqdm_bar.set_postfix({'LOSS': loss})

In [None]:
# Call the trained guide to collect its values for the exposed sites
# (point estimates, since the guide is an AutoDelta), then display them.
posterior_AwayTeam = guide(sequence_Y,HIDDEN_STATES)
posterior_AwayTeam

In [None]:
#torch.save(posterior_AwayTeam,f"parameters/univariateHMM_matchday2_AwayTeam_{HIDDEN_STATES}states.pt")

## Bivariate Copula model

In [None]:
def CopulaHMM(sequence: torch.Tensor,
              hidden_dim: int,
              include_prior: bool = True) -> None:
    '''
    Pyro model for a Hidden Markov Model with a bivariate observation per time
    step and a copula-based emission distribution.
    Structure of the model taken from the Pyro documentation:
    https://pyro.ai/examples/hmm.html

    The emission log-density is computed by ``copulamodel_log_pdf`` (defined
    outside this function) and added to the model through ``pyro.factor``.
    The discrete states are marked for parallel enumeration.

    INPUTS:
    - sequence (torch.Tensor): A 2-dimensional tensor of observations; row t
      holds the two observed values at time t (columns 0 and 1).
    - hidden_dim (int): The number of hidden states.
    - include_prior (bool): If True, include priors for the parameters of the
      model; if False, the prior sites are masked out via ``poutine.mask``.

    SAMPLE SITES:
    - "probs_initial": distribution of the first hidden state.
    - "probs_x": (hidden_dim, hidden_dim) transition matrix.
    - "probs_alpha1", "probs_beta1", "probs_alpha2", "probs_beta2": per-state
      shape and rate passed to ``copulamodel_log_pdf`` for the first and
      second observed variable.
    - "theta": per-state ``theta`` argument of ``copulamodel_log_pdf``
      (presumably the copula dependence parameter — confirm in the helper).
    - "x_{t}": hidden state at time t; "xy_{t}": emission factor at time t.
    '''
    def _per_state_gamma(name, concentration, rate):
        # One independent Gamma(concentration, rate) prior per hidden state.
        return pyro.sample(
            name,
            dist.Gamma(concentration=concentration, rate=rate)
            .expand([hidden_dim])
            .to_event(1),
        )

    n_obs = sequence.shape[0]
    with poutine.mask(mask=include_prior):
        # Prior for the initial state probabilities (flat Dirichlet)
        probs_initial = pyro.sample(
            "probs_initial",
            dist.Dirichlet(torch.ones(hidden_dim)),
        )
        # Transition probabilities. The concentration is 1.0 on the diagonal
        # and 0.1 elsewhere, which favours staying in the current state.
        probs_x = pyro.sample(
            "probs_x",
            dist.Dirichlet(0.9 * torch.eye(hidden_dim) + 0.1).to_event(1),
        )
        # Priors for the emission parameters of both observed variables
        probs_alpha1 = _per_state_gamma("probs_alpha1", 15.0, 0.8)
        probs_beta1 = _per_state_gamma("probs_beta1", 1.0, 1.0)
        probs_alpha2 = _per_state_gamma("probs_alpha2", 15.0, 0.8)
        probs_beta2 = _per_state_gamma("probs_beta2", 1.0, 1.0)
        # Prior for theta
        theta = _per_state_gamma("theta", 5.0, 0.7)

    # Sample the initial hidden state
    x = pyro.sample(
        "x_0",
        dist.Categorical(probs_initial),
        infer={"enumerate": "parallel"},
    )

    for t in pyro.markov(range(n_obs)):
        # x_0 was already drawn above; later states follow the transition matrix.
        if t > 0:
            x = pyro.sample(
                f"x_{t}",
                dist.Categorical(probs_x[x]),
                infer={"enumerate": "parallel"},
            )

        # Emission term for the pair observed at time t, given state x.
        log_pdf = copulamodel_log_pdf(
            x=sequence[t, 0],
            y=sequence[t, 1],
            shape1=probs_alpha1[x],
            rate1=probs_beta1[x],
            shape2=probs_alpha2[x],
            rate2=probs_beta2[x],
            theta=theta[x],
        )
        pyro.factor(f"xy_{t}", log_pdf)
In [None]:
# Stack both hull-area columns into one tensor with a row per time step.
sequence_XY = torch.tensor(data[["HomeHull", "AwayHull"]].to_numpy())
sequence_XY.shape

In [None]:
# Point-estimate guide over the continuous parameters only; the discrete
# states x_t are hidden from the guide and enumerated by the ELBO.
guide = AutoDelta(
    poutine.block(
        CopulaHMM,
        expose=[
            "probs_initial",
            "probs_x",
            "probs_alpha1",
            "probs_beta1",
            "probs_alpha2",
            "probs_beta2",
            "theta",
        ],
    )
)
pyro.clear_param_store()

optimizer = ClippedAdam({"lr": 0.01})
elbo = TraceEnum_ELBO(max_plate_nesting=1)
svi = SVI(CopulaHMM, guide, optimizer, loss=elbo)

# Fit on the bivariate series, showing the latest loss after every step.
tqdm_bar = tqdm.tqdm(range(TRAINING_STEPS))
for step in tqdm_bar:
    loss = svi.step(sequence_XY, HIDDEN_STATES)
    tqdm_bar.set_postfix({'LOSS': loss})

In [None]:
# Call the trained guide to collect its values for the exposed sites
# (point estimates, since the guide is an AutoDelta), then display them.
posterior = guide(sequence_XY,HIDDEN_STATES)
posterior

Posterior mean of the convex hull area for each hidden state (only state indices below `HIDDEN_STATES` exist in the posterior):

In [None]:
# Per-state mean hull area, computed as shape / rate (the mean of a Gamma
# distribution, assuming Gamma marginals) and multiplied by 100 to convert
# from dam^2 back to m^2.
# The loop runs over the states actually present in the posterior, so the
# cell works for any number of hidden states instead of indexing 0..4.
home_means = posterior["probs_alpha1"] / posterior["probs_beta1"] * 100
away_means = posterior["probs_alpha2"] / posterior["probs_beta2"] * 100

for state in range(home_means.shape[0]):
    meanH = home_means[state].item()
    meanA = away_means[state].item()
    print(f">> Mean of the Convex Hull for home team (STATE {state}): {meanH:.2f} m^2")
    print(f">> Mean of the Convex Hull for away team (STATE {state}): {meanA:.2f} m^2")

In [None]:
#torch.save(posterior,f"parameters/CopulaHMM_matchday2_{HIDDEN_STATES}states.pt")