In [None]:
import torch

# probabilistic programming
import pyro 

# MCMC plotting
import arviz as az
import matplotlib.pyplot as plt
from getdist.arviz_wrapper import arviz_to_mcsamples
from getdist import plots

# autoemulate imports
from autoemulate.simulations.epidemic import Epidemic
from autoemulate.core.compare import AutoEmulate
from autoemulate.calibration.bayes import BayesianCalibration
from autoemulate.emulators import GaussianProcessRBF

# suppress warnings in notebook for readability
import os
import warnings

# single seed shared by every RNG that this notebook seeds
random_seed = 42

# silence warnings in this kernel, and export the same preference through
# the PYTHONWARNINGS environment variable
warnings.filterwarnings(action="ignore")
os.environ.update({"PYTHONWARNINGS": "ignore"})

In [None]:
# Seed the random number generators from the single `random_seed` value so
# that the sampling further down is repeatable on a fresh kernel.
# NOTE(review): presumably `set_random_seed` covers torch/numpy/random --
# confirm against autoemulate. Pyro is seeded explicitly as well.
from autoemulate.data.utils import set_random_seed
set_random_seed(random_seed)
pyro.set_rng_seed(random_seed)

# Evidence computation

## 1. Simulate data

In this example, we'll use the `Epidemic` simulator, which returns the peak infection rate given two input parameters: `beta` (the transmission rate per day) and `gamma` (the recovery rate per day).

In [None]:
# number of input points to draw from the simulator's input space
n_simulations = 1000

simulator = Epidemic(log_level="error")

# sample the inputs, then run the simulator on all of them in one batch;
# forward_batch returns a pair and only its first element is used here
x = simulator.sample_inputs(n_simulations)
y, _ = simulator.forward_batch(x)

Below we plot the simulated data. The peak infection rate is higher when the transmission rate increases and the recovery rate decreases, and the two parameters are correlated with each other.

In [None]:
# Scatter the sampled inputs, coloured by the simulated peak infection rate.
# Column 0 of `x` is plotted as beta and column 1 as gamma.
transmission_rate = x[:, 0]
recovery_rate = x[:, 1]

plt.scatter(transmission_rate, recovery_rate, c=y, cmap='viridis')
plt.xlabel('Transmission rate (beta)')
plt.ylabel('Recovery rate (gamma)')
plt.colorbar(label="Peak infection rate")
# show() must be called: a bare `plt.show` only evaluates to the function
# object, which the notebook would echo as the cell's output.
plt.show()

Below we pick the "true" parameter values and simulate the corresponding output. We then add Gaussian noise to generate 100 "observations".

In [None]:
# ground-truth parameters used to generate the synthetic observations
true_beta = 0.3
true_gamma = 0.15

# observation settings: how many noisy draws, and the noise scale
n_obs = 100
stdev = 0.05

# simulator expects inputs of shape [1, number of inputs], so build the
# single row directly as a nested list
params = torch.tensor([[true_beta, true_gamma]])
true_infection_rate = simulator.forward(params)

# zero-mean Gaussian noise, one draw per observation, added to the
# noiseless simulator output
noise = torch.normal(mean=0, std=stdev, size=(n_obs,))
observed_infection_rates = true_infection_rate[0] + noise

# observations keyed by output name
observations = {"infection_rate": observed_infection_rates}

We can now use these observations to infer which input parameters were most likely to have produced them.

## 2. Calibrate with simulator

In this example, we have a fast simulator with only two input parameters, so we can calibrate against the simulator directly. The code below shows how to do this with Pyro.

In [None]:
import pyro.distributions as dist
from pyro.infer import MCMC
from pyro.infer.mcmc import RandomWalkKernel

import sys
from pathlib import Path

# The Pyro model is imported from a separate `model` module; put its
# directory (resolved relative to the current working directory) at the
# front of the import path first.
tasks_dir = Path.cwd().joinpath("docs", "tutorials", "tasks").resolve()
sys.path.insert(0, str(tasks_dir))

from model import model

# run Bayesian inference with MCMC, using a random-walk kernel
mcmc_settings = dict(warmup_steps=500, num_samples=5000, num_chains=10)

kernel = RandomWalkKernel(model, init_step_size=2.5)
mcmc_sim = MCMC(kernel, **mcmc_settings)
mcmc_sim.run()



Below we plot the posterior samples of the input parameters.

In [None]:
# posterior draws as a dict keyed by parameter name (not grouped by chain)
sim_samples = mcmc_sim.get_samples()

# joint scatter of the two sampled parameters
fig, ax = plt.subplots()
ax.scatter(sim_samples['beta'], sim_samples['gamma'], alpha=0.5)
ax.set_xlabel('Transmission rate (beta)')
ax.set_ylabel('Recovery rate (gamma)')
plt.show()

## 3. Compute the evidence with the harmonic mean estimator

Compute the log probability for each posterior sample produced by MCMC.

In [None]:
import pyro.poutine as poutine


def _log_prob_at(site_values):
    """Return the model's summed trace log-probability as a Python float.

    The model is conditioned on ``site_values`` (a mapping from site name to
    value), traced once, and the log-probabilities of the trace are summed.
    """
    conditioned = pyro.condition(model, data=site_values)
    return poutine.trace(conditioned).get_trace().log_prob_sum().item()


# keep the chain axis: each entry has shape (num_chains, num_samples_per_chain)
samples = mcmc_sim.get_samples(group_by_chain=True)
num_chains, num_samples_per_chain = samples["beta"].shape[:2]

# one row of log-probabilities per chain, one entry per draw in that chain
log_probs_list = [
    [
        _log_prob_at({name: draws[chain_idx, sample_idx] for name, draws in samples.items()})
        for sample_idx in range(num_samples_per_chain)
    ]
    for chain_idx in range(num_chains)
]

# sim_samples: (num_chains, num_samples_per_chain, ndim)
# parameters are stacked along the last axis in the dict's own order
sim_samples = torch.stack(list(samples.values()), dim=-1)

# log_probs_tensor: (num_chains, num_samples_per_chain)
log_probs_tensor = torch.tensor(log_probs_list)

print(f"sim_samples shape: {sim_samples.shape}")
print(f"log_probs_tensor shape: {log_probs_tensor.shape}")
print(f"num_chains: {num_chains}, num_samples_per_chain: {num_samples_per_chain}")

# scatter of the draws fetched without chain grouping
samples_flat = mcmc_sim.get_samples()
plt.scatter(samples_flat['beta'], samples_flat['gamma'], alpha=0.5)
plt.xlabel('Transmission rate (beta)')
plt.ylabel('Recovery rate (gamma)')
plt.show()



In [None]:
# Install the `harmonic` package, which is imported as `hm` further down.
# NOTE(review): the version is unpinned -- consider pinning it for reproducibility.
%pip install harmonic

Create Harmonic Chains, split train/infer sets, fit a flow model (e.g. RQSpline), and compute the (log) inverse evidence and its error.

In [None]:
# Fit a harmonic flow model to the MCMC posterior samples and use it to
# estimate the (log) inverse evidence.
import harmonic as hm

# harmonic's chain containers work on arrays, so hand over NumPy copies of
# the torch tensors rather than the tensors themselves.
ndim = sim_samples.shape[2]
samples_np = sim_samples.detach().cpu().numpy()
ln_posterior_np = log_probs_tensor.detach().cpu().numpy()

# samples: (num_chains, num_samples_per_chain, ndim)
# ln posterior: (num_chains, num_samples_per_chain)
chains = hm.Chains(ndim)
chains.add_chains_3d(samples_np, ln_posterior_np)

# Split into train / inference for flow training
chains_train, chains_infer = hm.utils.split_data(chains, training_proportion=0.5)

# Train a flow model (RQSpline) on training posterior samples.
# It is named `flow_model` so that it does not overwrite the Pyro `model`
# used by the MCMC and log-probability cells; re-running those cells after
# this one therefore still works.
# NOTE(review): temperature < 1 presumably concentrates the flow relative to
# the posterior -- confirm against the harmonic documentation.
temperature = 0.8
flow_model = hm.model.RQSplineModel(ndim, standardize=True, temperature=temperature)
flow_model.fit(chains_train.samples, epochs=30, verbose=True)

# Instantiate harmonic's evidence class
ev = hm.Evidence(chains_infer.nchains, flow_model)

# Pass the evidence class the inference chains and compute the evidence!
ev.add_chains(chains_infer)
ln_inv_evidence = ev.ln_evidence_inv
err_ln_inv_evidence = ev.compute_ln_inv_evidence_errors()
# the separator was mis-encoded as "Â±" in the original string
print(f'Log Inverse Evidence: {ln_inv_evidence} ± {err_ln_inv_evidence}')