In [None]:
import torch

# probabilistic programming
import pyro 

# MCMC plotting
import arviz as az
import matplotlib.pyplot as plt
from getdist.arviz_wrapper import arviz_to_mcsamples
from getdist import plots

# autoemulate imports
from autoemulate.simulations.epidemic import Epidemic
from autoemulate.core.compare import AutoEmulate
from autoemulate.calibration.bayes import BayesianCalibration
from autoemulate.emulators import GaussianProcess

# Keep the rendered notebook readable by silencing warning output.
import os
import warnings

# The environment variable is set in addition to the in-process filter,
# presumably so that any child interpreters inherit the same setting.
os.environ["PYTHONWARNINGS"] = "ignore"
warnings.filterwarnings("ignore")

# Single seed reused by every stochastic step in the notebook.
random_seed = 42

In [None]:
from autoemulate.data.utils import set_random_seed

# Apply the shared notebook seed through autoemulate's helper first, then
# through Pyro's own seeding function, so sampling runs are repeatable.
for seed_fn in (set_random_seed, pyro.set_rng_seed):
    seed_fn(random_seed)

In [None]:
# Build the epidemic simulator and draw a training design from it.
simulator = Epidemic(log_level="error")
n_train = 1000
x = simulator.sample_inputs(n_train)

# forward_batch returns a pair; only its first element (the outputs) is used.
y, _ignored = simulator.forward_batch(x)

In [None]:
# Columns of the design matrix: transmission rate first, recovery rate second.
transmission_rate = x[:, 0]
recovery_rate = x[:, 1]

# Colour each sampled (beta, gamma) pair by the corresponding simulator output.
fig, ax = plt.subplots()
points = ax.scatter(transmission_rate, recovery_rate, c=y, cmap='viridis')
ax.set_xlabel('Transmission rate (beta)')
ax.set_ylabel('Recovery rate (gamma)')
fig.colorbar(points, ax=ax, label="Peak infection rate")

# show must be *called*; a bare `plt.show` only echoes the function object.
plt.show()

In [None]:
# Ground-truth parameters used to synthesise the noisy observations.
true_beta, true_gamma = 0.3, 0.15

# simulator expects inputs of shape [1, number of inputs]
params = torch.tensor([[true_beta, true_gamma]])
true_infection_rate = simulator.forward(params)
assert isinstance(true_infection_rate, torch.Tensor)

# Perturb the noise-free simulator output with i.i.d. zero-mean Gaussian noise.
n_obs, stdev = 100, 0.05
noise = torch.normal(mean=0, std=stdev, size=(n_obs,))
observed_infection_rates = true_infection_rate[0] + noise

# Observations are keyed by output name.
observations = dict(infection_rate=observed_infection_rates)


In [None]:
# Run AutoEmulate with a single candidate model (the RBF-kernel exact GP)
# and keep the fitted emulator from the best result.
from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF

candidate_models = [GaussianProcessRBF]
ae = AutoEmulate(x, y, models=candidate_models, model_params={}, log_level="error")

best_result = ae.best_result()
gp = best_result.model


## Problem set-up: identify an interval excursion set for $f(x)$

The aim for the remainder of this notebook is to explore methods that are able to identify samples $x$ from the interval excursion set.

Mathematically this is:
$$
x \in \mathbb{R}^n, \quad a, b \in \mathbb{R}^m, \quad f: \mathbb{R}^n \to \mathbb{R}^m, \quad a < f(x) < b
$$

Solving this problem is more general than calculating:
- the level set ($f(x) = c$)
- the superlevel set ($f(x) > c$)
- the sublevel set ($f(x) < c$)

However, each of these sets can be approximated by the returned samples when $a$ and $b$ are chosen appropriately.

In [None]:
from autoemulate.calibration.interval_excursion_set import IntervalExcursionSetCalibration

# Target band for the emulated output: one lower/upper bound per task.
band_lower = torch.tensor([0.3])
band_upper = torch.tensor([0.6])

# Calibrator that searches the simulator's parameter ranges for inputs whose
# emulated output lies inside the band.
ies = IntervalExcursionSetCalibration(
    gp,
    parameter_range=simulator.parameters_range,
    y_lower=band_lower,
    y_upper=band_upper,
    y_labels=["infection_rate"],
    log_level="error",
)


In [None]:
# Run MCMC over the excursion set with the Metropolis sampler.
mcmc_settings = dict(
    num_samples=1000,
    warmup_steps=200,
    num_chains=2,
    sampler="metropolis",
)
mcmc = ies.run_mcmc(**mcmc_settings)


In [None]:
# Plot the "x_star" draws; the count passed is samples-per-chain times chains.
x_star_mcmc = mcmc.get_samples()["x_star"]
n_mcmc_draws = mcmc.num_samples * mcmc.num_chains
ies.plot_samples(x_star_mcmc, n_mcmc_draws)

## Sequential Monte Carlo (SMC) with adaptive tempering

SMC is a further alternative to importance sampling that might be expected to scale to higher dimensions slightly better.

The band likelihood is tempered from 0 to 1, with each temperature step chosen adaptively to hit a target Effective Sample Size (ESS). We resample whenever the ESS falls below the threshold. At temperature 1 the particles target the exact distribution, and no gradients are required at any stage.

In [None]:
# Adaptive-tempering SMC run; seeded with the shared notebook seed.
smc_settings = dict(
    n_particles=4000,
    ess_target_frac=0.6,
    move_steps=2,
    rw_step=0.25,
    seed=random_seed,
)
x_smc, smc_w, smc_betas, smc_ess, smc_unique = ies.run_smc(**smc_settings)


In [None]:
# Plot every SMC particle (count = number of rows in x_smc).
n_smc_particles = x_smc.shape[0]
ies.plot_samples(x_smc, n_smc_particles)

In [None]:
# Diagnostic plots for the SMC run.
# The particles are the first value returned by ies.run_smc, bound to `x_smc`.
plt.figure(figsize=(5,4))
plt.scatter(x_smc[:,0].cpu(), x_smc[:,1].cpu(), s=4, alpha=0.4, c='tab:orange')
plt.title(f'SMC particles (final), unique={smc_unique}/{x_smc.shape[0]}')
plt.xlabel('x1'); plt.ylabel('x2'); plt.tight_layout()

# Temperature schedule and ESS trace share the same layout, so draw them in a loop.
# Note: 'beta' here labels the tempering schedule, not the transmission rate.
for trace, y_label, title in (
    (smc_betas, 'beta', 'Temperatures'),
    (smc_ess, 'ESS', 'ESS over steps'),
):
    plt.figure(figsize=(6,3))
    plt.plot(trace.cpu().numpy(), '-o', ms=3)
    plt.ylabel(y_label); plt.xlabel('step'); plt.title(title)
    plt.tight_layout()

plt.show()

### History matching with multi-task band likelihood

This section looks at using the current history matching workflow to generate samples from the excursion set.

In [None]:
from autoemulate.calibration.history_matching import HistoryMatchingWorkflow
import numpy as np

# Band edges of the excursion set. Keep these in sync with the y_lower / y_upper
# values passed to IntervalExcursionSetCalibration earlier in the notebook.
y_band_low = torch.tensor([0.3])
y_band_high = torch.tensor([0.6])

lower = y_band_low.item()
upper = y_band_high.item()
midpoint = 0.5 * (lower + upper)
difference = upper - lower
half_width = difference / 2

# NOTE: this rebinds the notebook-level `observations` to 100 draws that are
# uniform over the band; the BayesianCalibration section further down reads
# this variable. History matching itself uses the separate dict passed below.
observations = {"infection_rate": lower + (upper - lower)*torch.rand(100)}

# The observation is given as a (midpoint, half_width**2) pair -- presumably
# (mean, variance), i.e. sigma equals half the band width. With threshold=1.0
# the retained region is then roughly "prediction within the band"
# (assumption: the implausibility also accounts for emulator variance).
hm = HistoryMatchingWorkflow(
    simulator=simulator,
    result=ae.best_result(),
    observations={"infection_rate": (midpoint, half_width**2)},
    threshold=1.0, # implausibility threshold in sigma units
    train_x=x,
    train_y=y,
    log_level="error",
)


In [None]:
# Get samples in NROY space: draw fresh candidates, score them with the
# emulator's predictive mean/variance, and keep those history matching retains.
n_candidates = 10000
x_new = simulator.sample_inputs(n_candidates)

mean, variance = gp.predict_mean_and_variance(x_new)
assert isinstance(variance, torch.Tensor)

implausibility = hm.calculate_implausibility(mean, variance)
x_star_nroy = hm.get_nroy(implausibility, x_new)


In [None]:
# Plot every retained NROY point (count = number of rows in x_star_nroy).
n_nroy = x_star_nroy.shape[0]
ies.plot_samples(x_star_nroy, n_nroy)

### Compare with a BayesianCalibration approach

This section looks at using the current `BayesianCalibration` approach with a Gaussian-noise observation probabilistic model.


In [None]:
# Gaussian-noise calibration against the current `observations` dict.
bc_options = dict(observation_noise=0.1, model_uncertainty=True)
bc = BayesianCalibration(gp, simulator.parameters_range, observations, **bc_options)

Run MCMC using the NUTS sampler. The `BayesianCalibration` class uses Pyro under the hood, so the `pyro.set_rng_seed` call near the top of the notebook ensures reproducibility.


In [None]:
# Sample the calibration posterior: 2 chains, 250 warm-up and 500 kept draws each.
bc_mcmc_settings = dict(warmup_steps=250, num_samples=500, num_chains=2)
mcmc_bc = bc.run_mcmc(**bc_mcmc_settings)

In [None]:
# Convert to required format for plotting
x_post_bc = torch.hstack([
    mcmc_bc.get_samples()["beta"].reshape(-1, 1),
    mcmc_bc.get_samples()["gamma"].reshape(-1, 1)
])

In [None]:
# Plot every posterior draw (count = number of rows in x_post_bc).
n_post_bc = x_post_bc.shape[0]
ies.plot_samples(x_post_bc, n_post_bc)