In [None]:
import torch

# probabilistic programming
import pyro 

# MCMC plotting
import arviz as az
import matplotlib.pyplot as plt
from getdist.arviz_wrapper import arviz_to_mcsamples
from getdist import plots

# autoemulate imports
from autoemulate.simulations.epidemic import Epidemic
from autoemulate.core.compare import AutoEmulate
from autoemulate.calibration.bayes import BayesianCalibration
from autoemulate.emulators import GaussianProcess

# suppress warnings in notebook for readability
import os
import warnings

# Silence warnings raised inside this kernel so the rendered output stays readable.
warnings.filterwarnings("ignore")
# Also export the setting through the environment. PYTHONWARNINGS is read at
# interpreter start-up, so this only affects Python processes launched after
# this point (NOTE(review): presumably meant for worker processes — confirm).
os.environ["PYTHONWARNINGS"] = "ignore"

# Seed value reused by the seeding calls and the SMC run further down.
random_seed = 42

### Epidemic simulation: run simulator and fit emulator

In [None]:
from autoemulate.data.utils import set_random_seed
# Seed through autoemulate's helper and Pyro separately, before anything is sampled.
set_random_seed(random_seed)
pyro.set_rng_seed(random_seed)

# Build the epidemic simulator, draw 1000 input points and run them in one batch.
simulator = Epidemic(log_level="error")
x = simulator.sample_inputs(1000)
# forward_batch returns two values; only the first is kept as the outputs `y`.
y, _ = simulator.forward_batch(x)

# Columns of the sampled design: column 0 is plotted as the transmission rate
# (beta) and column 1 as the recovery rate (gamma).
transmission_rate = x[:, 0]
recovery_rate = x[:, 1]

# Use the explicit Figure/Axes interface so the scatter always lands on a fresh
# figure rather than on whatever figure happens to be current in the kernel.
fig, ax = plt.subplots()
points = ax.scatter(transmission_rate, recovery_rate, c=y, cmap='viridis')
ax.set_xlabel('Transmission rate (beta)')
ax.set_ylabel('Recovery rate (gamma)')
ax.set_title('Simulator output over the sampled inputs')
# Tie the colorbar to this scatter explicitly instead of the "current mappable".
fig.colorbar(points, ax=ax, label="Peak infection rate")
plt.show()

# Fit the emulator with AutoEmulate. Only one candidate model class
# (GaussianProcessRBF) is passed, so the "best" result below is presumably just
# the result for that model.
from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF

ae = AutoEmulate(
    x, 
    y, 
    models=[GaussianProcessRBF],
    model_params={},
    log_level="error", 
)

# Fitted emulator reused by the calibration cells below.
gp = ae.best_result().model

## Problem set-up: identify an interval excursion set for $f(x)$

The aim for the remainder of this notebook is to explore methods that are able to identify samples $x$ from the interval excursion set.

Mathematically this is:
$$
x \in \mathbb{R}^n, \quad a, b \in \mathbb{R}^m, \quad f: \mathbb{R}^n \to \mathbb{R}^m, \quad a < f(x) < b
$$

Solving this problem is more general than calculating:
- the level set ($f(x) = c$)
- superlevel set ($f(x) > c$)
- sublevel set ($f(x) < c$)
However, each of these can be approximated as an interval excursion set by choosing $a$ and $b$ appropriately (for example, a narrow interval around $c$ for the level set).

The probability that a Gaussian random variable $y \sim \mathcal{N}(\mu, \sigma^2)$ lies in the interval $a < y < b$ is:
$$
P(a < y < b) = \Phi\left(\frac{b - \mu}{\sigma}\right) - \Phi\left(\frac{a - \mu}{\sigma}\right)
$$
where $\Phi(\cdot)$ is the cumulative distribution function (CDF) of the standard normal distribution.



In the Bayesian setting, this interval probability is used as a likelihood function. The posterior density for $x$ is then proportional to the product of the prior $p(x)$ and the probability that the model output $f(x)$ lies in the interval $a < y < b$:

$$

p(x \mid a < f(x) < b) \propto p(x) \left[ \Phi\left(\frac{b - \mu(x)}{\sigma(x)}\right) - \Phi\left(\frac{a - \mu(x)}{\sigma(x)}\right) \right]

$$

where $\mu(x)$ and $\sigma(x)$ are the mean and standard deviation of the model output at $x$.

In [None]:
from autoemulate.calibration.interval_excursion_set import IntervalExcursionSetCalibration

# Target output interval (a, b) defining the excursion set.
lower, upper = 0.2, 0.225
# Calibration object wrapping the fitted emulator; the bounds are passed as
# one-element tensors, i.e. a single (lower, upper) pair.
ies = IntervalExcursionSetCalibration(
    gp,
    parameters_range=simulator.parameters_range,
    y_lower=torch.tensor([lower]),  # lower bound(s) per task
    y_upper=torch.tensor([upper]),  # upper bound(s) per task
    output_names=simulator.output_names,
    log_level="error",
)


## MCMC

NUTS is used by default here, but Metropolis can also produce reasonable samples in low-dimensional parameter spaces such as the one in this epidemic model.

In [None]:
# Draw samples with NUTS over 2 chains; the commented-out line is the
# Metropolis alternative.
mcmc = ies.run_mcmc(
    num_samples=1000,
    warmup_steps=200,
    num_chains=2,
    sampler="nuts",
    # sampler="metropolis",
    model_kwargs={"uniform_prior": True}
)
# Convert the MCMC result to ArviZ form for the plotting cells that follow.
az_mcmc = ies.to_arviz(mcmc)

In [None]:
# Plot the MCMC samples with the calibration object's own plotting helper.
ies.plot_samples(az_mcmc)

In [None]:
# NOTE(review): this repeats the conversion already stored in `az_mcmc`; the two
# names could probably be merged — confirm `to_arviz` is deterministic first.
az_data = ies.to_arviz(mcmc)

In [None]:
# Pair plot of the MCMC samples; assigning to `_` keeps the returned value out of the cell output.
_ = az.plot_pair(az_data)

In [None]:
# helper: turn calibration samples into a GetDist triangle plot
def get_dist_and_plot(ies, data):
    """Draw a filled GetDist triangle plot for ``data``.

    ``data`` is converted with ``ies.to_getdist`` (labelled "Emulator"), the
    1D smoothing scale of the converted samples is set to 0.8, and the result
    is rendered with a GetDist subplot plotter and shown. Returns ``None``.
    """
    samples = ies.to_getdist(data, label="Emulator")
    samples.smooth_scale_1D = 0.8
    plotter = plots.get_subplot_plotter()
    plotter.triangle_plot([samples], filled=True)
    plt.show()

# Triangle plot of the MCMC run (the `run_mcmc` result itself, not the ArviZ conversion).
get_dist_and_plot(ies, mcmc)

## Sequential Monte Carlo (SMC) with adaptive tempering

The SMC implementation provides an alternative to MCMC approaches.

It works by tempering the interval excursion set likelihood from 0 to 1 (i.e. sampling from the prior to the posterior), adaptively controlling steps to hit a target Effective Sample Size (ESS). We resample when ESS falls below the threshold. This converges to the exact target at temperature 1 without gradients.

In [None]:
# Run the SMC sampler; `return_az_data=True` requests the result in ArviZ form
# (checked to be an InferenceData in a later cell).
az_data_smc = ies.run_smc(
    n_particles=4000,
    ess_target_frac=0.6,  # presumably the ESS target as a fraction of n_particles — confirm
    move_steps=2,
    rw_step=0.25,
    seed=random_seed,  # same seed value used for the earlier seeding calls
    uniform_prior=True,
    plot_diagnostics=True,
    return_az_data=True
)

In [None]:
# Pair plot of the SMC samples; assigning to `_` keeps the returned value out of the cell output.
_ = az.plot_pair(az_data_smc)

In [None]:
# Confirm `run_smc` really returned ArviZ InferenceData before plotting it.
assert isinstance(az_data_smc, az.InferenceData)
ies.plot_samples(az_data_smc)

In [None]:
# Triangle plot of the SMC samples, using the same helper as for the MCMC run.
get_dist_and_plot(ies, az_data_smc)

### History matching with interval excursion set likelihood

This section looks at using the current history matching workflow to generate samples from the interval excursion set.

In [None]:
from autoemulate.calibration.history_matching import HistoryMatchingWorkflow
import numpy as np

# Recover the interval bounds from the calibration object as Python floats.
lower = ies.y_lower.item()
upper = ies.y_upper.item()
midpoint = 0.5 * (lower + upper)
difference = upper - lower

# 100 synthetic observations drawn uniformly inside [lower, upper). They are NOT
# used by the history matching workflow below; the BayesianCalibration cell
# later in the notebook consumes them.
observations = {"infection_rate": lower + difference * torch.rand(100)}

# Summarise the interval as a Gaussian-style target for history matching: mean
# at the midpoint and a standard deviation of half the interval width, so that
# 2 * sigma equals the interval width (0.025 for the bounds 0.2 and 0.225).
obs_sigma = difference / 2

hm = HistoryMatchingWorkflow(
    simulator=simulator,
    result=ae.best_result(),
    observations={"infection_rate": (midpoint, obs_sigma**2)},  # (mean, variance)
    threshold=1.0, # implausibility threshold in sigma units
    train_x=x,
    train_y=y,
    log_level="error",
)


In [None]:
# Screen a fresh batch of 10000 candidate inputs with the emulator and keep the
# ones in NROY ("not ruled out yet") space.
x_new = simulator.sample_inputs(10000)
mean, variance = gp.predict_mean_and_variance(x_new)
# Guard on the variance type before it is passed on (presumably the prediction
# API can return something other than a tensor — confirm).
assert isinstance(variance, torch.Tensor)
implausibility = hm.calculate_implausibility(mean, variance)
x_star_nroy = hm.get_nroy(implausibility, x_new)


In [None]:
# Plot the NROY points with the same helper used for the MCMC and SMC samples.
ies.plot_samples(x_star_nroy)

### Compare with a BayesianCalibration approach



This final section explores using the current `BayesianCalibration` approach with a Gaussian-noise observation probabilistic model.



Here, we generate a set of synthetic observations that are chosen to lie within the target interval. These are not real data, but are constructed to represent the interval constraint as if they were observed values. We then use a Gaussian likelihood with mean and variance derived from the emulator, treating these synthetic values as if they were real observations.


Mathematically, for synthetic observations $y_1, \ldots, y_N$ (all in the interval) and model output $f(x)$, the likelihood is:

$$

p(y_1, \ldots, y_N \mid x) = \prod_{i=1}^N \mathcal{N}(y_i \mid \mu(x), \sigma^2(x) + \tau^2 + \gamma^2)

$$

where $\mu(x)$ and $\sigma^2(x)$ are the emulator's predictive mean and variance at $x$, $\tau^2$ is the observation noise variance, and $\gamma^2$ is an additional variance chosen to represent model discrepancy and imperfections. The set of variances included to represent uncertainty is similar to that used in history matching.

The posterior is then:

$$

p(x \mid y_1, \ldots, y_N) \propto p(x) \prod_{i=1}^N \mathcal{N}(y_i \mid \mu(x), \sigma^2(x) + \tau^2 + \gamma^2)

$$

where $p(x)$ is the prior over parameters (uniform in the `BayesianCalibration` method here).

In [None]:
# Gaussian-likelihood calibration on the same emulator, fed with the synthetic
# `observations` dict built in the history matching cell.
bc = BayesianCalibration(
    gp, 
    simulator.parameters_range, 
    observations, 
    observation_noise=0.1,  # NOTE(review): unclear from here whether this is a variance or a standard deviation — confirm
    model_uncertainty=True,
)

In [None]:
# Run MCMC for the Gaussian-likelihood calibration over 2 chains.
mcmc_bc = bc.run_mcmc(
    warmup_steps=250, 
    num_samples=500,
    num_chains=2    
)

In [None]:
# Convert the BayesianCalibration MCMC result to ArviZ form for plotting.
az_mcmc_bc = bc.to_arviz(mcmc_bc)
# Assign to `_` so the returned value is not echoed, as the other pair-plot cells do.
_ = az.plot_pair(az_mcmc_bc)

In [None]:
# Reuse the excursion-set object's plotting helper on the BayesianCalibration
# MCMC result (the raw `run_mcmc` output, not the ArviZ conversion).
ies.plot_samples(mcmc_bc)


In [None]:
# Triangle plot of the BayesianCalibration samples; conversion goes through `ies.to_getdist`.
get_dist_and_plot(ies, az_mcmc_bc)