In [None]:
import torch

# probabilistic programming
import pyro 

# MCMC plotting
import arviz as az
import matplotlib.pyplot as plt
from getdist.arviz_wrapper import arviz_to_mcsamples
from getdist import plots

# autoemulate imports
from autoemulate.simulations.epidemic import Epidemic
from autoemulate.core.compare import AutoEmulate
from autoemulate.calibration.bayes import BayesianCalibration
from autoemulate.emulators import GaussianProcess

# Standard-library helpers used only to keep the notebook output quiet.
import os
import warnings

# One seed shared by every stochastic step in this notebook.
random_seed = 42

# Silence all warnings raised inside this kernel.
warnings.simplefilter("ignore")
# Also export the preference via the environment. Python reads PYTHONWARNINGS
# at interpreter start-up, so this only matters for processes launched later.
os.environ["PYTHONWARNINGS"] = "ignore"

### Epidemic simulation: run simulator and fit emulator

In [None]:
from autoemulate.data.utils import set_random_seed
from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF

# Seed through the autoemulate helper and seed Pyro's generator explicitly,
# so the sampling below is repeatable.
set_random_seed(random_seed)
pyro.set_rng_seed(random_seed)

# Draw 1000 input points and evaluate the simulator on all of them.
simulator = Epidemic(log_level="error")
x = simulator.sample_inputs(1000)
y, _ = simulator.forward_batch(x)

# Column 0 is plotted as the transmission rate, column 1 as the recovery rate.
transmission_rate = x[:, 0]
recovery_rate = x[:, 1]

# Scatter of the sampled inputs, coloured by the simulator output.
fig, ax = plt.subplots()
points = ax.scatter(transmission_rate, recovery_rate, c=y, cmap='viridis')
ax.set_xlabel('Transmission rate (beta)')
ax.set_ylabel('Recovery rate (gamma)')
fig.colorbar(points, ax=ax, label="Peak infection rate")
plt.show()

# Reference parameter values, reused later as plot markers.
true_beta = 0.3
true_gamma = 0.15

# The simulator expects inputs of shape [1, number of inputs], so the
# reference point is built directly as a single-row 2D tensor.
params = torch.tensor([[true_beta, true_gamma]])
true_infection_rate = simulator.forward(params)
assert isinstance(true_infection_rate, torch.Tensor)

# Fit the emulator with AutoEmulate. Only the RBF Gaussian process is offered
# as a candidate, so the "best" result is that model.
ae = AutoEmulate(
    x,
    y,
    models=[GaussianProcessRBF],
    model_params={},
    log_level="error",
)

gp = ae.best_result().model


## Problem set-up: identify an interval excursion set for $f(x)$

The aim for the remainder of this notebook is to explore methods that are able to identify samples $x$ from the interval excursion set.

Mathematically, given a function $f: \mathbb{R}^n \to \mathbb{R}^m$ and bounds $a, b \in \mathbb{R}^m$ (one lower and one upper bound per output), the interval excursion set is:
$$
\{\, x \in \mathbb{R}^n : a < f(x) < b \,\}
$$

Solving this problem is more general than calculating:
- the level set ($f(x) = c$)
- superlevel set ($f(x) > c$)
- sublevel set ($f(x) < c$)

However, each of these can be approximated by samples from an interval excursion set with suitably chosen values of $a, b$.

In [None]:
from autoemulate.calibration.interval_excursion_set import IntervalExcursionSetCalibration

# Target band for the emulated output: we look for inputs whose prediction
# falls between `lower` and `upper`.
lower = 0.2
upper = 0.225

# One bound and one label per output ("task"); this problem has a single output.
ies = IntervalExcursionSetCalibration(
    gp,
    parameters_range=simulator.parameters_range,
    y_labels=["infection_rate"],
    y_lower=torch.tensor([lower]),
    y_upper=torch.tensor([upper]),
    log_level="error",
)


## MCMC

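Metropolis runs faster than NUTS and produces reasonable samples for the 2D case. The cell below currently runs NUTS; the Metropolis call is left in as a commented-out alternative.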

In [None]:
# Sample from the interval excursion set with MCMC: 2 chains, 200 warm-up steps
# and 1000 samples. NUTS is the active sampler; the Metropolis variant is kept
# on the next line as a commented-out alternative.
# mcmc = ies.run_mcmc(num_samples=1000, warmup_steps=200, num_chains=2, sampler="metropolis")
mcmc = ies.run_mcmc(num_samples=1000, warmup_steps=200, num_chains=2, sampler="nuts")
# Convert the run with the calibrator's ArviZ helper for the plotting cells that follow.
az_mcmc = ies.to_arviz(mcmc)


In [None]:
# Plot the converted MCMC samples with the calibrator's own plotting helper.
ies.plot_samples(az_mcmc)

In [None]:
# `mcmc` was already converted to ArviZ format as `az_mcmc` when it was run;
# reuse that result instead of converting the same object a second time.
# The name `az_data` is kept because the following cells refer to it.
az_data = az_mcmc

In [None]:
# ArviZ pair plot of the MCMC samples. Binding the return value to `_` keeps
# the returned object from being echoed as cell output.
_ = az.plot_pair(az_data)

In [None]:
# Wrap the ArviZ samples from the emulator-based calibration for getdist,
# then set the 1D smoothing scale on the wrapped samples.
emu_data = arviz_to_mcsamples(az_data, dataset_label="Emulator")
emu_data.smooth_scale_1D = 0.8

# Reference values drawn as marker lines on the plot.
true_markers = {"beta": true_beta, "gamma": true_gamma}

# Filled triangle plot of the samples.
g = plots.get_subplot_plotter()
g.triangle_plot([emu_data], filled=True, markers=true_markers)
plt.show()

## Sequential Monte Carlo (SMC) with adaptive tempering

SMC is a further alternative to importance sampling that might be expected to scale to higher dimensions slightly better.

Temper the band likelihood from 0 to 1, adaptively controlling steps to hit a target Effective Sample Size (ESS). We resample when ESS falls below the threshold. This converges to the exact target at temperature 1 without gradients.

In [None]:
# SMC settings gathered in one place so they are easy to tweak.
# NOTE(review): the meaning of each option is defined by `ies.run_smc`;
# `ess_target_frac` is presumably the ESS target as a fraction of the particle
# count - confirm against its documentation.
smc_settings = {
    "n_particles": 4000,
    "ess_target_frac": 0.6,
    "move_steps": 2,
    "rw_step": 0.25,
    "seed": random_seed,
    "uniform_prior": False,
    "plot_diagnostics": True,
    "return_az_data": True,
}

az_data_smc = ies.run_smc(**smc_settings)

In [None]:
# ArviZ pair plot of the SMC result. Binding the return value to `_` keeps
# the returned object from being echoed as cell output.
_ = az.plot_pair(az_data_smc)

In [None]:
# `run_smc` was called with return_az_data=True; check that the result really
# is ArviZ InferenceData before handing it to the calibrator's plotting helper.
assert isinstance(az_data_smc, az.InferenceData)
ies.plot_samples(az_data_smc)

### History matching with multi-task band likelihood

This section looks at using the current history matching workflow to generate samples from the interval excursion set.

In [None]:
from autoemulate.calibration.history_matching import HistoryMatchingWorkflow
import numpy as np

# Read the band edges back from the calibrator so this cell stays in step with `ies`.
lower = ies.y_band_low.item()
upper = ies.y_band_high.item()
midpoint = 0.5 * (lower + upper)
difference = upper - lower

# Standard deviation of the pseudo-observation: half the band width, so that
# midpoint +/- 1 sigma spans [lower, upper]. For the 0.2-0.225 band used when
# `ies` was built this is sigma ~= 0.0125 (2 * sigma ~= 0.025, the band width).
sigma = 0.5 * difference

# 100 observations drawn uniformly inside the interval, to weight a posterior
# towards the interval. The history-matching workflow below does not use them;
# they are consumed by the BayesianCalibration cell later in the notebook.
observations = {"infection_rate": lower + (upper - lower)*torch.rand(100)}

hm = HistoryMatchingWorkflow(
    simulator=simulator,
    result=ae.best_result(),
    # Band midpoint paired with sigma squared - presumably (mean, variance);
    # confirm against HistoryMatchingWorkflow.
    observations={"infection_rate": (midpoint, sigma**2)},
    threshold=1.0, # implausibility threshold in sigma units
    train_x=x,
    train_y=y,
    log_level="error",
)


In [None]:
# Get samples in NROY space
# 1. Draw 10000 fresh candidate inputs from the simulator's input sampler.
x_new = simulator.sample_inputs(10000)
# 2. Ask the emulator for its predictive mean and variance at each candidate.
mean, variance = gp.predict_mean_and_variance(x_new)
assert isinstance(variance, torch.Tensor)
# 3. Compute the implausibility of each candidate from the emulator
#    predictions using the workflow `hm`, then keep the candidates it returns
#    as NROY (presumably "not ruled out yet" at the configured threshold).
implausibility = hm.calculate_implausibility(mean, variance)
x_star_nroy = hm.get_nroy(implausibility, x_new)


In [None]:
# Plot the NROY inputs from history matching with the same helper that was
# used for the MCMC and SMC samples.
ies.plot_samples(x_star_nroy)

### Compare with a BayesianCalibration approach

This section looks at using the current `BayesianCalibration` approach with a Gaussian-noise observation probabilistic model.


In [None]:
# Calibrate against the in-band pseudo-observations. `observations` is the
# dict of 100 draws created in the history-matching set-up cell.
bc = BayesianCalibration(
    gp,
    simulator.parameters_range,
    observations,
    model_uncertainty=True,
    observation_noise=0.1,
)

In [None]:
# Run MCMC for the Bayesian calibration: 2 chains, 250 warm-up steps and 500
# samples (keywords in the same order as the earlier `ies.run_mcmc` call).
mcmc_bc = bc.run_mcmc(num_samples=500, warmup_steps=250, num_chains=2)

In [None]:
# Convert the Bayesian-calibration run to ArviZ format and draw its pair plot.
az_mcmc_bc = bc.to_arviz(mcmc_bc)
# Bind the return value to `_`, as the other pair-plot cells do, so the
# returned object is not echoed as cell output.
_ = az.plot_pair(az_mcmc_bc)

In [None]:
# Plot the Bayesian-calibration samples with the interval-excursion-set
# calibrator's plotting helper.
# NOTE(review): this passes the raw `mcmc_bc` object, whereas the earlier calls
# pass ArviZ data or a tensor - confirm `plot_samples` accepts an MCMC object
# (the converted `az_mcmc_bc` is also available).
ies.plot_samples(mcmc_bc)