In [None]:
# probabilistic programming
import pyro 

# MCMC plotting
import arviz as az
import matplotlib.pyplot as plt
from getdist import plots

# autoemulate imports
from autoemulate.calibration.interval_excursion_set import IntervalExcursionSetCalibration
from autoemulate.calibration.history_matching import HistoryMatchingWorkflow
from autoemulate.core.compare import AutoEmulate
from autoemulate.data.utils import set_random_seed
from autoemulate.simulations.projectile import Projectile
from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF

# suppress warnings in notebook for readability
import os
import warnings

# Silence Python warnings so the rendered notebook output stays readable.
warnings.simplefilter("ignore")
# Also export the setting through the environment.
# NOTE(review): presumably so that any worker processes inherit it — confirm.
os.environ.update(PYTHONWARNINGS="ignore")

# Single seed shared by every seeding call in this notebook.
random_seed = 42

## Projectile simulation: run simulator and fit emulator

In [None]:
# Seed autoemulate's helper and pyro with the shared notebook seed before any sampling.
set_random_seed(random_seed)
pyro.set_rng_seed(random_seed)

simulator = Projectile(
    parameters_range={"c": (-5.0, 1.0), "v0": (0.0, 10.0)},
    log_level="error"
)
x = simulator.sample_inputs(1000)
# The second value returned by forward_batch is discarded.
# NOTE(review): if it holds the subset of inputs that simulated successfully,
# `x` and `y` could differ in length after a failed run — hence the check below.
y, _ = simulator.forward_batch(x)
assert len(x) == len(y), "simulator returned a different number of outputs than inputs"

# Assumes the columns of `x` follow the key order of `parameters_range`
# (c first, then v0) — TODO confirm.
c = x[:, 0]
v0 = x[:, 1]

# Explicit Figure/Axes so the colorbar is attached to this scatter plot
# rather than to whatever pyplot considers the "current" axes.
fig, ax = plt.subplots()
points = ax.scatter(c, v0, c=y[:, 0], cmap='viridis')
ax.set_xlabel('Drag coefficient')
ax.set_ylabel('Initial velocity (m/s)')
ax.set_title('Simulated projectile distance')
fig.colorbar(points, ax=ax, label="Distance (m)")
plt.show()

# Restrict AutoEmulate to a single model class (GaussianProcessRBF) and
# keep the best result's fitted model as the emulator used below.
ae = AutoEmulate(
    x, 
    y, 
    models=[GaussianProcessRBF],
    model_params={},
    log_level="error", 
)

gp = ae.best_result().model

## Problem set-up: identify an interval excursion set for $f(x)$

The aim for the remainder of this notebook is to explore methods that are able to identify samples $x$ from the interval excursion set.

Mathematically this is:
$$
\{\, x \in \mathbb{R}^n : a < f(x) < b \,\}, \quad f: \mathbb{R}^n \to \mathbb{R}^m, \quad a, b \in \mathbb{R}^m
$$

Solving this problem is more general than calculating:
- the level set ($f(x) = c$)
- superlevel set ($f(x) > c$)
- sublevel set ($f(x) < c$)

However, each of these can be approximated by samples from the interval excursion set for suitably chosen values of $a, b$ (e.g. a narrow interval around $c$ for the level set).

The probability that a Gaussian random variable $y \sim \mathcal{N}(\mu, \sigma^2)$ lies in the interval $a < y < b$ is:
$$
P(a < y < b) = \Phi\left(\frac{b - \mu}{\sigma}\right) - \Phi\left(\frac{a - \mu}{\sigma}\right)
$$
where $\Phi(\cdot)$ is the cumulative distribution function (CDF) of the standard normal distribution.



In the Bayesian setting, this interval probability is used as a likelihood function. The posterior density for $x$ is then proportional to the product of the prior $p(x)$ and the probability that the model output $f(x)$ lies in the interval $a < y < b$:

$$
p(x \mid a < f(x) < b) \propto p(x) \left[ \Phi\left(\frac{b - \mu(x)}{\sigma(x)}\right) - \Phi\left(\frac{a - \mu(x)}{\sigma(x)}\right) \right]
$$

where $\mu(x)$ and $\sigma(x)$ are the mean and standard deviation of the model output at $x$.

In [None]:
# Target interval for the emulated "distance" output: calibration looks for
# inputs whose output lies between `lower` and `upper`.
lower = 6.0
upper = 8.0

# Build the calibration object around the fitted emulator, reusing the
# simulator's own parameter ranges and output names.
ies = IntervalExcursionSetCalibration(
    gp,
    output_names=simulator.output_names,
    output_bounds={"distance": (lower, upper)},
    parameters_range=simulator.parameters_range,
    log_level="error",
)

## MCMC

NUTS is used by default here, but Metropolis can also produce reasonable samples for low-dimensional parameter spaces, as is the case for this projectile model.

In [None]:
# Run MCMC on the calibration problem with two chains.
mcmc = ies.run_mcmc(
    sampler="nuts",  # "metropolis" can be passed here instead
    num_chains=2,
    warmup_steps=200,
    num_samples=1000,
    model_kwargs=dict(uniform_prior=True),
)

# Keep an ArviZ view of the run for the plotting cells that follow.
az_mcmc = ies.to_arviz(mcmc)

In [None]:
# Draw posterior predictive samples from the MCMC run.
# NOTE(review): `y_pred` is not referenced again in the visible cells.
y_pred = ies.posterior_predictive(mcmc)

In [None]:
# Plot likelihood and values of MCMC samples
ies.plot_samples(az_mcmc)

In [None]:
# ArviZ InferenceData for further analysis. The MCMC run was already converted
# with `ies.to_arviz(mcmc)` right after sampling (stored as `az_mcmc`), so that
# object is reused here instead of converting the same run a second time.
az_data = az_mcmc

In [None]:
# Pair plot of MCMC samples.
# The return value is bound to `_` so it is not echoed as the cell's output.
_ = az.plot_pair(az_data)

In [None]:
# GetDist plot of samples
def get_dist_and_plot(ies, data, label="Emulator", smooth_scale_1D=0.8):
    """Convert samples to GetDist MCSamples and draw a filled triangle plot.

    Parameters
    ----------
    ies
        Calibration object exposing ``to_getdist(data, label=...)``.
    data
        Samples to convert; passed unchanged to ``ies.to_getdist``. The
        notebook calls this helper with both the MCMC result and the SMC
        InferenceData.
    label : str, default "Emulator"
        Label forwarded to ``ies.to_getdist``.
    smooth_scale_1D : float, default 0.8
        Value assigned to the converted samples' ``smooth_scale_1D``
        attribute before plotting.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``.
    """
    samples = ies.to_getdist(data, label=label)
    samples.smooth_scale_1D = smooth_scale_1D
    plotter = plots.get_subplot_plotter()
    # triangle_plot takes a list of sample sets; only one is drawn here.
    plotter.triangle_plot([samples], filled=True)
    plt.show()

# Triangle plot of the MCMC samples
get_dist_and_plot(ies, mcmc)

## Sequential Monte Carlo (SMC) with adaptive tempering

The SMC implementation provides an alternative to MCMC approaches.

It works by tempering the interval excursion set likelihood from 0 to 1 (i.e. moving gradually from the prior to the posterior), adaptively choosing each temperature step to hit a target Effective Sample Size (ESS). Particles are resampled when the ESS falls below the threshold. This converges to the exact target at temperature 1 without requiring gradients.

In [None]:
# Run SMC with the shared notebook seed and request ArviZ output
# (`return_az_data=True`) so the result can go straight to the plotting cells.
az_data_smc = ies.run_smc(
    n_particles=4000,
    ess_target_frac=0.6,
    move_steps=2,
    rw_step=0.25,
    seed=random_seed,
    uniform_prior=True,
    plot_diagnostics=True,
    return_az_data=True
)
# Check the result type here, before any downstream cell consumes it.
assert isinstance(az_data_smc, az.InferenceData), (
    "run_smc did not return an arviz.InferenceData object"
)

In [None]:
# Pair plot of SMC samples.
# The return value is bound to `_` so it is not echoed as the cell's output.
_ = az.plot_pair(az_data_smc)

In [None]:
# Plot likelihood and values of SMC samples
# The isinstance check guards the plotting call below.
# NOTE(review): presumably run_smc's return type depends on `return_az_data` — confirm.
assert isinstance(az_data_smc, az.InferenceData)
ies.plot_samples(az_data_smc)

In [None]:
# GetDist triangle plot of the SMC samples (same helper as used for the MCMC run)
get_dist_and_plot(ies, az_data_smc)