## 0. Simulate some data and fit an emulator

In [None]:
import torch
from autoemulate.experimental.emulators import GaussianProcessExact
from autoemulate.experimental.emulators.transformed.base import TransformedEmulator
import pandas as pd

In [None]:
# Load the 2-parameter input table and the matching output table.
# To switch to the 3-parameter data set, read
# "./error_data/3d/3_parameters.csv" and "./error_data/3d/output_3_params.csv".
data_dir = "./data/2d"
df_x = pd.read_csv(f"{data_dir}/2_parameters.csv")
df_y = pd.read_csv(f"{data_dir}/output_2_params.csv")


In [None]:
df_y

In [None]:
# Convert both data frames to float32 tensors.
x = torch.tensor(df_x.to_numpy(), dtype=torch.float32)
y = torch.tensor(df_y.to_numpy(), dtype=torch.float32)


In [None]:

# Seed the global RNG so the row permutation below is reproducible.
torch.manual_seed(0)
n_rows = x.shape[0]
idx = torch.randperm(n_rows)

In [None]:
# Reorder the rows with the permutation and drop column 0 of y.
# NOTE: x and y are overwritten from themselves, so re-running this cell
# permutes the rows again and drops one more column from y.
x, y = x[idx], y[idx, 1:]
# y = y[idx, :1]
x.shape, y.shape


In [None]:

# The first 800 shuffled rows are used for training; the rest are held out.
n_train = 800
x_train, x_test = x[:n_train], x[n_train:]
y_train, y_test = y[:n_train], y[n_train:]


In [None]:
import matplotlib.pyplot as plt

# Histogram of the column at position 1 of the raw output frame.
second_output_column = df_y.iloc[:, 1]
plt.hist(second_output_column)
plt.show()

In [None]:

from autoemulate.experimental.compare import AutoEmulate
from autoemulate.experimental.emulators.gaussian_process.kernel import rbf, rbf_plus_constant
from autoemulate.experimental.emulators.transformed.base import TransformedEmulator
from autoemulate.experimental.transforms import StandardizeTransform, PCATransform

# Wrap an exact GP so that inputs and outputs are each passed through a
# StandardizeTransform; rbf_plus_constant is supplied as covar_module_fn.
input_transforms = [StandardizeTransform()]
output_transforms = [StandardizeTransform()]
em = TransformedEmulator(
    x_train,
    y_train,
    model=GaussianProcessExact,
    x_transforms=input_transforms,
    y_transforms=output_transforms,
    covar_module_fn=rbf_plus_constant,
)

em.fit(x_train, y_train)


In [None]:
from torchmetrics import R2Score
from autoemulate.experimental.model_selection import evaluate

# R2 of the fitted emulator on the training rows.
train_predictions = em.predict(x_train)
evaluate(train_predictions, y_train, metric=R2Score)

In [None]:
# R2 of the fitted emulator on the held-out rows.
test_predictions = em.predict(x_test)
evaluate(test_predictions, y_test, metric=R2Score)


In [None]:
from autoemulate.experimental.compare import AutoEmulate

# Run AutoEmulate on the full (shuffled) data, restricted to the exact GP.
# NOTE(review): n_iter=2 / n_splits=2 presumably keep the search short —
# confirm against the AutoEmulate documentation.
ae = AutoEmulate(
    x,
    y,
    models=[GaussianProcessExact],
    n_iter=2,
    n_splits=2,
    log_level="debug",
)


In [None]:
# plt.scatter(df_x.iloc[:, 0], df_y.iloc[:, 1])

In [None]:

ae.plot(0)


In [None]:
df_x

In [None]:

from autoemulate.experimental.sensitivity_analysis import SensitivityAnalysis

# Problem definition for the sensitivity analysis: one name and one
# (lower, upper) bound per input parameter.
param_names = ["target_snr_db", "phase_offset"]
param_bounds = [(-2.0, 30.0), (-1.0, 1.0)]
problem = {
    "num_vars": len(param_names),
    "names": param_names,
    "bounds": param_bounds,
}

# Analyse the model of the best AutoEmulate result.
best_emulator = ae.best_result().model
sa = SensitivityAnalysis(emulator=best_emulator, problem=problem)
df_sa = sa.run()

In [None]:
df_sa

In [None]:
sa.plot_sobol(df_sa)

## 1. Simple HMC example.

In [None]:
from autoemulate.experimental.calibration.bayes import BayesianCalibration

Start with an "observation" that the GP has been trained on. 

Specifically, we will pretend we have N noisy experimental measurements. We should be able to recover the input parameters.

In [None]:
# Single observed value for the "SER" output; the calibration below is
# conditioned on it.
observations = {"SER": torch.tensor([0.8])}
observations

In [None]:
df_x

In [None]:

# Sampling bounds per calibrated parameter; these are the simulator's
# parameter ranges.
parameters_range = dict(
    target_snr_db=(-2.0, 30.0),
    phase_offset=(-1.0, 1.0),
)
# NOTE(review): the 4th positional argument (10.0) is presumably the
# observation noise — confirm against the BayesianCalibration signature.
bc = BayesianCalibration(em, parameters_range, observations, 10.0)

Run MCMC. Note that the number of MCMC steps below is deliberately small, so do not expect convergence.

In [None]:
# Run the NUTS sampler with 100 warm-up steps and 1000 samples.
mcmc_settings = {"warmup_steps": 100, "num_samples": 1000, "sampler": "nuts"}
mcmc = bc.run_mcmc(**mcmc_settings)

The returned Pyro MCMC object has methods for accessing the generated samples (`mcmc.get_samples()`) or, as shown below, to get their summary statistics.

In [None]:
mcmc.summary()

## 2. Plotting with Arviz

We can convert the MCMC object into an ArviZ object, which can then be passed to any of ArviZ's plotting functions.

In [None]:
import arviz as az

In [None]:
az_data = bc.to_arviz(mcmc, posterior_predictive=True)

In [None]:
az.plot_trace(az_data)

In [None]:
az.plot_pair(az_data, kind='kde')

In [None]:
az.plot_ppc(az_data, kind='scatter')

In [None]:
az.plot_autocorr(az_data)

## 3. Use sensitivity analysis and history matching to refine problem before running HMC.

The `BayesianCalibration` object has an option to provide a list of parameters to calibrate. A common approach is to select these based on the results of `SensitivityAnalysis`.

Similarly, the user provides parameter ranges from within which to sample parameter values. These can simply be the ranges of the simulator. Alternatively, one can use `HistoryMatching` to reduce the parameter ranges and pass those to `BayesianCalibration` instead.

Below we demonstrate how to do both.

In [None]:
from autoemulate.experimental.sensitivity_analysis import SensitivityAnalysis
from autoemulate.experimental.calibration.history_matching import HistoryMatching

1. Run sensitivity analysis and get top N parameters (here we just get the top 1).

In [None]:
# Describe the input space from the ranges already defined for calibration,
# so parameter names and bounds stay identical across both steps.
problem = {
    "num_vars": len(parameters_range),
    "names": list(parameters_range.keys()),
    "bounds": list(parameters_range.values()),
}

# Analyse the emulator fitted in section 0.
sa = SensitivityAnalysis(em, problem=problem)
df = sa.run("sobol")

top_param = sa.top_n_sobol_params(df, 1)

# the output is just a list of strings, this could be set by hand
top_param

2. Run history matching and generate new parameter bounds from the NROY samples (if there are any).

In [None]:
# start with some GP predictions at 20 candidate points drawn uniformly at
# random inside the calibration bounds
n_new = 20
lower = torch.tensor(
    [lo for lo, _ in parameters_range.values()], dtype=torch.float32
)
upper = torch.tensor(
    [hi for _, hi in parameters_range.values()], dtype=torch.float32
)
x_new = lower + (upper - lower) * torch.rand(n_new, len(parameters_range))

# NOTE(review): assumes em.predict returns an object exposing .mean and
# .variance, as the rest of this cell requires — confirm.
output = em.predict(x_new)
pred_means, pred_vars = (
    output.mean.float().detach(),
    output.variance.float().detach(),
)

In [None]:
# generate NROY samples
# Each observation is passed as [mean of the observed values, 10.0].
# NOTE(review): the second entry is presumably the observation variance —
# confirm against the HistoryMatching documentation.
hm_observations = {
    name: [values.mean(), 10.0] for name, values in observations.items()
}
hm = HistoryMatching(observations=hm_observations, threshold=5.0, rank=2)
implausibility = hm.calculate_implausibility(pred_means, pred_vars)
nroy_samples = hm.get_nroy(implausibility, x_new)
nroy_samples

The newly generated range is slightly narrower than the range of the simulator.

In [None]:
# get new param bounds, labelled with the calibration parameter names
nroy_param_range = hm.generate_param_bounds(
    nroy_samples, param_names=list(parameters_range.keys())
)
nroy_param_range

3. Pass results to the `BayesianCalibration` object.

In [None]:
# Calibrate against the fitted emulator, using the NROY bounds when history
# matching produced any and the original parameter ranges otherwise.
# top_param is the list of parameter names selected by sensitivity analysis.
bc_nroy = BayesianCalibration(
    em,
    nroy_param_range if nroy_param_range is not None else parameters_range,
    observations,
    10.0,
    top_param,
)

In [None]:
# Short MCMC run on the refined problem: 10 warm-up steps, 100 samples.
mcmc_nroy = bc_nroy.run_mcmc(
    num_samples=100,
    warmup_steps=10,
)

In [None]:
mcmc_nroy.summary()