In [None]:
import torch

# probabilistic programming
import pyro 

# MCMC plotting
import arviz as az
import matplotlib.pyplot as plt
from getdist.arviz_wrapper import arviz_to_mcsamples
from getdist import plots

# autoemulate imports
from autoemulate.simulations.epidemic import Epidemic
from autoemulate.core.compare import AutoEmulate
from autoemulate.calibration.bayes import BayesianCalibration
from autoemulate.emulators import GaussianProcessRBF

# Keep the rendered notebook readable by silencing warning output.
import os
import warnings

# PYTHONWARNINGS is read at interpreter start-up, so it only affects Python
# processes launched after this point (presumably the multi-chain MCMC
# workers — TODO confirm); the filter below covers the current kernel.
os.environ["PYTHONWARNINGS"] = "ignore"
warnings.filterwarnings("ignore")

# Single seed reused by every RNG that is seeded in the next cell.
random_seed = 42

In [None]:
from autoemulate.data.utils import set_random_seed

# Seed autoemulate's helper first, then Pyro's own RNG, with the shared seed.
for seed_fn in (set_random_seed, pyro.set_rng_seed):
    seed_fn(random_seed)

# Evidence computation

## 1. Simulate data

In this example, we'll use the `Epidemic` simulator, which returns the peak infection rate given two input parameters: `beta` (the transmission rate per day) and `gamma` (the recovery rate per day).

In [None]:
# Number of input points to draw from the simulator's input space.
n_simulations = 1000

simulator = Epidemic(log_level="error")
x = simulator.sample_inputs(n_simulations)
# forward_batch returns a pair; only its first element is used here.
y, _ = simulator.forward_batch(x)

Below we plot the simulated data. The peak infection rate is higher when the transmission rate increases and the recovery rate decreases, and the two parameters are correlated with each other.

In [None]:
# Column 0 of `x` is plotted as beta and column 1 as gamma.
transmission_rate = x[:, 0]
recovery_rate = x[:, 1]

# Colour each sampled (beta, gamma) pair by its simulated output `y`.
plt.scatter(transmission_rate, recovery_rate, c=y, cmap='viridis')
plt.xlabel('Transmission rate (beta)')
plt.ylabel('Recovery rate (gamma)')
plt.colorbar(label="Peak infection rate")
# show() must be *called*; a bare `plt.show` only echoes the function object
# as the cell's output instead of explicitly rendering the figure.
plt.show()

Below we pick the "true" parameter values and simulate the corresponding output. We then add Gaussian noise to that output to generate 100 noisy "observations".

In [None]:
# Ground-truth parameters used to generate the synthetic observations.
true_beta, true_gamma = 0.3, 0.15

# The simulator expects a 2-D input of shape [1, number of inputs].
params = torch.tensor([[true_beta, true_gamma]])
true_infection_rate = simulator.forward(params)

# Draw one zero-mean Gaussian perturbation per observation and add it to the
# noise-free simulator output.
n_obs, stdev = 100, 0.05
noise = torch.normal(mean=0, std=stdev, size=(n_obs,))
observed_infection_rates = true_infection_rate[0] + noise

observations = dict(infection_rate=observed_infection_rates)

We can now use these observations to infer which input parameters were most likely to have produced them.

## 2. Calibrate with simulator

In this example, we have a fast simulator with only two input parameters, so we can calibrate against the simulator directly. The code below shows how to do this with Pyro.

In [None]:
import sys
from pathlib import Path

import pyro.distributions as dist
from pyro.infer import MCMC
from pyro.infer.mcmc import RandomWalkKernel

# The Pyro model is imported from docs/tutorials/tasks/model.py, located
# relative to the current working directory. NOTE(review): this assumes the
# kernel is started from the repository root — confirm.
tasks_dir = Path.cwd().joinpath("docs", "tutorials", "tasks").resolve()
sys.path.insert(0, str(tasks_dir))

from model import model

# Bayesian inference with a random-walk MCMC kernel: 500 warm-up steps, then
# 5000 samples, across 10 chains.
kernel = RandomWalkKernel(model, init_step_size=2.5)
mcmc_sim = MCMC(kernel, warmup_steps=500, num_samples=5000, num_chains=10)
mcmc_sim.run()


Below we plot the posterior samples of the input parameters.

In [None]:
# Posterior draws returned by the sampler, keyed by parameter name.
sim_samples = mcmc_sim.get_samples()
beta_samples = sim_samples["beta"]
gamma_samples = sim_samples["gamma"]

# Semi-transparent markers keep dense regions of the posterior visible.
plt.scatter(beta_samples, gamma_samples, alpha=0.5)
plt.xlabel("Transmission rate (beta)")
plt.ylabel("Recovery rate (gamma)")
plt.show()

## 3. Compute Evidence using AutoEmulate

AutoEmulate provides a simplified interface for computing Bayesian evidence from MCMC samples using the Harmonic method.

In [None]:
from autoemulate.calibration.evidence import EvidenceComputation

# Wrap the finished MCMC run and its model in the evidence helper.
ec = EvidenceComputation(mcmc_sim, model, temperature=0.8, log_level="info")

# Run the evidence estimate; `results` is a dict read by the report below.
results = ec.compute_evidence(epochs=30, verbose=True)

# Build the report once and print it in a single call.
rule = "=" * 60
report_lines = [
    f"\n{rule}",
    "Evidence Computation Results",
    rule,
    f"Log Evidence:          {results['ln_evidence']:.4f}",
    f"Log Inverse Evidence:  {results['ln_inv_evidence']:.4f}",
    f"Error bounds:          [{results['error_lower']:.4f}, {results['error_upper']:.4f}]",
    rule,
    f"Number of chains:      {results['num_chains']}",
    f"Samples per chain:     {results['num_samples_per_chain']}",
    f"Number of parameters:  {results['num_parameters']}",
    rule,
]
print("\n".join(report_lines))

### Understanding the Results

The `compute_evidence()` method returns a dictionary with the following key information:

- **`ln_evidence`**: The natural logarithm of the evidence (log marginal likelihood). This is the primary quantity used for model comparison via Bayes factors.
- **`ln_inv_evidence`**: The log inverse evidence (as computed by Harmonic). Note: `ln_evidence = -ln_inv_evidence`.
- **`error_lower`, `error_upper`**: Asymmetric error bounds on the log inverse evidence estimate. Tight errors (< 0.1) indicate reliable estimation.

**Using Evidence for Model Comparison:**

To compare two models, compute the Bayes factor:
```python
BF = exp(ln_evidence_model1 - ln_evidence_model2)
```

- BF > 10: Strong evidence for model 1
- BF > 3: Moderate evidence for model 1  
- BF ≈ 1: No preference between models

In [None]:
# Optional: Access internal Harmonic objects for advanced analysis
chains = ec.get_chains()
flow_model = ec.get_flow_model()
evidence_obj = ec.get_evidence_object()

print(f"Chains object: {chains}")
print(f"Flow model: {flow_model}")
print(f"Evidence estimator: {evidence_obj}")

## 4. Advanced: Using the Low-Level API (Optional)

The simplified interface above replaces approximately 60 lines of manual code. For advanced users who need more control, the underlying functions are still available:

### Manual Log Probability Extraction

```python
from autoemulate.calibration.bayes import extract_log_probabilities

# Manually extract log probabilities
samples, log_probs = extract_log_probabilities(mcmc_sim, model)
print(f"Samples shape: {samples.shape}")  # (num_chains, num_samples_per_chain, ndim)
print(f"Log probs shape: {log_probs.shape}")  # (num_chains, num_samples_per_chain)
```

### Direct Harmonic API Usage

```python
import harmonic as hm

# Create Harmonic Chains
chains = hm.Chains(samples.shape[2])
chains.add_chains_3d(samples, log_probs)

# Split and train flow model
chains_train, chains_infer = hm.utils.split_data(chains, training_proportion=0.5)
flow = hm.model.RQSplineModel(samples.shape[2], standardize=True, temperature=0.8)
flow.fit(chains_train.samples, epochs=30, verbose=True)

# Compute evidence
ev = hm.Evidence(chains_infer.nchains, flow)
ev.add_chains(chains_infer)
ln_inv_evidence = ev.ln_evidence_inv
errors = ev.compute_ln_inv_evidence_errors()
```

However, we recommend using the `EvidenceComputation` class for most use cases as it handles error checking, logging, and provides a cleaner interface.