# Refactored workflow example

This notebook runs the same history matching workflow as the integration tutorial, but uses the refactored history matcher and the GP emulator that both currently live in the `experimental` module.

In [21]:
import os
import gpytorch
import torch

import numpy as np
import pandas as pd

# imports from main
from autoemulate.compare import AutoEmulate
from autoemulate.emulators.gaussian_process import constant_mean, rbf
from autoemulate.experimental_design import LatinHypercube
from autoemulate.history_matching import HistoryMatching as HMold
from autoemulate.history_matching_dashboard import HistoryMatchingDashboard
from autoemulate.simulations.naghavi_cardiac_ModularCirc import NaghaviSimulator, extract_parameter_ranges

# imports from experimental
from autoemulate.experimental.emulators.gaussian_process.exact import (
    GaussianProcessExact,
)
from autoemulate.experimental.history_matching import HistoryMatching

## Simulate

Set up the simulator and generate data, or read the saved data files if you have run this notebook previously.

In [2]:
# Read the parameter ranges from the JSON file shipped with the docs.
parameters_range = extract_parameter_ranges('../../../docs/data/naghavi_model_parameters.json')

# Simulator settings, collected in one place so they are easy to tweak.
# `output_variables` lists only the outputs we are interested in.
simulator_config = dict(
    parameters_range=parameters_range,
    output_variables=['lv.P_i', 'lv.P_o'],
    n_cycles=300,
    dt=0.001,
)
simulator = NaghaviSimulator(**simulator_config)

In [3]:
inputs_path = 'parameters.csv'
results_path = 'simulator_results.csv'

# Reuse the cached data only when BOTH files exist. Checking the results file
# alone would crash on `pd.read_csv(inputs_path)` if the inputs file had been
# removed, and could silently pair results with the wrong inputs.
cache_available = os.path.exists(results_path) and os.path.exists(inputs_path)

if cache_available:
    results_df = pd.read_csv(results_path)
    sample_df = pd.read_csv(inputs_path)
    # have to run simulator once to populate simulator.output_names
    _ = simulator.sample_forward(sample_df.iloc[0])
else:
    # Draw a Latin hypercube design over the parameter ranges.
    N_samples = 100
    lhd = LatinHypercube(list(parameters_range.values()))
    sample_array = lhd.sample(N_samples)
    sample_df = pd.DataFrame(sample_array, columns=list(parameters_range.keys()))

    # Run batch simulations for the samples generated above
    results = simulator.run_batch_simulations(sample_df)
    # Convert results to DataFrame for analysis
    results_df = pd.DataFrame(results)
    # Save both inputs and results so later runs can skip the simulation
    results_df.to_csv(results_path, index=False)
    sample_df.to_csv(inputs_path, index=False)

# Training tensors for the emulator: `x` holds the sampled parameters,
# `y` the corresponding simulator results (both cast to float32).
y = torch.from_numpy(results_df.to_numpy()).float()
x = torch.from_numpy(sample_df.to_numpy()).float()

## Train a GP

(Ideally this step would be done with AutoEmulate; here the GP is constructed directly.)

In [4]:
# Instantiate the experimental exact-GP emulator.
# Positional arguments, in order: training inputs, training outputs,
# likelihood class, mean function, kernel function
# (roles inferred from the imported names — TODO confirm against the class signature).
gp_pytorch = GaussianProcessExact(
    x, y, gpytorch.likelihoods.MultitaskGaussianLikelihood, constant_mean, rbf
)

In [5]:
# Fit the GP emulator on the simulator inputs `x` and outputs `y`
# built in the data-loading cell.
gp_pytorch.fit(x, y)

## History Matching

In [6]:
# Observed targets keyed by output name; each value is a (mean, variance) pair.
observations = {
    # summary statistics of lv.P_i
    'lv.P_i_min': (5.0, 0.1),
    'lv.P_i_max': (20.0, 0.1),
    'lv.P_i_mean': (10.0, 0.1),
    'lv.P_i_range': (15.0, 0.5),
    # summary statistics of lv.P_o
    'lv.P_o_min': (1.0, 0.1),
    'lv.P_o_max': (13.0, 0.1),
    'lv.P_o_mean': (12.0, 0.1),
    'lv.P_o_range': (20.0, 0.5),
}

# Build the (experimental) history matcher around the simulator and the
# observations; `threshold` is presumably the implausibility cut-off used
# to decide what is ruled out — TODO confirm.
hm = HistoryMatching(simulator=simulator, observations=observations, threshold=3.0)



History Matching involves:
- sampling parameters
- making predictions for those parameter sets
- evaluating the implausibility of the predictions
- identifying which of the parameters are not ruled out yet (NROY)

In [8]:
# Draw a few candidate parameter sets. They get their own name so the
# training inputs `x` are not overwritten: re-running the GP fit cell after
# this one would otherwise train on 5 candidates paired with the full `y`.
x_candidates = hm.sample_params(5)

# Emulator predictions (means and variances) for the candidates.
pred_means, pred_vars, _ = hm.predict(x_candidates, emulator=gp_pytorch)

# Implausibility of each prediction, and the indices of candidates that are
# not ruled out yet (NROY).
implausibility = hm.calculate_implausibility(pred_means, pred_vars)
nroy_indices = hm.get_nroy(implausibility)

# NOTE(review): in the recorded output every row of the implausibility tensor
# is identical, which suggests the emulator predictions do not vary with the
# sampled inputs — TODO investigate (e.g. input scaling / GP fit quality).
implausibility, nroy_indices

(tensor([[0.5718, 6.1953, 1.6543, 4.7003, 2.3911, 3.0088, 2.5649, 7.1212],
         [0.5718, 6.1953, 1.6543, 4.7003, 2.3911, 3.0088, 2.5649, 7.1212],
         [0.5718, 6.1953, 1.6543, 4.7003, 2.3911, 3.0088, 2.5649, 7.1212],
         [0.5718, 6.1953, 1.6543, 4.7003, 2.3911, 3.0088, 2.5649, 7.1212],
         [0.5718, 6.1953, 1.6543, 4.7003, 2.3911, 3.0088, 2.5649, 7.1212]]),
 tensor([], dtype=torch.int64))

We can execute an iterative sample-predict-evaluate procedure with `HistoryMatching.run()`.

In [10]:
# Settings for the iterative history matching loop: 20 waves of 20 samples
# each, with predictions coming from the GP emulator.
run_settings = dict(
    n_waves=20,
    n_samples_per_wave=20,
    emulator_predict=True,
    emulator=gp_pytorch,
)
emulator = hm.run(**run_settings)

History Matching: 100%|██████████| 20/20 [00:00<00:00, 37.11wave/s, samples=20, failed=0, NROY=0, min_impl=0.57, max_impl=7.12]


In [11]:
# Shapes of the parameters tested during the run and of their implausibility
# scores; 20 waves x 20 samples per wave gives the 400 rows seen in the output.
hm.tested_params.shape, hm.impl_scores.shape

(torch.Size([400, 16]), torch.Size([400, 8]))

## Dashboard

In [None]:
# Feed everything the history matcher recorded during the run into the
# dashboard, labelled with the simulator's parameter and output names.
dashboard = HistoryMatchingDashboard(
    samples=hm.tested_params,
    impl_scores=hm.impl_scores,
    param_names=simulator.param_names,
    output_names=simulator.output_names,
)

In [None]:
# Render the dashboard built in the previous cell.
dashboard.display()

## Testing

Compare implausibility scores between the original and the new HM implementation (given the same GP predictions).

In [30]:
# 1: train an sklearn GP model
# Preprocessing spec handed to AutoEmulate: PCA with reduced_dim=2.
preprocessing_methods = [{"name": "PCA", "params": {"reduced_dim": 2}}]

em = AutoEmulate()
em.setup(
    sample_df,
    y,
    models=["gp"],
    scale_output=True,
    reduce_dim_output=True,
    preprocessing_methods=preprocessing_methods,
    print_setup=False,
)

# Run the comparison (cross-validation), then fetch the GP and refit it.
best_model = em.compare()
gp_sklearn = em.get_model('GaussianProcess')
gp_final = em.refit(gp_sklearn)

Cross-validating:   0%|          | 0/1 [00:00<?, ?it/s]

In [44]:
# 2. generate samples and predictions
samples = hm.sample_params(200)
pred_means, pred_std = gp_final.predict(samples, return_std=True)

# Variance is the square of the predictive standard deviation. The previous
# code squared `pred_means` instead and never used `pred_std`.
# Assumes `pred_std` has the same shape as `pred_means` — TODO confirm.
pred_vars = pred_std**2

In [48]:
# 3: get implausibility scores using original HM implementation
hmold = HMold(simulator=simulator, observations=observations, threshold=3.0)

# The original implementation returns a result indexed by key; the scores
# are stored under "I".
impl_result_old = hmold.calculate_implausibility(pred_means, pred_vars)
impl_old = impl_result_old["I"]

In [46]:
# 4: get implausibility scores using new HM implementation
# The new implementation is called with torch tensors, so convert the numpy
# predictions first.
pred_means_t = torch.from_numpy(pred_means)
pred_vars_t = torch.from_numpy(pred_vars)
impl_new = hm.calculate_implausibility(pred_means_t, pred_vars_t)

In [54]:
# 5: check the results are the same
# Bring the old scores into torch so both sides can be compared numerically.
impl_old_t = torch.from_numpy(impl_old)
torch.allclose(impl_old_t, impl_new)

True