In [None]:
import pandas as pd 
import numpy as np
import os 

### Read the input and output of the simulator

##### Define the path to the folder of the outputs and the samples.csv
The outputs used here were generated with the ModularCirc library (see Tutorial_03).

In [None]:
# Locations of the simulator output folder and of the sampled-parameter CSV.
# The defaults are the original machine-specific paths; set the environment
# variables MODULARCIRC_OUT_PATH / MODULARCIRC_IN_PATH to run the notebook on
# another machine without editing this cell.
out_path = os.environ.get(
    'MODULARCIRC_OUT_PATH',
    '/Users/mfamili/work/ModularCirc/Tutorials/Tutorial_03/Outputs/Out_01',
)
in_path = os.environ.get(
    'MODULARCIRC_IN_PATH',
    '/Users/mfamili/work/ModularCirc/Tutorials/Tutorial_03/samples_Naghavi.csv',
)

In [None]:
# Load the sampled input parameters (one row per simulation) and show how many rows were read.
parameters = pd.read_csv(in_path)
parameters.shape[0]

##### Read and process outputs
- Read the outputs into a list of DataFrames (one per simulation) and report any output files that are missing.
- Reduce each output file to its per-column range (max − min) and stack the results into a NumPy array of shape (number of samples, number of outputs).

In [None]:
# Load every expected simulator output file and reduce each one to a feature vector.
#
# `outputs` keeps one entry per expected simulation index: the DataFrame when the
# file was read successfully, or False as a placeholder when it is missing or unreadable.
# `no_sim_result_list` collects the indices of ALL simulations without a usable result
# (missing file or read error), so that the matching rows can later be dropped from
# the parameter table and inputs/outputs stay aligned.
outputs = []
num_sim = 1000
no_sim_result_list = []
# Loop through the expected range of files
for i in range(num_sim):
    filename = f"all_outputs_{i}.csv"
    file_path = os.path.join(out_path, filename)

    if not os.path.exists(file_path):
        no_sim_result_list.append(i)
        print(f"File {file_path} doesn't exist")
        outputs.append(False)  # Placeholder: the file doesn't exist
        continue

    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        # An unreadable file is treated like a missing one; otherwise its
        # parameter row would remain and shift every later input/output pair.
        no_sim_result_list.append(i)
        outputs.append(False)  # Placeholder: the file could not be parsed
    else:
        outputs.append(df)

# Only real DataFrames can be summarised; the False placeholders have no .max()/.min().
valid_outputs = [df for df in outputs if isinstance(df, pd.DataFrame)]

# One row per successful simulation: per-column range (max - min) of its output table.
outs = np.array([df.max().values - df.min().values for df in valid_outputs])
print(np.array(outs).shape)
print(np.array(outs).dtype)


##### Update the parameters to drop the parameter sets where the output is missing

In [None]:
# Remove the rows whose simulation produced no result, convert what is left to a
# plain NumPy array, and display its shape.
updated_parameters = parameters.drop(index=no_sim_result_list).to_numpy()
updated_parameters.shape

In [None]:
# Candidate output-preprocessing configurations: each entry names a method and
# carries that method's parameters.
preprocessing_methods = [
    dict(name="PCA", params=dict(reduced_dim=8)),
    dict(
        name="VAE",
        params=dict(reduced_dim=3, hidden_layers=[64, 32], epochs=100),
    ),
]
# Alternatives that were tried and are currently disabled:
#   - an extra entry {"name": "None", "params": {}}
#   - preprocessing_methods = [{"name": "PCA", "params": {"n_components": 8}}]

##### Setup autoemulate 
- Here we choose to preprocess the outputs in order to reduce their dimensionality.


In [None]:
# Create an AutoEmulate instance and configure it with the filtered parameter
# array as inputs and the per-output ranges in `outs` as targets.
from autoemulate.compare import AutoEmulate
em = AutoEmulate()
# models=["gp"] is the only model name passed (presumably restricting the
# comparison to the Gaussian-process emulator — confirm against the AutoEmulate docs).
# NOTE(review): `preprocessing_methods` is defined in the previous cell but is not
# passed to setup() here — confirm whether it was meant to be supplied.
em.setup(updated_parameters, outs, models=["gp"])


In [None]:
# Run the comparison on the configured emulator and keep whatever it returns
# (presumably the best-performing fitted model — confirm against the AutoEmulate docs).
best_model = em.compare()

In [None]:
# Display the object returned by em.compare().
best_model

In [None]:
# Show the keys of the emulator's `preprocessing_results` mapping
# (presumably one entry per preprocessing method — TODO confirm).
em.preprocessing_results.keys()


In [None]:
# Cross-validation summary requested for the 'gp' model only.
em.summarise_cv(model='gp')

In [None]:
# Cross-validation summary called without arguments.
# NOTE(review): an earlier inline note here read "Skip None models by filtering",
# but no filter is passed in this call — confirm what summarise_cv() does by default.
em.summarise_cv()

In [None]:
# Evaluate the model returned by em.get_model() (called with no arguments, so
# whichever model AutoEmulate selects by default — TODO confirm which one that is).
em.evaluate(em.get_model()) 

In [None]:
# Evaluation plots for the default model, limited to input indices 0-4 and
# output indices 0-2 (indices presumably refer to columns of the arrays passed
# to setup() — TODO confirm).
em.plot_eval(em.get_model(), input_index=[0,1,2,3,4], output_index=[0,1,2])

In [None]:
# Cross-validation plot using the "residual_vs_predicted" style.
em.plot_cv(style="residual_vs_predicted")

In [None]:
# Generate parameter samples with ModularCirc's BatchRunner and run the Naghavi
# model on them.
# NOTE(review): this cell writes 'samples_Naghavi.csv', a file name that the
# notebook reads near the top — presumably this cell (or Tutorial_03) has to be
# run first; confirm the intended execution order.
from ModularCirc.Models.NaghaviModel import NaghaviModel, NaghaviModelParameters, TEMPLATE_TIME_SETUP_DICT
from ModularCirc import BatchRunner
# `os` is already imported at the top of the notebook and is not used in this cell.
import os
# Arguments presumably select Latin hypercube sampling and a seed of 0 — TODO confirm.
br = BatchRunner('LHS', 0)
# Sampler configuration is read from a JSON file given relative to the working directory.
br.setup_sampler('parameters_01.json')
# presumably draws a single sample — TODO confirm the meaning of the argument.
br.sample(1)
# Each timing parameter is mapped onto itself (key -> single-element list with the same name).
map_ = {
    'lv.t_tr' : ['lv.t_tr',],
    'la.t_tr' : ['la.t_tr',],
    'la.delay' : ['la.delay',],
    'lv.tau' : ['lv.tau',],
    'la.tau' : ['la.tau',],
    'lv.t_max' : ['lv.t_max',],
    'la.t_max' : ['la.t_max',],
}
# NOTE(review): ref_time=1000. looks like a reference period in ms — confirm units.
br.map_sample_timings(
    ref_time=1000.,
    map=map_
    )
# Bare expression in the middle of a cell: its value is not displayed.
br.samples
# Persist the samples to the current working directory, without the index column.
br.samples.to_csv('samples_Naghavi.csv', index=False)
br.map_vessel_volume()
# Bare expression (not the last line of the cell), so this summary of the
# 'ao.v', 'art.v' and 'ven.v' columns is computed but not shown. Note that it
# reads the private `_samples` attribute rather than `samples`.
br._samples[['ao.v', 'art.v', 'ven.v']].describe().T
br.setup_model(model=NaghaviModel, po=NaghaviModelParameters, time_setup=TEMPLATE_TIME_SETUP_DICT)
# Run the batch with n_jobs=2 and keep the return value in `test`.
test = br.run_batch(n_jobs=2)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import uniform
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

# Define our simulator
def simulator(params, noise=0.0):
    """
    Toy stand-in for an expensive numerical model.

    Evaluates sin(x1) * cos(x2) + x1 * x2 / 5 for the two supplied parameters
    and, when a positive noise level is given, adds one draw from a zero-mean
    normal distribution with that standard deviation (taken from NumPy's
    global random state).

    params: sequence of the two parameter values [x1, x2]
    noise: standard deviation of the additive noise; values that are not > 0
           leave the output noise-free
    """
    first, second = params
    response = np.sin(first) * np.cos(second) + first * second / 5.0

    # Guard clause: nothing to add unless the noise level is strictly positive.
    if not noise > 0:
        return response

    return response + np.random.normal(0, noise)

# Synthetic "observation": one noisy simulator run at known parameter values.
# (With a real system this would be an actual measurement.)
np.random.seed(42)
true_params = [2.5, 1.8]  # parameter values behind the synthetic observation
observed_data = simulator(true_params, noise=0.2)
print(f"Observed historical data: {observed_data}")

# (lower, upper) bounds for x1 and x2
param_ranges = [(0, 5), (0, 5)]

# Design points: plain uniform random sampling is used here as a simple
# substitute for Latin hypercube sampling. All x1 values are drawn first,
# then all x2 values; the transpose yields one row per sample.
n_samples = 100
samples = np.array([
    [uniform.rvs(lower, upper - lower) for _ in range(n_samples)]
    for lower, upper in param_ranges
]).T

# Noise-free simulator evaluation at every design point
simulator_outputs = np.array(list(map(simulator, samples)))

# Calculate the implausibility metric
# This measures how far simulator outputs are from observed data
# normalized by the uncertainty
def implausibility(sim_output, obs_data, obs_error=0.2, model_error=0.1):
    """
    Distance between simulator output and observation in units of combined
    standard deviation.

    sim_output: simulator output (scalar or array)
    obs_data: observed data
    obs_error: standard deviation of the observation error
    model_error: standard deviation of the model discrepancy

    Returns |sim_output - obs_data| / sqrt(obs_error^2 + model_error^2).
    """
    variance_terms = (obs_error**2, model_error**2)
    combined_sd = np.sqrt(variance_terms[0] + variance_terms[1])
    mismatch = np.abs(sim_output - obs_data)
    return mismatch / combined_sd

# Implausibility of every design point, computed from the true simulator outputs
implausibility_scores = implausibility(simulator_outputs, observed_data)

# Gaussian-process emulator of the simulator: constant kernel times an RBF with
# one length scale per parameter, fitted on the design points so that simulator
# output can be predicted at new parameter values.
kernel = C(constant_value=1.0) * RBF(length_scale=[1.0, 1.0])
gp = GaussianProcessRegressor(n_restarts_optimizer=10, kernel=kernel)
gp.fit(X=samples, y=simulator_outputs)

# Function to predict simulator output using GP emulator
def emulate(params):
    """Return the GP emulator's prediction for a single parameter vector."""
    # The regressor expects a 2-D batch, so wrap the point in a one-row list
    # and unwrap the single prediction afterwards.
    single_row = [params]
    prediction = gp.predict(single_row)
    return prediction[0]

# Function to calculate emulated implausibility
def emulated_implausibility(params, obs_data, obs_error=0.2, model_error=0.1, emulator_error=0.1):
    """
    Implausibility of a parameter vector based on the emulator's prediction.

    Same measure as the simulator-based implausibility, with an additional
    fixed emulator-error term in the combined variance.
    """
    prediction = emulate(params)
    combined_sd = np.sqrt(obs_error**2 + model_error**2 + emulator_error**2)
    return np.abs(prediction - obs_data) / combined_sd

# Regular n_grid x n_grid lattice spanning the parameter ranges
n_grid = 50
x1_grid = np.linspace(*param_ranges[0], n_grid)
x2_grid = np.linspace(*param_ranges[1], n_grid)
X1, X2 = np.meshgrid(x1_grid, x2_grid)

# Emulated implausibility at every lattice point (row index first, then column)
Z = np.array([
    [emulated_implausibility([X1[row, col], X2[row, col]], observed_data)
     for col in range(n_grid)]
    for row in range(n_grid)
])

# Points whose implausibility is below this cutoff are kept as "not implausible"
implausibility_cutoff = 3.0

# Boolean mask of lattice points that are not ruled out
not_implausible_mask = np.less(Z, implausibility_cutoff)

# Plot the results
# Four panels on one 2x2 figure, drawn through the pyplot state machine, so the
# order of the calls below determines which subplot each command targets.
plt.figure(figsize=(12, 10))

# Plot the implausibility surface
# Panel 1: emulated implausibility over the grid, with the design points (white)
# and the true parameters (red star) overlaid.
plt.subplot(2, 2, 1)
plt.contourf(X1, X2, Z, levels=20, cmap='viridis_r')
plt.colorbar(label='Implausibility')
plt.scatter(samples[:, 0], samples[:, 1], c='white', s=10, alpha=0.5)
plt.scatter(true_params[0], true_params[1], c='red', s=50, marker='*')
plt.xlabel('Parameter x1')
plt.ylabel('Parameter x2')
plt.title('Implausibility Surface')

# Plot the not implausible region
# Panel 2: the boolean mask is cast to 0.0/1.0 so it can be contoured.
plt.subplot(2, 2, 2)
plt.contourf(X1, X2, not_implausible_mask.astype(float), levels=1, cmap='Blues')
plt.scatter(samples[:, 0], samples[:, 1], c='black', s=10, alpha=0.5)
plt.scatter(true_params[0], true_params[1], c='red', s=50, marker='*')
plt.xlabel('Parameter x1')
plt.ylabel('Parameter x2')
plt.title('Not Implausible Region')

# Plot sample points colored by implausibility
# Panel 3: colours come from `implausibility_scores`, i.e. the implausibility
# computed from the simulator outputs rather than from the emulator.
plt.subplot(2, 2, 3)
plt.scatter(samples[:, 0], samples[:, 1], c=implausibility_scores, cmap='viridis_r')
plt.colorbar(label='Implausibility')
plt.scatter(true_params[0], true_params[1], c='red', s=50, marker='*')
plt.xlabel('Parameter x1')
plt.ylabel('Parameter x2')
plt.title('Sample Points')

# Plot emulator predictions vs simulator outputs
# Panel 4: predictions are made at the same `samples` the GP was fitted on, so
# this is an in-sample comparison; the dashed black line is the identity y = x.
emulator_predictions = gp.predict(samples)
plt.subplot(2, 2, 4)
plt.scatter(simulator_outputs, emulator_predictions)
plt.plot([min(simulator_outputs), max(simulator_outputs)], 
         [min(simulator_outputs), max(simulator_outputs)], 'k--')
plt.xlabel('Simulator Output')
plt.ylabel('Emulator Prediction')
plt.title('Emulator vs Simulator')

plt.tight_layout()
plt.show()

# Locate the lattice point where the emulated implausibility is smallest and
# report it next to the true parameters and the observation.
best_idx = np.unravel_index(Z.argmin(), Z.shape)
best_params = [grid[best_idx] for grid in (X1, X2)]
best_implausibility = Z[best_idx]

print(
    f"True parameters: {true_params}",
    f"Best matching parameters: {best_params}",
    f"Best implausibility: {best_implausibility}",
    f"Simulator output at best parameters: {simulator(best_params)}",
    f"Observed data: {observed_data}",
    sep="\n",
)

# Second history-matching wave (a real study would typically run several waves)
print("\nPerforming a second wave of history matching...")

# Gather the [x1, x2] coordinates of every lattice point that survived the
# cutoff, scanning the grid row by row.
not_implausible_points = [
    [X1[row, col], X2[row, col]]
    for row in range(n_grid)
    for col in range(n_grid)
    if not_implausible_mask[row, col]
]

# Sample from the not implausible region
# Wave 2: re-run the simulator on (at most 100) surviving grid points and pick
# the one whose simulator output is closest to the observation. If no grid
# point survived the cutoff, only a message is printed.
if len(not_implausible_points) > 0:
    not_implausible_points = np.array(not_implausible_points)
    
    # Take a random sample from the not implausible points
    # Sampling is without replacement and capped at the number of available points.
    wave2_samples_idx = np.random.choice(len(not_implausible_points), 
                                         min(100, len(not_implausible_points)), 
                                         replace=False)
    wave2_samples = not_implausible_points[wave2_samples_idx]
    
    # Run the simulator for wave 2 samples
    # (default noise=0.0, so these are noise-free evaluations)
    wave2_outputs = np.array([simulator(params) for params in wave2_samples])
    
    # Build a new GP emulator for wave 2
    # NOTE(review): `wave2_gp` is fitted here but not referenced again in the
    # visible part of this cell; the implausibility below uses the simulator
    # outputs directly.
    wave2_gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
    wave2_gp.fit(wave2_samples, wave2_outputs)
    
    # Calculate implausibility for wave 2
    wave2_implausibility = implausibility(wave2_outputs, observed_data)
    
    # Find the parameter set with the lowest implausibility in wave 2
    best_wave2_idx = np.argmin(wave2_implausibility)
    best_wave2_params = wave2_samples[best_wave2_idx]
    best_wave2_implausibility = wave2_implausibility[best_wave2_idx]
    
    print(f"Wave 2 best parameters: {best_wave2_params}")
    print(f"Wave 2 best implausibility: {best_wave2_implausibility}")
    print(f"Wave 2 simulator output: {simulator(best_wave2_params)}")
    print(f"Observed data: {observed_data}")
    
    # Plot wave 2 results
    # Wave-2 points coloured by implausibility, with the true parameters (red
    # star) and the best wave-2 match (green cross) highlighted.
    plt.figure(figsize=(10, 8))
    plt.scatter(wave2_samples[:, 0], wave2_samples[:, 1], c=wave2_implausibility, cmap='viridis_r')
    plt.colorbar(label='Implausibility')
    plt.scatter(true_params[0], true_params[1], c='red', s=100, marker='*', label='True Parameters')
    plt.scatter(best_wave2_params[0], best_wave2_params[1], c='green', s=100, marker='x', label='Best Match')
    plt.xlabel('Parameter x1')
    plt.ylabel('Parameter x2')
    plt.title('Wave 2 History Matching')
    plt.legend()
    plt.show()
else:
    print("No not implausible points found for wave 2")