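This notebook performs a Sobol sensitivity analysis of emulator predictions by calling SALib's sampling and analysis functions directly, rather than running the analysis through autoemulate. autoemulate helpers are still used to tabulate and plot the resulting indices.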

In [None]:
import os
import pickle

import pandas as pd
from autoemulate.core.compare import AutoEmulate
from autoemulate.core.sensitivity_analysis import SensitivityAnalysis
from ModularCirc import BatchRunner
import torch
from autoemulate.core.save import ModelSerialiser
from autoemulate.core.logging_config import get_configured_logger
import matplotlib.pyplot as plt

from SALib.analyze.sobol import analyze
from SALib.sample import saltelli

from comparative_gsa.utils import helpers
# Shared (width, height) figure size passed as `figsize` to the Sobol plotting calls.
figsize = (9, 5)

In [None]:
# --- Run configuration -------------------------------------------------------
# The parameter-set label and the sample count together identify the simulation
# output folder.
param_filename = 'parameters_naghavi_constrained_fixed_T_v_tot_v_ref_lower_k_pas'
n_samples = 2048

# Output whose emulator is analysed in this notebook.
output_to_emulate = 'p_la_max'

# --- Derived paths -----------------------------------------------------------
simulation_out_path = f'../outputs/simulations/output_{n_samples}_samples_{param_filename}/'
parameters_json_file = os.path.join(simulation_out_path, 'parameters.json')
emulators_path = os.path.join(simulation_out_path, 'emulators', output_to_emulate)

# --- Simulation data ---------------------------------------------------------
input_samples_csv = os.path.join(simulation_out_path, f'input_samples_{n_samples}.csv')
summary_stats_csv = os.path.join(simulation_out_path, 'simulations_summary.csv')

# Simulation input parameters, then the per-simulation summary statistics.
input_params = pd.read_csv(input_samples_csv)
summary_stats = pd.read_csv(summary_stats_csv)

In [None]:
# Use ModularCirc's BatchRunner to condense parameter names. This requires setting up the sampler first.
br = BatchRunner()

# Configure the sampler from the parameters JSON stored next to the simulation outputs.
br.setup_sampler(parameters_json_file)

# Display the sampler's (private) parameter mapping; its keys and values are
# read below as parameter names and bounds.
br._parameters_2_sample

In [None]:
# Parameter columns are the leading columns of the input table, up to and
# including 'lv.k_pas'.
# NOTE: `parameter_names` is reassigned from the BatchRunner sampler in a later cell.
last_param_pos = input_params.columns.get_loc('lv.k_pas')
parameter_names = input_params.columns[:last_param_pos + 1].tolist()

In [None]:
# Split the sampler's parameter mapping into names (keys) and bounds (values).
parameters_range = br._parameters_2_sample

parameter_names = [*parameters_range.keys()]
parameter_bounds = [*parameters_range.values()]

# Show the bounds for inspection.
parameter_bounds

In [None]:
# Observed [min, max] of every sampled parameter, one row per parameter.
observed_extremes = input_params[parameter_names].describe().loc[['min', 'max']]
observed_extremes.T.values

In [None]:
# SALib problem definition. The bounds are the observed min/max of the
# simulation inputs, one [min, max] row per parameter.
empirical_bounds = input_params[parameter_names].describe().loc[['min', 'max']].T.values
problem = dict(
    num_vars=len(parameter_names),
    names=parameter_names,
    bounds=empirical_bounds,
)

# Load the saved emulator result for the chosen output.
emulator_dir = os.path.join(emulators_path, 'MLP_0_20250820_162654')
ae_results = helpers.ae_load_result(emulator_dir)

In [None]:
# Draw Saltelli samples for the problem with second-order terms enabled.
# Base sample count: 4096 doubled four times.
n_base_samples = 4096 * 2**4
param_values = saltelli.sample(problem, n_base_samples, calc_second_order=True)
param_values.shape

In [None]:
# Wrap the sample array in a DataFrame whose columns are labelled with the parameter names.
param_values = pd.DataFrame(param_values, columns=parameter_names)

In [None]:
# [min, max] covered by the generated samples, one row per parameter.
sample_extremes = param_values.describe().loc[['min', 'max']]
sample_extremes.T.values

In [None]:
# Instead, load the pre-existing problem definition and saltelli samples.
# Both files live in the same output directory, so that path is spelled out once.
saltelli_out_path = '../outputs/simulations/output_245760_samples_parameters_naghavi_constrained_fixed_T_v_tot_v_ref_lower_k_pas'
problem_path = os.path.join(saltelli_out_path, 'problem.pkl')
saltelli_samples_path = os.path.join(saltelli_out_path, 'saltelli_samples.csv')

# SECURITY: unpickling can execute arbitrary code. Only load problem.pkl when it
# was produced by this project's own pipeline.
with open(problem_path, 'rb') as f:
    problem = pickle.load(f)

param_values = pd.read_csv(saltelli_samples_path)

# The samples are passed to the emulator column by column, so their width has to
# agree with the problem definition. Fail here rather than deep inside predict().
assert param_values.shape[1] == problem['num_vars'], (
    f"saltelli_samples.csv has {param_values.shape[1]} columns but the problem "
    f"defines {problem['num_vars']} variables"
)


In [None]:
# Inspect the loaded problem definition.
problem

In [None]:
# Convert the sample table into a float32 PyTorch tensor for the emulator.
x = torch.tensor(param_values.to_numpy(), dtype=torch.float32)

In [None]:
# Run the emulator on the sample tensor.
y_pred = ae_results.model.predict(x)

# Detach the prediction, move it to host memory, and drop singleton dimensions
# to obtain a plain NumPy array.
y_pred_host = y_pred.detach().cpu()
y_pred_np = y_pred_host.numpy().squeeze()

In [None]:
# Do the sobol_analyse for GSA.
# SALib estimates the confidence intervals by random resampling; pinning the seed
# makes the reported confidence values reproducible across kernel restarts.
SOBOL_SEED = 42
sobol_indices = analyze(problem, y_pred_np, calc_second_order=True, seed=SOBOL_SEED)

In [None]:
from autoemulate.core.sensitivity_analysis import SensitivityAnalysis
from autoemulate.core.sensitivity_analysis import _sobol_results_to_df 

In [None]:
# Key the Sobol results by the name of the emulated output before converting
# them to a DataFrame.
results = {output_to_emulate: sobol_indices}

In [None]:
# Convert the results dict into a table; its 'index' column is relabelled in a
# later cell before plotting.
sobol_df = _sobol_results_to_df(results)

In [None]:
# Due to a bug in autoemulate plotting, we must swap ST and S1 rows.
#
# Start from a freshly converted frame so that re-running this cell cannot swap
# the labels back again (the swap is its own inverse).
sobol_df = _sobol_results_to_df(results)

# Relabel in a single pass: 'ST' <-> 'S1'; every other label is kept as is.
label_swap = {'ST': 'S1', 'S1': 'ST'}
sobol_df['index'] = sobol_df['index'].map(lambda label: label_swap.get(label, label))

In [None]:
# Plot the rows labelled 'S1'. The figure size is the shared `figsize` defined
# in the set-up cell instead of a local copy that could drift from it.
SensitivityAnalysis.plot_sobol(sobol_df, index="S1", figsize=figsize)

In [None]:
# Plot the rows labelled 'ST'. The figure size is the shared `figsize` defined
# in the set-up cell instead of a local copy that could drift from it.
SensitivityAnalysis.plot_sobol(sobol_df, index="ST", figsize=figsize)

In [None]:
# Distribution of the emulator predictions.
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(y_pred_np, bins=1000, alpha=0.7)
hist_ax.set(
    xlabel='Predicted Values',
    ylabel='Frequency',
    title='Histogram of Predicted Values',
)
plt.show()

In [None]:
# Scatter every sampled parameter against the emulated output, one panel each.
# The grid is sized from the number of parameters, so the cell does not fail when
# there are more than 16 columns and does not leave empty panels when there are fewer.
n_cols = 4
n_params = len(param_values.columns)
n_rows = max(1, -(-n_params // n_cols))  # ceiling division; at least one row

# 4.5 units of height per row reproduces the original (20, 18) canvas for 4 rows.
fig, axes = plt.subplots(
    nrows=n_rows, ncols=n_cols, figsize=(20, 4.5 * n_rows), squeeze=False
)
axes = axes.flatten()

for ax, param_name in zip(axes, param_values.columns):
    ax.scatter(y_pred_np, param_values[param_name], alpha=0.1, s=0.1)
    ax.set_xlabel(f'{output_to_emulate} emulated')
    ax.set_ylabel(param_name)
    ax.set_title(param_name)

# Hide the panels that have no parameter to show.
for unused_ax in axes[n_params:]:
    unused_ax.set_visible(False)

# Add a super title
plt.suptitle(f'{output_to_emulate} corr with parameters', fontsize=16)

plt.tight_layout()
plt.show()
