In [1]:
import os
import pandas as pd
from autoemulate.core.compare import AutoEmulate
import torch
figsize = (9, 5)
import numpy as np

In [2]:
param_filename = 'parameters_naghavi_constrained_fixed_T_v_tot_v_ref_lower_k_pas_further'

# Load the simulations summary stats to get feature names
summary_stats = pd.read_csv(os.path.join(f'../../outputs/simulations_for_sa/n_samples_16_n_evals_480_{param_filename}/simulations_summary.csv'))
all_features = summary_stats.columns.to_list()

n_samples     = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
n_model_evals = [480, 960, 1920, 3840, 7680, 15360, 30720, 61440, 122880]

In [3]:
# Create the save directory
save_dir = f'../../outputs/sa_results/aggregated_dfs/{param_filename}'
os.makedirs(save_dir, exist_ok=True)

In [4]:
# Create an empty df to append loaded df's to
combined_sobol_df = pd.DataFrame()

for i_output_feature in all_features:

    print(f'Plotting for {i_output_feature}')

    for i_n_samples, i_n_model_evals in zip(n_samples, n_model_evals):

        print(f'Loading simulations for {i_n_samples} samples and {i_n_model_evals} model evaluations')

        sa_results_path = f'../../outputs/sa_results/n_samples_{i_n_samples}_n_evals_{i_n_model_evals}_{param_filename}/'

        # Load the sobol_df
        sobol_df = pd.read_csv(os.path.join(sa_results_path, 
                                            i_output_feature, 
                                            'simulations',
                                            'sobol_df.csv'))
        
        sobol_df['n_saltelli_samples'] = i_n_samples
        sobol_df['n_model_evals'] = i_n_model_evals
        sobol_df['data_type'] = 'simulations'
        sobol_df['n_simulations_trained_on'] = np.nan
        sobol_df['emulator'] = 'N/A'

        # Append to combined_sobol_df
        combined_sobol_df = pd.concat([combined_sobol_df, sobol_df], ignore_index=True)

        # Now load sobol_df for emulators trained on different number of simulations
        # Get names of all subfolders with "emulations_trained_on_" in the name
        emulation_folders = [d for d in os.listdir(os.path.join(sa_results_path, i_output_feature)) if "emulations_trained_on_" in d]

        for i_emu_folder in emulation_folders:

            # Get the number of sims the emulator was trained on from the folder name
            n_sims_trained_on = int(i_emu_folder.split('_')[-2])

            print(f'Loading emulations for {i_n_samples} samples and {i_n_model_evals} model evaluations, trained on {n_sims_trained_on} simulations')

            # Load the sobol_df
            sobol_df = pd.read_csv(os.path.join(sa_results_path, i_output_feature, i_emu_folder, 'sobol_df.csv'))

            sobol_df['n_saltelli_samples'] = i_n_samples
            sobol_df['n_model_evals'] = i_n_model_evals
            sobol_df['data_type'] = 'emulations'
            sobol_df['n_simulations_trained_on'] = n_sims_trained_on
            sobol_df['emulator'] = 'GP'

            # Append to combined_sobol_df
            combined_sobol_df = pd.concat([combined_sobol_df, sobol_df], ignore_index=True)

# Reset index
combined_sobol_df.reset_index(drop=True, inplace=True)

# Save combined_sobol_df to a CSV file
combined_sobol_df.to_csv(os.path.join(save_dir, 'combined_sobol_df.csv'), index=False)


Plotting for v_ao_mean
Loading simulations for 16 samples and 480 model evaluations
Loading emulations for 16 samples and 480 model evaluations, trained on 16 simulations
Loading emulations for 16 samples and 480 model evaluations, trained on 128 simulations
Loading emulations for 16 samples and 480 model evaluations, trained on 32 simulations
Loading emulations for 16 samples and 480 model evaluations, trained on 512 simulations
Loading emulations for 16 samples and 480 model evaluations, trained on 1024 simulations
Loading emulations for 16 samples and 480 model evaluations, trained on 64 simulations
Loading emulations for 16 samples and 480 model evaluations, trained on 256 simulations
Loading simulations for 32 samples and 960 model evaluations
Loading emulations for 32 samples and 960 model evaluations, trained on 16 simulations
Loading emulations for 32 samples and 960 model evaluations, trained on 128 simulations
Loading emulations for 32 samples and 960 model evaluations, train