# Sobol indices

In [None]:
from pathlib import Path
from copy import deepcopy

from gsa_framework.sensitivity_methods.saltelli_sobol import sobol_indices_stability
from gsa_framework.convergence_robustness_validation.convergence import Convergence
from gsa_framework.utils import read_hdf5_array, read_pickle, write_pickle
from setups_paper_gwp import *

In [None]:
# Root directory under which the GSA results are stored.
path_base = Path('/data/user/kim_a/paper_gsa/')

if __name__ == "__main__":

    # Problem size and settings of the Sobol stability (bootstrap) run.
    num_params = 10000
    iterations = num_params * 100
    num_steps = 50
    num_bootstrap = 120

    gsa = setup_salt(num_params, iterations, setup_morris4_model, path_base)

    # The Convergence object is only used below for its iteration schedule
    # (`iterations_for_convergence`) and step size (`iterations_step`).
    conv = Convergence(
        gsa.filepath_Y, gsa.num_params, gsa.generate_gsa_indices,
        gsa.gsa_label, gsa.write_dir, num_steps=num_steps,
    )

    # One seed per (convergence step, bootstrap sample), drawn reproducibly
    # from the seed of the GSA run.
    # NOTE(review): `np` is not imported in this cell; presumably it is
    # provided by `from setups_paper_gwp import *` -- confirm.
    np.random.seed(gsa.seed)
    seeds_shape = (len(conv.iterations_for_convergence), num_bootstrap)
    stability_seeds = np.random.randint(0, 2147483647, size=seeds_shape)

    # The result is cached on disk under a name that encodes the run settings.
    filename_S = "stability.S.{}.{}.{}Step{}.{}.{}.pickle".format(
        gsa.gsa_label, gsa.sampling_label, gsa.iterations,
        conv.iterations_step, num_bootstrap, gsa.seed,
    )
    filepath_S = gsa.write_dir / "arrays" / filename_S
    if not filepath_S.exists():
        # No cached result: compute the stability dictionary and store it.
        Y = read_hdf5_array(gsa.filepath_Y).flatten()
        S_dict_stability = sobol_indices_stability(
            Y, num_params, conv.iterations_for_convergence,
            num_bootstrap, stability_seeds,
        )
        write_pickle(S_dict_stability, filepath_S)
    else:
        print("--> {} already exists".format(filename_S))
        S_dict_stability = read_pickle(filepath_S)

# Assemble multiple stability_dicts

## 1. Morris4

In [None]:
import numpy as np
from pathlib import Path
from copy import deepcopy

from gsa_framework.utils import read_hdf5_array, read_pickle, write_pickle

In [None]:
def create_stability_dict_from_seeds(seeds, model_dir_array, filenames_stability_dict):
    """Merge the per-seed stability dictionaries of every GSA method.

    For each GSA method the pickled stability dictionary of every seed is
    read, and the arrays that share the same step and statistic name are
    stacked row-wise in the order given by ``seeds``.

    Parameters
    ----------
    seeds : sequence
        Seeds whose stability dictionaries are merged.
    model_dir_array : pathlib.Path
        Directory that contains the pickled per-seed stability dictionaries.
    filenames_stability_dict : dict
        Maps a GSA method name to a filename template in which the substring
        ``'seed'`` is replaced by each seed.

    Returns
    -------
    dict
        ``{gsa_method: {step: {stat_name: array}}}`` where every array holds
        the rows of all seeds stacked vertically.

    Raises
    ------
    ValueError
        If ``seeds`` is empty while there are methods to merge, or if the
        seeds of one method do not provide the same steps in the same order.
    """
    seeds = list(seeds)
    if filenames_stability_dict and not seeds:
        raise ValueError("`seeds` must contain at least one seed.")

    stability_dict_all = {}
    for gsa_method, path in filenames_stability_dict.items():
        # Each file is expected to hold {step: {stat_name: 2-D array}}.
        stability_dict_raw = {
            seed: read_pickle(model_dir_array / path.replace('seed', str(seed)))
            for seed in seeds
        }

        # Compare the keys as plain lists: this also works when seeds have a
        # different number of steps and keeps the original key objects.
        steps = list(stability_dict_raw[seeds[0]])
        for seed in seeds[1:]:
            if list(stability_dict_raw[seed]) != steps:
                raise ValueError(
                    "Steps of seed {} differ from steps of seed {} for '{}'.".format(
                        seed, seeds[0], gsa_method
                    )
                )

        stability_dict = {}
        for step in steps:
            stability_dict[step] = {}
            # Statistic names and the column count come from the last seed;
            # a seed that lacks one of these statistics raises KeyError.
            for stat_name, data in stability_dict_raw[seeds[-1]][step].items():
                blocks = [stability_dict_raw[seed][step][stat_name] for seed in seeds]
                # The leading empty float block keeps the float promotion of
                # the result; all seeds are stacked in a single call.
                empty = np.zeros([0, data.shape[1]])
                stability_dict[step][stat_name] = np.vstack([empty] + blocks)
        stability_dict_all[gsa_method] = stability_dict
    return stability_dict_all

def get_stability_dict_all_seeds(seeds, model_dir_array, filenames_stability_dict):
    """Load, or build and cache, the stability dictionaries merged over all seeds.

    For every GSA method the name of the merged file is derived from the
    per-seed filename template: ``'seed'`` is replaced by all seeds joined
    with underscores, and the third-from-last dot-separated field is
    multiplied by the number of seeds (presumably the bootstrap count per
    seed -- confirm against the code that writes the per-seed files).
    If the merged file exists in ``model_dir_array`` it is read; otherwise
    the per-seed dictionaries of that method are merged and the result is
    pickled under the merged name.

    Parameters
    ----------
    seeds : sequence
        Seeds whose stability dictionaries are merged.
    model_dir_array : pathlib.Path
        Directory with the per-seed pickles; merged pickles are written here.
    filenames_stability_dict : dict
        Maps a GSA method name to its per-seed filename template.

    Returns
    -------
    dict
        Maps each GSA method name to its merged stability dictionary.
    """
    str_seed = "_".join("{}".format(seed) for seed in seeds)

    S_dict = {}
    for gsa_method, filename in filenames_stability_dict.items():
        name_parts = filename.replace('seed', str_seed).split('.')
        # Scale the third-from-last field by the number of merged seeds.
        name_parts[-3] = "{:d}".format(int(name_parts[-3]) * len(seeds))
        filename_stability_concatenated = ".".join(name_parts)
        print(filename_stability_concatenated)
        filepath = model_dir_array / filename_stability_concatenated
        if filepath.exists():
            S_dict[gsa_method] = read_pickle(filepath)
        else:
            # Merge only the method at hand: the other methods are either
            # cached already or handled in their own loop iteration, so
            # their per-seed files must not be re-read (or required) here.
            S_dict_gsa = create_stability_dict_from_seeds(
                seeds, model_dir_array, {gsa_method: filename}
            )[gsa_method]
            S_dict[gsa_method] = S_dict_gsa
            write_pickle(S_dict_gsa, filepath)
    return S_dict

In [None]:
# Settings for the Morris4 model: number of model parameters, location of the
# pickled arrays and the seeds whose stability dictionaries are merged.
num_params = 10000
path_base = Path('/data/user/kim_a/paper_gsa/')
model_dir_array = path_base / "{}_morris4".format(num_params) / "arrays" 
seeds = [3407, 6000814]
# Per-seed stability filename templates, keyed first by the number of model
# parameters and then by a short GSA method label. The literal 'seed' in each
# template is replaced by the actual seed(s) in get_stability_dict_all_seeds
# and create_stability_dict_from_seeds.
filenames_stability_dict_all_models = {
    1000: {
        'corr': "stability.S.correlationsGsa.randomSampling.4000Step80.60.seed.pickle",
        'delt': "stability.S.deltaGsaNr0.latinSampling.8000Step160.60.seed.pickle",
        "xgbo": "stability.S.xgboostGsa_Lr0.1G0Mcw30Md2RegL10RegA0Ne500Ss0.6Cbt0.3_.randomSampling.4000Step80.60.seed.pickle",  
    },
    5000: {
        'corr': "stability.S.correlationsGsa.randomSampling.20000Step400.60.seed.pickle",
        'delt': "stability.S.deltaGsaNr0.latinSampling.40000Step800.60.seed.pickle",
        'xgbo': "stability.S.xgboostGsa_Lr0.2G0Mcw300Md2RegL0RegA0Ne800Ss0.3Cbt0.3_.randomSampling.20000Step400.60.seed.pickle",
    },
    10000: {
        'corr': "stability.S.correlationsGsa.randomSampling.40000Step800.60.seed.pickle",
        'delt': "stability.S.deltaGsaNr0.latinSampling.80000Step1600.60.seed.pickle",
        'xgbo': "stability.S.xgboostGsa_Lr0.2G0Mcw600Md2RegL0RegA0Ne1500Ss0.2Cbt0.2_.randomSampling.40000Step800.60.seed.pickle",
    }
}

# Load (or build and cache) the merged stability dictionaries of all methods
# for the model size selected by `num_params`.
S_dict = get_stability_dict_all_seeds(seeds, model_dir_array, filenames_stability_dict_all_models[num_params])

In [None]:
# Display the merged stability dictionary of the 'corr' method.
S_dict['corr']

In [None]:
# Read the per-seed 'corr' stability dictionary of seed 3407 for a manual check.
# NOTE(review): this filename matches the template listed under 1000
# parameters, while `model_dir_array` is built from `num_params` -- confirm
# that the directory and the file belong to the same model.
a = model_dir_array / "stability.S.correlationsGsa.randomSampling.4000Step80.60.3407.pickle"
aa = read_pickle(a)

In [None]:
# Read the per-seed 'corr' stability dictionary of seed 6000814 for a manual check.
# NOTE(review): this filename matches the template listed under 1000
# parameters, while `model_dir_array` is built from `num_params` -- confirm
# that the directory and the file belong to the same model.
b = model_dir_array / "stability.S.correlationsGsa.randomSampling.4000Step80.60.6000814.pickle"
bb = read_pickle(b)

In [None]:
# Take the 'spearman' entry of both per-seed dictionaries at one step key.
# presumably 3920 is one of the convergence steps (a multiple of the step
# size 80 in the filenames) -- verify against the keys of `aa` and `bb`.
itera = 3920
ai = aa[itera]['spearman']
bi = bb[itera]['spearman']

In [None]:
# Stack the arrays of both seeds row-wise (rows of `ai` first, then `bi`).
c = np.vstack([ai, bi])

In [None]:
# Display the shape of the stacked array.
c.shape

In [None]:
import scipy.stats as st

# 95 % Student-t confidence interval of the mean, computed separately for
# every column of `c` (rows of `c` are treated as the samples).
# `t` holds the lower and `y` the upper interval bounds, one per column.
# The degrees of freedom are the number of rows minus one, and the interval
# is centred on the per-column mean so that it matches the per-column
# standard error returned by `st.sem`.
t, y = st.t.interval(
    0.95,
    c.shape[0] - 1,
    loc=np.mean(c, axis=0),
    scale=st.sem(c, axis=0),
)

In [None]:
# Display the largest lower bound (`t`) and the largest upper bound (`y`)
# of the intervals.
max(t), max(y)