In [None]:
from gsa_framework.test_functions import Morris4
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.methods.delta_moment import DeltaMoment
from gsa_framework.validation import Validation
from gsa_framework.convergence import Convergence
from pathlib import Path
import time

def xgboost_settings(num_params):
    """Return ``(num_boost_round, tuning_parameters)`` for a Morris4 problem size.

    Only the problem sizes that were tuned in this script are supported
    (1000, 5000 and 10000 parameters).  All settings are shared between the
    sizes except ``subsample``.

    Parameters
    ----------
    num_params : int
        Number of model parameters.

    Returns
    -------
    tuple
        ``num_boost_round`` (int) and a freshly built ``tuning_parameters``
        dict to pass to ``GradientBoosting``.

    Raises
    ------
    ValueError
        If no settings are defined for ``num_params``.  Without this check an
        unsupported size would only fail later with a ``NameError``.
    """
    subsample_by_num_params = {1000: 0.35, 5000: 0.65, 10000: 0.65}
    try:
        subsample = subsample_by_num_params[num_params]
    except KeyError:
        raise ValueError(
            "No xgboost tuning parameters defined for num_params={}; "
            "supported values are {}".format(
                num_params, sorted(subsample_by_num_params)
            )
        ) from None
    tuning_parameters = {
        "max_depth": 2,  # higher than 10 is definitely not good
        "eta": 0.25,
        "objective": "reg:squarederror",
        "n_jobs": -1,
        "refresh_leaf": True,
        "subsample": subsample,
        "min_child_weight": 0.5,
    }
    num_boost_round = 300
    return num_boost_round, tuning_parameters


def run_validation(
    model,
    gsa_indices,
    num_influential,
    tag,
    iterations,
    seed,
    write_dir,
    fig_format,
):
    """Run the validation step shared by the GSA branches below.

    Builds a ``Validation`` object, calls ``get_influential_Y_from_gsa`` with
    the given indices, prints the elapsed time of these two steps and then
    calls ``plot_histogram_Y_all_Y_inf``.

    Parameters
    ----------
    model : object
        Model passed on to ``Validation``.
    gsa_indices : array-like
        Sensitivity indices handed to ``get_influential_Y_from_gsa``.
    num_influential : int
        Number of influential inputs used for validation.
    tag : str
        Label forwarded to both ``Validation`` calls.
    iterations : int
        Number of validation iterations.
    seed : int
        Random seed for ``Validation``.
    write_dir : pathlib.Path
        Directory passed on to ``Validation``.
    fig_format : list of str
        Figure formats forwarded to the plotting call.

    Returns
    -------
    tuple
        The ``Validation`` instance and the ``influential_Y`` result.
    """
    t0 = time.time()
    val = Validation(
        model=model,
        iterations=iterations,
        seed=seed,
        default_x_rescaled=None,
        write_dir=write_dir,
    )
    influential_Y = val.get_influential_Y_from_gsa(
        gsa_indices, num_influential, tag=tag
    )
    t1 = time.time()
    print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
    val.plot_histogram_Y_all_Y_inf(
        influential_Y, num_influential, tag=tag, fig_format=fig_format
    )
    return val, influential_Y


if __name__ == "__main__":

    path_base = Path("/data/user/kim_a/paper_gsa/")

    # 1. Models
    num_params = 1000
    num_influential = num_params // 100
    iterations_validation = 2000
    write_dir = path_base / "{}_morris4".format(num_params)
    model = Morris4(num_params=num_params, num_influential=num_influential)
    gsa_seed = 3407
    validation_seed = 7043
    num_influential_validation = num_influential

    fig_format = ["pickle"]  # can have elements "pdf", "html", "pickle"

    # TODO Choose which GSA to perform
    flag_sobol = 0
    flag_correlation = 0
    flag_xgboost = 0
    flag_delta = 1

    # NOTE: the commented-out Convergence snippets below use `parameter_inds`,
    # which is not defined in this script; define it before enabling them.

    if flag_sobol:
        iterations = 100 * num_params
        gsa = SaltelliSobol(iterations=iterations, model=model, write_dir=write_dir)
        # S_dict = gsa.generate_gsa_indices()
        S_dict = gsa.perform_gsa()
        first = S_dict["First order"]
        total = S_dict["Total order"]
        # gsa.plot_sa_results(
        #     S_dict,
        #     S_dict_analytical=model.S_dict_analytical,
        #     fig_format=fig_format,
        # )

        tag = "TotalIndex"
        val, influential_Y = run_validation(
            model=model,
            gsa_indices=total,
            num_influential=num_influential_validation,
            tag=tag,
            iterations=iterations_validation,
            seed=validation_seed,
            write_dir=write_dir,
            fig_format=fig_format,
        )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(parameter_inds=parameter_inds, fig_format=fig_format)

    if flag_correlation:
        iterations = 4 * num_params
        gsa = CorrelationCoefficients(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            seed=gsa_seed,
        )
        S_dict = gsa.perform_gsa()
        pearson = S_dict["pearson"]
        spearman = S_dict["spearman"]
        # gsa.plot_sa_results(S_dict, S_boolean=model.S_boolean, fig_format=fig_format)

        tag = "SpearmanIndex"
        val, influential_Y = run_validation(
            model=model,
            gsa_indices=spearman,
            num_influential=num_influential_validation,
            tag=tag,
            iterations=iterations_validation,
            seed=validation_seed,
            write_dir=write_dir,
            fig_format=fig_format,
        )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(
        #     parameter_inds=parameter_inds,
        #     fig_format=fig_format,
        # )

    if flag_delta:
        iterations = 4 * num_params
        num_resamples = 1
        gsa = DeltaMoment(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            num_resamples=num_resamples,
            seed=gsa_seed,
        )
        S_dict = gsa.perform_gsa()
        # Drop the "delta_conf" entry before plotting; tolerate its absence
        # instead of failing with a KeyError.
        S_dict.pop("delta_conf", None)
        gsa.plot_sa_results(
            S_dict,
            S_boolean=model.S_boolean,
            fig_format=fig_format,
        )

    if flag_xgboost:
        # Raises ValueError for problem sizes without tuned settings.
        num_boost_round, tuning_parameters = xgboost_settings(num_params)
        iterations = 10000
        gsa = GradientBoosting(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            seed=gsa_seed,
            tuning_parameters=tuning_parameters,
            num_boost_round=num_boost_round,
            xgb_model=None,
        )
        S_dict, r2, ev = gsa.perform_gsa(flag_save_S_dict=True, return_stats=True)
        print(r2, ev)
        # fscores = S_dict["fscores"]
        # gsa.plot_sa_results(
        #     S_dict,
        #     S_boolean=model.S_boolean,
        #     fig_format=fig_format,
        # )
        #
        # tag = "FscoresIndex"
        # val, influential_Y = run_validation(
        #     model=model,
        #     gsa_indices=fscores,
        #     num_influential=num_influential_validation,
        #     tag=tag,
        #     iterations=iterations_validation,
        #     seed=validation_seed,
        #     write_dir=write_dir,
        #     fig_format=fig_format,
        # )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(
        #     parameter_inds=parameter_inds,
        #     fig_format=fig_format,
        # )

# Stability

In [4]:
from setups_paper_gwp import *
from copy import deepcopy
from gsa_framework.sensitivity_analysis.correlations import corrcoef_parallel_stability_spearman
from gsa_framework.test_functions import Morris4

In [5]:
# Problem definition: Morris4 test function, one in a hundred inputs influential.
num_params = 10000
num_influential = num_params // 100
model = Morris4(num_params=num_params, num_influential=num_influential)

# Where the GSA results for this problem size live.
path_base = Path('/data/user/kim_a/paper_gsa/')
write_dir = path_base / "{}_morris4".format(num_params)
gsa_seed = 3407
fig_format = ["pickle"]  # can have elements "pdf", "html", "pickle"

# Correlation-based GSA object; used here for its file paths, labels and seed.
iter_corr = 4 * num_params
gsa = CorrelationCoefficients(
    iterations=iter_corr, model=model, write_dir=write_dir, seed=gsa_seed
)

# read X and Y
X_rescaled = read_hdf5_array(gsa.filepath_X_rescaled)
Y = read_hdf5_array(gsa.filepath_Y).flatten()

num_bootstrap = 60
num_steps = 50

# Convergence class
conv = Convergence(
    gsa.filepath_Y,
    gsa.num_params,
    gsa.generate_gsa_indices,
    gsa.gsa_label,
    gsa.write_dir,
    num_steps=num_steps,
)

# Directory for the per-step intermediate stability results.
write_dir_stability = gsa.write_dir / 'stability_intermediate_{}'.format(gsa.gsa_label)
write_dir_stability.mkdir(parents=True, exist_ok=True)

# Generate random seeds: one per (convergence step, bootstrap replicate),
# drawn reproducibly from the GSA seed.
np.random.seed(gsa.seed)
stability_seeds = np.random.randint(
    0,
    2147483647,
    size=(len(conv.iterations_for_convergence), num_bootstrap),
)

In [6]:
%%time
# Stability of the Spearman coefficients: for every convergence step, draw
# `num_bootstrap` random subsamples (without replacement) of the stored model
# runs and recompute the coefficients on each subsample.  Results are cached
# as pickles, both per step and for the complete run, so an interrupted
# computation resumes from the steps already written.
filename_S = "stability.S.{}.{}.{}Step{}.{}.{}.pickle".format(
    gsa.gsa_label,
    gsa.sampling_label,
    gsa.iterations,
    conv.iterations_step,
    num_bootstrap,
    gsa.seed,
)
filepath_S = gsa.write_dir / "arrays" / filename_S
if filepath_S.exists():
    print("--> {} already exists".format(filename_S))
    S_dict_stability = read_pickle(filepath_S)
else:
    S_dict_stability = {}
    for i, iterations_current in enumerate(conv.iterations_for_convergence):
        print("{}".format(iterations_current))
        filename_S_current = "S.{}Step{}.{}.{}.pickle".format(
            iterations_current, conv.iterations_step, num_bootstrap, gsa.seed
        )
        filepath_S_current = write_dir_stability / filename_S_current
        if filepath_S_current.exists():
            print("--> {} already exists".format(filename_S_current))
            S_dict = read_pickle(filepath_S_current)
        else:
            # Collect one row per replicate and stack once at the end;
            # stacking inside the loop recopies the whole array every time.
            S_rows = []
            for j in range(num_bootstrap):
                # Re-seed per replicate so each subsample is reproducible.
                stability_seed = stability_seeds[i, j]
                np.random.seed(stability_seed)
                choice = np.random.choice(
                    np.arange(gsa.iterations), iterations_current, replace=False
                )
                Y_current = Y[choice]
                X_current = X_rescaled[choice, :]
                S_current = corrcoef_parallel_stability_spearman(
                    Y_current, X_current
                )["spearman"]
                S_rows.append(S_current)
            # The leading empty float64 array keeps the shape and dtype
            # identical to the incremental stacking used previously.
            S_array = np.vstack([np.zeros([0, num_params])] + S_rows)
            S_dict = {iterations_current: {"spearman": S_array}}
            write_pickle(S_dict, filepath_S_current)
        S_dict_stability.update(S_dict)
    write_pickle(S_dict_stability, filepath_S)



800
1600
2400
3200
4000
4800
5600
6400
7200
8000
8800
9600
10400
11200
12000
12800
13600
14400
15200
16000
16800
17600
18400
19200
20000
20800
21600
22400
23200
24000
24800
25600
26400
27200
28000
28800
29600
30400
31200
32000
32800
33600
34400
35200
36000
36800
37600
38400
39200
CPU times: user 3h 7min 31s, sys: 2h 7min 5s, total: 5h 14min 37s
Wall time: 9h 17min 49s


In [7]:
from setups_paper_gwp import *
from copy import deepcopy
from gsa_framework.sensitivity_analysis.correlations import corrcoef_parallel_stability_spearman
from gsa_framework.test_functions import Morris4

In [8]:
# Problem definition: Morris4 test function, one in a hundred inputs influential.
num_params = 1000
num_influential = num_params // 100
model = Morris4(num_params=num_params, num_influential=num_influential)

# Where the GSA results for this problem size live.
path_base = Path('/data/user/kim_a/paper_gsa/')
write_dir = path_base / "{}_morris4".format(num_params)
gsa_seed = 3407
fig_format = ["pickle"]  # can have elements "pdf", "html", "pickle"

# Correlation-based GSA object; used here for its file paths, labels and seed.
iter_corr = 4 * num_params
gsa = CorrelationCoefficients(
    iterations=iter_corr, model=model, write_dir=write_dir, seed=gsa_seed
)

# read X and Y
X_rescaled = read_hdf5_array(gsa.filepath_X_rescaled)
Y = read_hdf5_array(gsa.filepath_Y).flatten()

num_bootstrap = 60
num_steps = 50

# Convergence class
conv = Convergence(
    gsa.filepath_Y,
    gsa.num_params,
    gsa.generate_gsa_indices,
    gsa.gsa_label,
    gsa.write_dir,
    num_steps=num_steps,
)

# Directory for the per-step intermediate stability results.
write_dir_stability = gsa.write_dir / 'stability_intermediate_{}'.format(gsa.gsa_label)
write_dir_stability.mkdir(parents=True, exist_ok=True)

# Generate random seeds: one per (convergence step, bootstrap replicate),
# drawn reproducibly from the GSA seed.
np.random.seed(gsa.seed)
stability_seeds = np.random.randint(
    0,
    2147483647,
    size=(len(conv.iterations_for_convergence), num_bootstrap),
)

In [9]:
%%time
# Stability of the Spearman coefficients: for every convergence step, draw
# `num_bootstrap` random subsamples (without replacement) of the stored model
# runs and recompute the coefficients on each subsample.  Results are cached
# as pickles, both per step and for the complete run, so an interrupted
# computation resumes from the steps already written.
filename_S = "stability.S.{}.{}.{}Step{}.{}.{}.pickle".format(
    gsa.gsa_label,
    gsa.sampling_label,
    gsa.iterations,
    conv.iterations_step,
    num_bootstrap,
    gsa.seed,
)
filepath_S = gsa.write_dir / "arrays" / filename_S
if filepath_S.exists():
    print("--> {} already exists".format(filename_S))
    S_dict_stability = read_pickle(filepath_S)
else:
    S_dict_stability = {}
    for i, iterations_current in enumerate(conv.iterations_for_convergence):
        print("{}".format(iterations_current))
        filename_S_current = "S.{}Step{}.{}.{}.pickle".format(
            iterations_current, conv.iterations_step, num_bootstrap, gsa.seed
        )
        filepath_S_current = write_dir_stability / filename_S_current
        if filepath_S_current.exists():
            print("--> {} already exists".format(filename_S_current))
            S_dict = read_pickle(filepath_S_current)
        else:
            # Collect one row per replicate and stack once at the end;
            # stacking inside the loop recopies the whole array every time.
            S_rows = []
            for j in range(num_bootstrap):
                # Re-seed per replicate so each subsample is reproducible.
                stability_seed = stability_seeds[i, j]
                np.random.seed(stability_seed)
                choice = np.random.choice(
                    np.arange(gsa.iterations), iterations_current, replace=False
                )
                Y_current = Y[choice]
                X_current = X_rescaled[choice, :]
                S_current = corrcoef_parallel_stability_spearman(
                    Y_current, X_current
                )["spearman"]
                S_rows.append(S_current)
            # The leading empty float64 array keeps the shape and dtype
            # identical to the incremental stacking used previously.
            S_array = np.vstack([np.zeros([0, num_params])] + S_rows)
            S_dict = {iterations_current: {"spearman": S_array}}
            write_pickle(S_dict, filepath_S_current)
        S_dict_stability.update(S_dict)
    write_pickle(S_dict_stability, filepath_S)



80
160
240
320
400
480
560
640
720
800
880
960
1040
1120
1200
1280
1360
1440
1520
1600
1680
1760
1840
1920
2000
2080
2160
2240
2320
2400
2480
2560
2640
2720
2800
2880
2960
3040
3120
3200
3280
3360
3440
3520
3600
3680
3760
3840
3920
CPU times: user 2min 52s, sys: 7min 6s, total: 9min 59s
Wall time: 25min 48s


In [10]:
from setups_paper_gwp import *
from copy import deepcopy
from gsa_framework.sensitivity_analysis.correlations import corrcoef_parallel_stability_spearman
from gsa_framework.test_functions import Morris4

In [11]:
# Problem definition: Morris4 test function, one in a hundred inputs influential.
num_params = 5000
num_influential = num_params // 100
model = Morris4(num_params=num_params, num_influential=num_influential)

# Where the GSA results for this problem size live.
path_base = Path('/data/user/kim_a/paper_gsa/')
write_dir = path_base / "{}_morris4".format(num_params)
gsa_seed = 3407
fig_format = ["pickle"]  # can have elements "pdf", "html", "pickle"

# Correlation-based GSA object; used here for its file paths, labels and seed.
iter_corr = 4 * num_params
gsa = CorrelationCoefficients(
    iterations=iter_corr, model=model, write_dir=write_dir, seed=gsa_seed
)

# read X and Y
X_rescaled = read_hdf5_array(gsa.filepath_X_rescaled)
Y = read_hdf5_array(gsa.filepath_Y).flatten()

num_bootstrap = 60
num_steps = 50

# Convergence class
conv = Convergence(
    gsa.filepath_Y,
    gsa.num_params,
    gsa.generate_gsa_indices,
    gsa.gsa_label,
    gsa.write_dir,
    num_steps=num_steps,
)

# Directory for the per-step intermediate stability results.
write_dir_stability = gsa.write_dir / 'stability_intermediate_{}'.format(gsa.gsa_label)
write_dir_stability.mkdir(parents=True, exist_ok=True)

# Generate random seeds: one per (convergence step, bootstrap replicate),
# drawn reproducibly from the GSA seed.
np.random.seed(gsa.seed)
stability_seeds = np.random.randint(
    0,
    2147483647,
    size=(len(conv.iterations_for_convergence), num_bootstrap),
)

In [None]:
%%time
# Stability of the Spearman coefficients: for every convergence step, draw
# `num_bootstrap` random subsamples (without replacement) of the stored model
# runs and recompute the coefficients on each subsample.  Results are cached
# as pickles, both per step and for the complete run, so an interrupted
# computation resumes from the steps already written.
filename_S = "stability.S.{}.{}.{}Step{}.{}.{}.pickle".format(
    gsa.gsa_label,
    gsa.sampling_label,
    gsa.iterations,
    conv.iterations_step,
    num_bootstrap,
    gsa.seed,
)
filepath_S = gsa.write_dir / "arrays" / filename_S
if filepath_S.exists():
    print("--> {} already exists".format(filename_S))
    S_dict_stability = read_pickle(filepath_S)
else:
    S_dict_stability = {}
    for i, iterations_current in enumerate(conv.iterations_for_convergence):
        print("{}".format(iterations_current))
        filename_S_current = "S.{}Step{}.{}.{}.pickle".format(
            iterations_current, conv.iterations_step, num_bootstrap, gsa.seed
        )
        filepath_S_current = write_dir_stability / filename_S_current
        if filepath_S_current.exists():
            print("--> {} already exists".format(filename_S_current))
            S_dict = read_pickle(filepath_S_current)
        else:
            # Collect one row per replicate and stack once at the end;
            # stacking inside the loop recopies the whole array every time.
            S_rows = []
            for j in range(num_bootstrap):
                # Re-seed per replicate so each subsample is reproducible.
                stability_seed = stability_seeds[i, j]
                np.random.seed(stability_seed)
                choice = np.random.choice(
                    np.arange(gsa.iterations), iterations_current, replace=False
                )
                Y_current = Y[choice]
                X_current = X_rescaled[choice, :]
                S_current = corrcoef_parallel_stability_spearman(
                    Y_current, X_current
                )["spearman"]
                S_rows.append(S_current)
            # The leading empty float64 array keeps the shape and dtype
            # identical to the incremental stacking used previously.
            S_array = np.vstack([np.zeros([0, num_params])] + S_rows)
            S_dict = {iterations_current: {"spearman": S_array}}
            write_pickle(S_dict, filepath_S_current)
        S_dict_stability.update(S_dict)
    write_pickle(S_dict_stability, filepath_S)



400
800
1200
1600
2000
2400
2800
3200
3600
4000
4400
4800
