In [None]:
from gsa_framework.test_functions import Morris
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.extended_FAST import eFAST
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.methods.delta_moment import DeltaMoment
from gsa_framework.validation import Validation
from gsa_framework.convergence import Convergence
from pathlib import Path
import time

if __name__ == "__main__":
    # Driver for GSA experiments on the Morris test model: build the model,
    # pick the GSA method(s) with the flags below and run them. Every GSA
    # object is handed ``write_dir`` as its output directory.

    # path_base = Path(
    #     "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
    # )
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # 1. Models
    num_params = 1000
    # num_influential = num_params // 100
    num_influential = 100
    iterations_validation = 2000
    # The directory name records which rule was used to set num_influential.
    if num_influential == num_params // 100:
        write_dir = path_base / "morris_model_{}_1percent".format(num_params)
    elif num_influential == 100:
        write_dir = path_base / "morris_model_{}_100".format(num_params)
    else:
        # Without this branch ``write_dir`` would stay undefined and the script
        # would fail later with a NameError that hides the real cause.
        raise ValueError(
            "num_influential must be 100 or num_params // 100, got {}".format(
                num_influential
            )
        )
    model = Morris(num_params=num_params, num_influential=num_influential)
    gsa_seed = 3407
    validation_seed = 7043

    fig_format = ["html", "pickle"]  # can have elements "pdf", "html", "pickle"

    # Parameter indices for the (currently disabled) convergence plots: the
    # first ``num_params_correlation_plot`` indices plus the same number of
    # indices starting at ``num_influential``.
    num_params_correlation_plot = 10
    parameter_inds = list(range(num_params_correlation_plot)) + list(
        range(num_influential, num_influential + num_params_correlation_plot)
    )

    # TODO Choose which GSA to perform
    flag_sobol = 0
    flag_correlation = 0
    flag_eFAST = 0
    flag_xgboost = 1
    flag_delta = 0

    # --- Sobol indices (SaltelliSobol) ---
    if flag_sobol:
        iterations = 100 * num_params
        gsa = SaltelliSobol(iterations=iterations, model=model, write_dir=write_dir)
        # S_dict = gsa.generate_gsa_indices()
        S_dict = gsa.perform_gsa()
        first = S_dict["First order"]
        total = S_dict["Total order"]
        # gsa.plot_sa_results(
        #     S_dict,
        #     S_dict_analytical=model.S_dict_analytical,
        #     fig_format=fig_format,
        # )

        # t0 = time.time()
        # val = Validation(
        #     model=model,
        #     iterations=iterations_validation,
        #     seed=validation_seed,
        #     default_x_rescaled=None,
        #     write_dir=write_dir,
        # )
        # tag = "SaltelliTotalIndex"
        # influential_Y = val.get_influential_Y_from_gsa(total, num_influential, tag=tag)
        # t1 = time.time()
        # print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        # val.plot_histogram_Y_all_Y_inf(
        #     influential_Y, num_influential, tag=tag, fig_format=fig_format
        # )
        #
        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(parameter_inds=parameter_inds, fig_format=fig_format)

    # --- Correlation coefficients ---
    if flag_correlation:
        iterations = 10000  # * num_params
        gsa = CorrelationCoefficients(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            seed=gsa_seed,
        )
        S_dict = gsa.perform_gsa()
        # pearson = S_dict["pearson"]
        # spearman = S_dict["spearman"]
        # gsa.plot_sa_results(S_dict, S_boolean=model.S_boolean, fig_format=fig_format)

        # t0 = time.time()
        # val = Validation(
        #     model=model,
        #     iterations=iterations_validation,
        #     seed=validation_seed,
        #     default_x_rescaled=None,
        #     write_dir=write_dir,
        # )
        # tag = "SpearmanIndex"
        # influential_Y = val.get_influential_Y_from_gsa(
        #     spearman, num_influential, tag=tag
        # )
        # t1 = time.time()
        # print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        # val.plot_histogram_Y_all_Y_inf(
        #     influential_Y, num_influential, tag=tag, fig_format=fig_format
        # )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(
        #     parameter_inds=parameter_inds,
        #     fig_format=fig_format,
        # )

    # --- Extended FAST ---
    if flag_eFAST:
        iterations = 100 * num_params
        M = 2
        gsa = eFAST(
            M=M, iterations=iterations, model=model, write_dir=write_dir, seed=gsa_seed
        )
        S_dict = gsa.perform_gsa()
        # S_dict = gsa.generate_gsa_indices()
        first = S_dict["First order"]
        total = S_dict["Total order"]
        # gsa.plot_sa_results(
        #     S_dict,
        #     S_dict_analytical=model.S_dict_analytical,
        #     fig_format=fig_format,
        # )

        # t0 = time.time()
        # val = Validation(
        #     model=model,
        #     iterations=iterations_validation,
        #     seed=validation_seed,
        #     default_x_rescaled=None,
        #     write_dir=write_dir,
        # )
        # tag = "eFastTotalIndex"
        # influential_Y = val.get_influential_Y_from_gsa(total, num_influential, tag=tag)
        # t1 = time.time()
        # print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        # val.plot_histogram_Y_all_Y_inf(
        #     influential_Y, num_influential, tag=tag, fig_format=fig_format
        # )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        #     M=M,
        # )
        # conv.run_convergence(
        #     parameter_inds=parameter_inds,
        #     fig_format=fig_format,
        # )

    # --- Gradient boosting (XGBoost) ---
    if flag_xgboost:
        # The tuned settings differ between model sizes only in ``subsample``.
        subsample_by_num_params = {1000: 0.35, 5000: 0.65, 10000: 0.65}
        if num_params not in subsample_by_num_params:
            # Previously an unsupported size left ``tuning_parameters`` and
            # ``num_boost_round`` undefined, which surfaced as a NameError.
            raise ValueError(
                "No tuned XGBoost parameters for num_params={}; "
                "supported values: {}".format(
                    num_params, sorted(subsample_by_num_params)
                )
            )
        num_boost_round = 300
        tuning_parameters = {
            "max_depth": 2,  # higher than 10 is definitely not good
            "eta": 0.25,
            "objective": "reg:squarederror",
            "n_jobs": -1,
            "refresh_leaf": True,
            "subsample": subsample_by_num_params[num_params],
            "min_child_weight": 0.5,
        }
        iterations = 10000
        gsa = GradientBoosting(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            seed=gsa_seed,
            tuning_parameters=tuning_parameters,
            num_boost_round=num_boost_round,
            xgb_model=None,
        )
        S_dict, r2, ev = gsa.perform_gsa(flag_save_S_dict=True, return_stats=True)
        print(r2, ev)
        # fscores = S_dict["fscores"]
        # gsa.plot_sa_results(
        #     S_dict,
        #     S_boolean=model.S_boolean,
        #     fig_format=fig_format,
        # )
        #
        # t0 = time.time()
        # val = Validation(
        #     model=model,
        #     iterations=iterations_validation,
        #     seed=validation_seed,
        #     default_x_rescaled=None,
        #     write_dir=write_dir,
        # )
        # tag = "FscoresIndex"
        # influential_Y = val.get_influential_Y_from_gsa(
        #     fscores, num_influential, tag=tag
        # )
        # t1 = time.time()
        # print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        # val.plot_histogram_Y_all_Y_inf(
        #     influential_Y, num_influential, tag=tag, fig_format=fig_format
        # )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(
        #     parameter_inds=parameter_inds,
        #     fig_format=fig_format,
        # )

    # --- Delta moment-independent indices ---
    if flag_delta:
        iterations = 30000  # 2 * num_params
        num_resamples = 1
        gsa = DeltaMoment(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            num_resamples=num_resamples,
            seed=gsa_seed,
        )
        S_dict = gsa.perform_gsa()
        # delta = S_dict["delta"]
        # gsa.plot_sa_results(
        #     S_dict,
        #     S_boolean=model.S_boolean,
        #     fig_format=fig_format,
        # )
        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=25,
        # )
        # conv.run_convergence(parameter_inds=parameter_inds, fig_format=fig_format)


In [None]:
# Plot the indices in S_dict from the most recent GSA run, passing the
# model's S_boolean and the output formats listed in fig_format.
gsa.plot_sa_results(S_dict, S_boolean=model.S_boolean, fig_format=fig_format)

In [None]:
import multiprocessing 

In [None]:
# Show the CPU count reported by multiprocessing on this machine.
multiprocessing.cpu_count()

# DASK

In [None]:
from dask.distributed import Client, LocalCluster
from dask_jobqueue import SLURMCluster
import os
import dask

In [None]:
# Choose the Dask cluster backend from the machine label: a label containing
# 'merlin' starts a SLURM-backed cluster, one containing 'local' a LocalCluster.
which_pc = "merlin_paper_gsa"
if 'merlin' in which_pc:
    path_dask_logs = '/data/user/kim_a/dask_logs'
    # exist_ok=True replaces the exists()/makedirs() pair, which could race
    # with another process creating the same directory.
    os.makedirs(path_dask_logs, exist_ok=True)
    cluster = SLURMCluster(
        cores=8,
        processes=3,
        memory="90GB",
        walltime='20:00:00',
        interface='ib0',
        local_directory=path_dask_logs,
        log_directory=path_dask_logs,
        queue="daily",
    )
elif 'local' in which_pc:
    cluster = LocalCluster(memory_limit='7GB')
else:
    # Without this branch ``cluster`` would be undefined and the next cell
    # (``Client(cluster)``) would fail with a NameError.
    raise ValueError(
        "which_pc must contain 'merlin' or 'local', got {!r}".format(which_pc)
    )

In [None]:
# Connect a distributed client to the cluster created in the previous cell.
client = Client(cluster)

In [None]:
# Ask the cluster to scale to n_workers workers.
n_workers = 3
cluster.scale(n_workers)

In [None]:
# Display the client as the cell output.
client

In [None]:
# client.close()
# cluster.close()

In [None]:
from gsa_framework.test_functions import Morris
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.extended_FAST import eFAST
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.methods.delta_moment import DeltaMoment
from gsa_framework.validation import Validation
from gsa_framework.convergence import Convergence
from pathlib import Path
import time
import brightway2 as bw
import numpy as np
from gsa_framework.plotting import histogram_Y1_Y2
from gsa_framework.utils import read_hdf5_array


def compute_per_worker_delta(num_params):
    """Run the convergence analysis of the delta indices on a Morris model.

    Builds a ``Morris`` model and a ``DeltaMoment`` GSA object, then passes
    the GSA's output file path, parameter count, index generator and label to
    ``Convergence`` and runs it with 25 steps.

    ``perform_gsa`` is not called here (it is commented out below), so this
    presumably relies on the model outputs at ``gsa.filepath_Y`` already
    being available -- TODO confirm.

    Parameters
    ----------
    num_params : int
        Number of inputs of the Morris model; also part of the output
        directory name.

    Returns
    -------
    None
    """
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')
    # 1. Models
    # num_influential = num_params // 100
    num_influential = 100
    # The directory name records which rule was used to set num_influential.
    if num_influential == num_params // 100:
        write_dir = path_base / "morris_model_{}_1percent".format(num_params)
    elif num_influential == 100:
        write_dir = path_base / "morris_model_{}_100".format(num_params)
    model = Morris(num_params=num_params, num_influential=num_influential)
    gsa_seed = 3407

    fig_format = ["html", "pickle"]  # can have elements "pdf", "html", "pickle"
    # Parameter indices passed to the convergence run: the first
    # ``num_params_correlation_plot`` indices plus the same number of indices
    # starting at ``num_influential``.
    num_params_correlation_plot = 10
    parameter_inds = list(range(num_params_correlation_plot)) + list(
        range(num_influential, num_influential + num_params_correlation_plot)
    )
    iterations = 30000  # 2 * num_params
    num_resamples = 1

    gsa = DeltaMoment(
        iterations=iterations,
        model=model,
        write_dir=write_dir,
        num_resamples=num_resamples,
        seed=gsa_seed,
    )
    # S_dict = gsa.perform_gsa()
    # delta = S_dict["delta"]
    conv = Convergence(
        gsa.filepath_Y,
        gsa.num_params,
        gsa.generate_gsa_indices,
        gsa.gsa_label,
        write_dir,
        num_steps=25,
    )
    conv.run_convergence(parameter_inds=parameter_inds, fig_format=fig_format)

In [None]:
def compute_per_worker_delta_lca():
    """Run the convergence analysis of the delta indices on the LCA model.

    Selects the brightway project "GSA for paper", takes the first activity
    of the "CH consumption 1.0" database whose name contains "Food", builds
    an ``LCAModel`` for a demand of 1 of that activity, and runs
    ``Convergence`` (25 steps) on a ``DeltaMoment`` GSA object for
    parameter indices 0, 1 and 2.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If no activity with "Food" in its name exists in the database.
    """
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')
    # Define some variables
    num_params = 10000
    iterations = 20000
    num_resamples = 1
    # LCA model
    bw.projects.set_current("GSA for paper")
    co = bw.Database("CH consumption 1.0")
    food_activities = [act for act in co if "Food" in act['name']]
    if not food_activities:
        # Same exception type as the former bare ``[...][0]`` lookup, but with
        # a message that names the actual problem.
        raise IndexError(
            'No activity with "Food" in its name found in database '
            '"CH consumption 1.0"'
        )
    act = food_activities[0]
    demand = {act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")
    # Define some variables
    write_dir = path_base / "lca_model_{}".format(num_params)
    model = LCAModel(demand, method, write_dir, num_params=num_params)
    gsa_seed = 3403

    fig_format = ["html", "pickle"]  # can have elements "pdf", "html", "pickle"
    parameter_inds = [0, 1, 2]

    # Setup GSA
    gsa = DeltaMoment(
        iterations=iterations,
        model=model,
        write_dir=write_dir,
        num_resamples=num_resamples,
        seed=gsa_seed,
    )
    conv = Convergence(
        gsa.filepath_Y,
        gsa.num_params,
        gsa.generate_gsa_indices,
        gsa.gsa_label,
        write_dir,
        num_steps=25,
    )
    conv.run_convergence(parameter_inds=parameter_inds, fig_format=fig_format)

In [None]:
def compute_per_worker_xgboost_lca():
    """Run gradient-boosting GSA and its convergence analysis on the LCA model.

    Selects the brightway project "GSA for paper", takes the first activity
    of the "CH consumption 1.0" database whose name contains "Food", builds
    an ``LCAModel`` for a demand of 1 of that activity, runs
    ``GradientBoosting.perform_gsa`` (saving the indices and returning the
    two statistics that are printed), and finally runs ``Convergence``
    (25 steps) for parameter indices 1, 2 and 3.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If no activity with "Food" in its name exists in the database.
    """
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # LCA model
    bw.projects.set_current("GSA for paper")
    co = bw.Database("CH consumption 1.0")
    food_activities = [act for act in co if "Food" in act["name"]]
    if not food_activities:
        # Same exception type as the former bare ``[...][0]`` lookup, but with
        # a message that names the actual problem.
        raise IndexError(
            'No activity with "Food" in its name found in database '
            '"CH consumption 1.0"'
        )
    act = food_activities[0]
    demand = {act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")

    # Define some variables
    num_params = 10000
    write_dir = path_base / "lca_model_{}".format(num_params)
    model = LCAModel(demand, method, write_dir, num_params=num_params)
    gsa_seed = 3403
    fig_format = ["html", "pickle"]

    parameter_inds_convergence_plot = [1, 2, 3]  # TODO choose for convergence

    num_boost_round = 400
    tuning_parameters = {
        'max_depth': 6,
        'eta': 0.1,
        'objective': 'reg:squarederror',
        'n_jobs': -1,
        'refresh_leaf': True,
        'subsample': 0.6,
        'min_child_weight': 0.5,
    }
    iterations = 2 * num_params
    gsa = GradientBoosting(
        iterations=iterations,
        model=model,
        write_dir=write_dir,
        seed=gsa_seed,
        tuning_parameters=tuning_parameters,
        num_boost_round=num_boost_round,
        xgb_model=None,
    )
    S_dict, r2, ev = gsa.perform_gsa(flag_save_S_dict=True, return_stats=True)
    try:
        print(r2, ev)
    except Exception:
        # Printing the statistics stays best-effort, but unlike the former
        # bare ``except`` this no longer swallows KeyboardInterrupt/SystemExit.
        pass

    conv = Convergence(
        gsa.filepath_Y,
        gsa.num_params,
        gsa.generate_gsa_indices,
        gsa.gsa_label,
        write_dir,
        num_steps=25,
    )
    conv.run_convergence(
        parameter_inds=parameter_inds_convergence_plot,
        fig_format=fig_format,
    )


In [None]:
# Run the XGBoost LCA task directly in this process (not through dask).
compute_per_worker_xgboost_lca()

In [None]:
# Delayed tasks collected in this list are later passed to dask.compute().
model_evals = []

# Disabled alternative: delta convergence on the Morris model.
# task_per_worker_delta = dask.delayed(compute_per_worker_delta)
# for num_params in [10000]:
#     model_eval = task_per_worker_delta(num_params)
#     model_evals.append(model_eval)

# Disabled alternative: delta convergence on the LCA model.
# task_per_worker_delta_lca = dask.delayed(compute_per_worker_delta_lca)
# model_eval = task_per_worker_delta_lca()
# model_evals.append(model_eval)

# Active task: XGBoost GSA on the LCA model, wrapped as a delayed call.
task_per_worker_xgboost_lca = dask.delayed(compute_per_worker_xgboost_lca)
model_eval = task_per_worker_xgboost_lca()
model_evals += [model_eval]

In [None]:
# Display the list of delayed tasks as the cell output.
model_evals

In [None]:
%%time
# Execute all collected delayed tasks; the %%time cell magic times the cell.
dask.compute(model_evals)

In [None]:
import pickle
from pathlib import Path
# Load the pickled object from the LSA_scores file for inspection.
# NOTE: this rebinds the notebook-level name ``path_base`` (a directory in
# other cells) to the path of a single file.
path_base = Path("/data/user/kim_a/oases_gsa/LSA_scores/LSA_scores_96180_96709.pickle")
with path_base.open("rb") as pickle_file:
    a = pickle.load(pickle_file)

In [None]:
# Inspect the first element of 'scores' stored under key 96252 of the loaded object.
a[96252]['scores'][0]

In [None]:
from gsa_framework.test_functions import Morris4
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.methods.delta_moment import DeltaMoment
from gsa_framework.validation import Validation
from gsa_framework.convergence import Convergence
from pathlib import Path
import time

if __name__ == "__main__":
    # Driver for GSA experiments on the Morris4 test model: build the model,
    # pick the GSA method(s) with the flags below, run them and, where
    # enabled, validate the ranking with ``Validation``.

    path_base = Path('/data/user/kim_a/paper_gsa/')

    # 1. Models
    num_params = 10000
    num_influential = num_params // 100
    iterations_validation = 2000
    write_dir = path_base / "{}_morris4".format(num_params)
    model = Morris4(num_params=num_params, num_influential=num_influential)
    gsa_seed = 3407
    validation_seed = 7043
    # Validation is run with twice as many inputs as num_influential.
    num_influential_validation = 2*num_influential

    fig_format = ["pickle"]  # can have elements "pdf", "html", "pickle"

    # NOTE: the disabled convergence code below refers to ``parameter_inds``,
    # which this cell never defines; define it before re-enabling that code.

    # TODO Choose which GSA to perform
    flag_sobol = 1
    flag_correlation = 0
    flag_xgboost = 0
    flag_delta = 0

    # --- Sobol indices (SaltelliSobol) ---
    if flag_sobol:
        iterations = 100 * num_params
        gsa = SaltelliSobol(iterations=iterations, model=model, write_dir=write_dir)
        # S_dict = gsa.generate_gsa_indices()
        S_dict = gsa.perform_gsa()
        first = S_dict["First order"]
        total = S_dict["Total order"]
        gsa.plot_sa_results(
            S_dict,
            S_dict_analytical=model.S_dict_analytical,
            fig_format=fig_format,
        )

        t0 = time.time()
        val = Validation(
            model=model,
            iterations=iterations_validation,
            seed=validation_seed,
            default_x_rescaled=None,
            write_dir=write_dir,
        )
        tag = "TotalIndex"
        influential_Y = val.get_influential_Y_from_gsa(total, num_influential_validation, tag=tag)
        t1 = time.time()
        print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        # val.plot_histogram_Y_all_Y_inf(
        #     influential_Y, num_influential_validation, tag=tag, fig_format=fig_format
        # )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(parameter_inds=parameter_inds, fig_format=fig_format)

    # --- Correlation coefficients ---
    if flag_correlation:
        iterations = 4 * num_params
        gsa = CorrelationCoefficients(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            seed=gsa_seed,
        )
        S_dict = gsa.perform_gsa()
        pearson = S_dict["pearson"]
        spearman = S_dict["spearman"]
        # gsa.plot_sa_results(S_dict, S_boolean=model.S_boolean, fig_format=fig_format)

        t0 = time.time()
        val = Validation(
            model=model,
            iterations=iterations_validation,
            seed=validation_seed,
            default_x_rescaled=None,
            write_dir=write_dir,
        )
        tag = "SpearmanIndex"
        influential_Y = val.get_influential_Y_from_gsa(
            spearman, num_influential_validation, tag=tag
        )
        t1 = time.time()
        print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        val.plot_histogram_Y_all_Y_inf(
            influential_Y, num_influential_validation, tag=tag, fig_format=fig_format
        )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(
        #     parameter_inds=parameter_inds,
        #     fig_format=fig_format,
        # )

    # --- Delta moment-independent indices ---
    if flag_delta:
        iterations = 8 * num_params
        num_resamples = 1
        gsa = DeltaMoment(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            num_resamples=num_resamples,
            seed=gsa_seed,
        )
        S_dict = gsa.perform_gsa()
        # 'delta_conf' is dropped before S_dict is handed to the plotting call.
        S_dict.pop('delta_conf')
        delta = S_dict['delta']
        gsa.plot_sa_results(
            S_dict,
            S_boolean=model.S_boolean,
            fig_format=fig_format,
        )
        t0 = time.time()
        val = Validation(
            model=model,
            iterations=iterations_validation,
            seed=validation_seed,
            default_x_rescaled=None,
            write_dir=write_dir,
        )
        tag = "DeltaIndex"
        influential_Y = val.get_influential_Y_from_gsa(
            delta, num_influential_validation, tag=tag
        )
        t1 = time.time()
        print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        val.plot_histogram_Y_all_Y_inf(
            influential_Y, num_influential_validation, tag=tag, fig_format=fig_format
        )

    # --- Gradient boosting (XGBoost) ---
    if flag_xgboost:
        # The tuned settings differ between model sizes only in ``subsample``.
        subsample_by_num_params = {1000: 0.35, 5000: 0.65, 10000: 0.65}
        if num_params not in subsample_by_num_params:
            # Previously an unsupported size left ``tuning_parameters`` and
            # ``num_boost_round`` undefined, which surfaced as a NameError.
            raise ValueError(
                "No tuned XGBoost parameters for num_params={}; "
                "supported values: {}".format(
                    num_params, sorted(subsample_by_num_params)
                )
            )
        num_boost_round = 300
        tuning_parameters = {
            "max_depth": 2,  # higher than 10 is definitely not good
            "eta": 0.25,
            "objective": "reg:squarederror",
            "n_jobs": -1,
            "refresh_leaf": True,
            "subsample": subsample_by_num_params[num_params],
            "min_child_weight": 0.5,
        }
        iterations = 10000
        gsa = GradientBoosting(
            iterations=iterations,
            model=model,
            write_dir=write_dir,
            seed=gsa_seed,
            tuning_parameters=tuning_parameters,
            num_boost_round=num_boost_round,
            xgb_model=None,
        )
        S_dict, r2, ev = gsa.perform_gsa(flag_save_S_dict=True, return_stats=True)
        print(r2, ev)
        # fscores = S_dict["fscores"]
        # gsa.plot_sa_results(
        #     S_dict,
        #     S_boolean=model.S_boolean,
        #     fig_format=fig_format,
        # )
        #
        # t0 = time.time()
        # val = Validation(
        #     model=model,
        #     iterations=iterations_validation,
        #     seed=validation_seed,
        #     default_x_rescaled=None,
        #     write_dir=write_dir,
        # )
        # tag = "FscoresIndex"
        # influential_Y = val.get_influential_Y_from_gsa(
        #     fscores, num_influential_validation, tag=tag
        # )
        # t1 = time.time()
        # print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        # val.plot_histogram_Y_all_Y_inf(
        #     influential_Y, num_influential_validation, tag=tag, fig_format=fig_format
        # )

        # conv = Convergence(
        #     gsa.filepath_Y,
        #     gsa.num_params,
        #     gsa.generate_gsa_indices,
        #     gsa.gsa_label,
        #     write_dir,
        #     num_steps=100,
        # )
        # conv.run_convergence(
        #     parameter_inds=parameter_inds,
        #     fig_format=fig_format,
        # )