In [None]:
from gsa_framework.test_functions import SobolGstar
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.extended_FAST import eFAST
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.validation import Validation
from gsa_framework.convergence import Convergence
from pathlib import Path
import numpy as np
import time
from scipy.stats import spearmanr

if __name__ == "__main__":
    # Run a global sensitivity analysis (GSA) on the Sobol G* test function,
    # validate the resulting ranking and track convergence of the indices.
    # Which GSA method runs is selected by the flag_* switches further down.

    # Alternative base directory, currently disabled.
#     path_base = Path(
#         "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
#     )
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # 1. Models
    num_params = 1000
    # 1% of the model inputs are treated as influential.
    num_influential = num_params // 100
    iterations_validation = 2000
    # All results for this model size go under one directory.
    write_dir = path_base / "sobol_Gstar_model_{}".format(num_params)
    gsa_seed = 3407
    sobol_g_star_seed = 345897
    validation_seed = 7043
    if num_influential == 10:
        # First 10 coefficients are 0, 0.1, ..., 0.9; all remaining ones are 9.
        a = np.array(
            [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
            + [9] * (num_params - num_influential)
        )
        alpha = np.ones(num_params)  # case 2 from Saltelli paper
    else:
        # First num_influential coefficients are evenly spaced on [0, 4];
        # all remaining ones are 9.
        a = 9 * np.ones(num_params)
        a[:num_influential] = np.linspace(0, 4, num_influential, endpoint=True)
        alpha = np.ones(num_params)
    # Draw delta reproducibly, then reseed the global numpy generator with
    # None so later unseeded draws are not tied to sobol_g_star_seed.
    np.random.seed(sobol_g_star_seed)
    delta = np.random.rand(num_params)  # delta doesn't affect input importance
    np.random.seed(None)
    model = SobolGstar(
        num_params=num_params,
        num_influential=num_influential,
        a=a,
        alpha=alpha,
        delta=delta,
    )
    fig_format = []  # can have elements "pdf", "html", "pickle"

    # Parameter indices passed to the convergence run: the first
    # num_params_corr_plot inputs plus num_params_corr_plot inputs starting
    # at index num_influential.
    num_params_corr_plot = 10
    parameter_inds = list(range(num_params_corr_plot)) + list(
        range(num_influential, num_influential + num_params_corr_plot)
    )

    # TODO Choose which GSA to perform
    flag_sobol = 0
    flag_eFAST = 1
    flag_xgboost = 0

    # Saltelli/Sobol branch, currently disabled.
#     if flag_sobol:
#         iterations = 400 * num_params
#         gsa = SaltelliSobol(iterations=iterations, model=model, write_dir=write_dir)
#         S_dict = gsa.perform_gsa()
#         first = S_dict["First order"]
#         total = S_dict["Total order"]
#         gsa.plot_sa_results(
#             S_dict,
#             S_dict_analytical=model.S_dict_analytical,
#             fig_format=fig_format,
#         )

#         t0 = time.time()
#         val = Validation(
#             model=model,
#             iterations=iterations_validation,
#             seed=validation_seed,
#             default_x_rescaled=None,
#             write_dir=write_dir,
#         )
#         tag_total = "SaltelliTotalIndex"
#         influential_Y_total = val.get_influential_Y_from_gsa(total, num_influential, tag=tag_total)
#         tag_first = "SaltelliFirstIndex"
#         influential_Y_first = val.get_influential_Y_from_gsa(first, num_influential, tag=tag_first)
#         t1 = time.time()
#         print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
#         # val.plot_correlation_Y_all_Y_inf(
#         #     influential_Y, num_influential, tag=tag, fig_format=fig_format
#         # )
# #         val.plot_histogram_Y_all_Y_inf(
# #             influential_Y, num_influential, tag=tag, fig_format=fig_format
# #         )
#         corr_coef_total = np.corrcoef(np.vstack([influential_Y_total, val.Y_all]))[0,1]
#         corr_coef_first = np.corrcoef(np.vstack([influential_Y_first, val.Y_all]))[0,1]
#         spearman_total, _ = spearmanr(influential_Y_total, val.Y_all)
#         spearman_first, _ = spearmanr(influential_Y_first, val.Y_all)
#         print(" \
#              corr_coef_total {} \n \
#              corr_coef_first {} \n \
#              spearman_total {} \n \
#              spearman_first {} \n".format(corr_coef_total, corr_coef_first, spearman_total, spearman_first)
#         )

#         conv = Convergence(
#             gsa.filepath_Y,
#             gsa.num_params,
#             gsa.generate_gsa_indices,
#             gsa.gsa_label,
#             write_dir,
#             num_steps=100,
#         )
#         conv.run_convergence(parameter_inds=parameter_inds, fig_format=fig_format)

    if flag_eFAST:
        iterations = 3000 * num_params
        # The same M is forwarded to both the eFAST run and Convergence below.
        M = 4
        gsa = eFAST(
            M=M, iterations=iterations, model=model, write_dir=write_dir, seed=gsa_seed
        )
        S_dict = gsa.perform_gsa()
        first = S_dict["First order"]
        total = S_dict["Total order"]
        gsa.plot_sa_results(
            S_dict,
            S_dict_analytical=model.S_dict_analytical,
            fig_format=fig_format,
        )

        # Validation based on the total-order indices; the timer covers both
        # the Validation construction and get_influential_Y_from_gsa.
        t0 = time.time()
        val = Validation(
            model=model,
            iterations=iterations_validation,
            seed=validation_seed,
            default_x_rescaled=None,
            write_dir=write_dir,
        )
        tag = "eFastTotalIndex"
        influential_Y = val.get_influential_Y_from_gsa(total, num_influential, tag=tag)
        t1 = time.time()
        print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        val.plot_histogram_Y_all_Y_inf(
            influential_Y, num_influential, tag=tag, fig_format=fig_format
        )

        # Convergence of the indices for the parameters in parameter_inds.
        conv = Convergence(
            gsa.filepath_Y,
            gsa.num_params,
            gsa.generate_gsa_indices,
            gsa.gsa_label,
            write_dir,
            num_steps=100,
            M=M,
        )
        conv.run_convergence(
            parameter_inds=parameter_inds, fig_format=fig_format
        )

    if flag_xgboost:
        # NOTE(review): `iterations` is only assigned inside the eFAST branch
        # above, so enabling this branch with flag_eFAST = 0 raises NameError.
        # The Validation call below also uses a different argument form than
        # the one in the eFAST branch -- confirm against the current
        # Validation API before enabling this branch.
        gsa = GradientBoosting(iterations=iterations, model=model, write_dir=write_dir)
        S_dict = gsa.perform_gsa()
        fscores = S_dict["fscores"]
        gsa.plot_sa_results(S_dict, S_boolean=model.S_boolean)
        val = Validation(fscores, model, num_influential=model.num_influential)
        val.generate_plots(plot_histogram=True, plot_correlation=True)


# Validation of LCA

In [None]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.extended_FAST import eFAST
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.validation import Validation
from gsa_framework.convergence import Convergence
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.plotting import histogram_Y1_Y2
from gsa_framework.utils import read_hdf5_array

if __name__ == "__main__":
    # Correlation-based GSA of the LCA model, followed by validation of the
    # ranking obtained from the Spearman coefficients.

    # Alternative base directory, currently disabled.
#     path_base = Path(
#         "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
#     )
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # LCA model
    bw.projects.set_current("GSA for paper")
    co = bw.Database("CH consumption 1.0")
    # First activity in the database whose name contains "Food"; the [0]
    # raises IndexError if no activity matches.
    act = [act for act in co if "Food" in act["name"]][0]
    # Demand of 1 for the selected activity.
    demand = {act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")

    # Define some variables
    num_params = 10000
    # 1% of the model inputs are treated as influential.
    num_influential = num_params // 100
    iterations_validation = 500
    # All results for this model size go under one directory.
    write_dir = path_base / "lca_model_{}".format(num_params)
    model = LCAModel(demand, method, write_dir, num_params=num_params)
    gsa_seed = 3403
    validation_seed = 7043
    fig_format = ["html", "pickle"]

    iterations = 2 * num_params
    gsa = CorrelationCoefficients(
        iterations=iterations,
        model=model,
        write_dir=write_dir,
        seed=gsa_seed,
    )
    # perform_gsa() is disabled and generate_gsa_indices() is called directly.
    # NOTE(review): presumably this relies on model outputs already stored in
    # write_dir by an earlier run -- verify before running on a clean directory.
#     S_dict = gsa.perform_gsa()
    S_dict = gsa.generate_gsa_indices()
    pearson = S_dict["pearson"]
    spearman = S_dict["spearman"]
    gsa.plot_sa_results(S_dict, fig_format=fig_format)

    # Validation based on the Spearman coefficients; the timer covers both
    # the Validation construction and get_influential_Y_from_gsa.
    t0 = time.time()
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=validation_seed,
        default_x_rescaled=None,
        write_dir=write_dir,
    )
    tag = "SpearmanIndex"
    influential_Y = val.get_influential_Y_from_gsa(
        spearman, num_influential, tag=tag
    )
    t1 = time.time()
    print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
    val.plot_histogram_Y_all_Y_inf(
        influential_Y, num_influential, tag=tag, fig_format=fig_format
    )

# Run XGBoost LCA

In [None]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.extended_FAST import eFAST
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.validation import Validation
from gsa_framework.convergence import Convergence
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.plotting import histogram_Y1_Y2
from gsa_framework.utils import read_hdf5_array

if __name__ == "__main__":
    # Gradient-boosting GSA of the LCA model, followed by validation of the
    # ranking obtained from the "fscores" entry of the GSA results.

    # Alternative base directory, currently disabled.
#     path_base = Path(
#         "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
#     )
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # LCA model
    bw.projects.set_current("GSA for paper")
    co = bw.Database("CH consumption 1.0")
    # First activity in the database whose name contains "Food"; the [0]
    # raises IndexError if no activity matches.
    act = [act for act in co if "Food" in act["name"]][0]
    # Demand of 1 for the selected activity.
    demand = {act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")

    # Define some variables
    num_params = 10000
    # 1% of the model inputs are treated as influential.
    num_influential = num_params // 100
    iterations_validation = 500
    # All results for this model size go under one directory.
    write_dir = path_base / "lca_model_{}".format(num_params)
    model = LCAModel(demand, method, write_dir, num_params=num_params)
    gsa_seed = 3403
    validation_seed = 7043
    fig_format = ["html", "pickle"]

    # Not used by any active code below in this cell.
    parameter_inds_convergence_plot = [1,2,3]  # TODO choose for convergence

    # Settings handed to GradientBoosting as `tuning_parameters` and
    # `num_boost_round`.
    # NOTE(review): presumably forwarded to XGBoost training -- confirm which
    # of these keys the framework actually consumes.
    num_boost_round = 200
    tuning_parameters = {
         'max_depth': 6,  
         'eta': 0.1,
         'objective': 'reg:squarederror',
         'n_jobs': -1,
         'refresh_leaf': True,
         'subsample': 0.4,
         'min_child_weight': 0.5,
    }
    iterations = 2 * num_params
    gsa = GradientBoosting(
        iterations=iterations,
        model=model,
        write_dir=write_dir,
        seed=gsa_seed,
        tuning_parameters=tuning_parameters,
        num_boost_round=num_boost_round,
        xgb_model=None,
    )
    S_dict = gsa.perform_gsa(flag_save_S_dict=True)
    fscores = S_dict["fscores"]
    gsa.plot_sa_results(
        S_dict,
        S_boolean=model.S_boolean,
        fig_format=fig_format,
    )

    # Validation based on the fscores; the timer covers both the Validation
    # construction and get_influential_Y_from_gsa.
    t0 = time.time()
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=validation_seed,
        default_x_rescaled=None,
        write_dir=write_dir,
    )
    tag = "FscoresIndex"
    influential_Y = val.get_influential_Y_from_gsa(fscores, num_influential, tag=tag)
    t1 = time.time()
    print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
    val.plot_histogram_Y_all_Y_inf(
        influential_Y, num_influential, tag=tag, fig_format=fig_format
    )

#     conv = Convergence(
#         gsa.filepath_Y,
#         gsa.num_params,
#         gsa.generate_gsa_indices,
#         gsa.gsa_label,
#         write_dir,
#         num_steps=100,
#     )
#     conv.run_convergence(
#         parameter_inds=parameter_inds_convergence_plot,
#         fig_format=fig_format,
#     )