In [None]:
from gsa_framework.test_functions import SobolGstar
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.extended_FAST import eFAST
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.validation import Validation
from gsa_framework.convergence import Convergence
from pathlib import Path
import numpy as np
import time
from scipy.stats import spearmanr

if __name__ == "__main__":

#     path_base = Path(
#         "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
#     )
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # 1. Models
    num_params = 1000
    num_influential = num_params // 100
    iterations_validation = 2000
    write_dir = path_base / "sobol_Gstar_model_{}".format(num_params)
    gsa_seed = 3407
    sobol_g_star_seed = 345897
    validation_seed = 7043
    if num_influential == 10:
        a = np.array(
            [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
            + [9] * (num_params - num_influential)
        )
        alpha = np.ones(num_params)  # case 2 from Saltelli paper
    else:
        a = 9 * np.ones(num_params)
        a[:num_influential] = np.linspace(0, 4, num_influential, endpoint=True)
        alpha = np.ones(num_params)
    np.random.seed(sobol_g_star_seed)
    delta = np.random.rand(num_params)  # delta doesn't affect input importance
    np.random.seed(None)
    model = SobolGstar(
        num_params=num_params,
        num_influential=num_influential,
        a=a,
        alpha=alpha,
        delta=delta,
    )
    fig_format = []  # can have elements "pdf", "html", "pickle"

    num_params_corr_plot = 10
    parameter_inds = list(range(num_params_corr_plot)) + list(
        range(num_influential, num_influential + num_params_corr_plot)
    )

    # TODO Choose which GSA to perform
    flag_sobol = 0
    flag_eFAST = 1
    flag_xgboost = 0

#     if flag_sobol:
#         iterations = 400 * num_params
#         gsa = SaltelliSobol(iterations=iterations, model=model, write_dir=write_dir)
#         S_dict = gsa.perform_gsa()
#         first = S_dict["First order"]
#         total = S_dict["Total order"]
#         gsa.plot_sa_results(
#             S_dict,
#             S_dict_analytical=model.S_dict_analytical,
#             fig_format=fig_format,
#         )

#         t0 = time.time()
#         val = Validation(
#             model=model,
#             iterations=iterations_validation,
#             seed=validation_seed,
#             default_x_rescaled=None,
#             write_dir=write_dir,
#         )
#         tag_total = "SaltelliTotalIndex"
#         influential_Y_total = val.get_influential_Y_from_gsa(total, num_influential, tag=tag_total)
#         tag_first = "SaltelliFirstIndex"
#         influential_Y_first = val.get_influential_Y_from_gsa(first, num_influential, tag=tag_first)
#         t1 = time.time()
#         print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
#         # val.plot_correlation_Y_all_Y_inf(
#         #     influential_Y, num_influential, tag=tag, fig_format=fig_format
#         # )
# #         val.plot_histogram_Y_all_Y_inf(
# #             influential_Y, num_influential, tag=tag, fig_format=fig_format
# #         )
#         corr_coef_total = np.corrcoef(np.vstack([influential_Y_total, val.Y_all]))[0,1]
#         corr_coef_first = np.corrcoef(np.vstack([influential_Y_first, val.Y_all]))[0,1]
#         spearman_total, _ = spearmanr(influential_Y_total, val.Y_all)
#         spearman_first, _ = spearmanr(influential_Y_first, val.Y_all)
#         print(" \
#              corr_coef_total {} \n \
#              corr_coef_first {} \n \
#              spearman_total {} \n \
#              spearman_first {} \n".format(corr_coef_total, corr_coef_first, spearman_total, spearman_first)
#         )

#         conv = Convergence(
#             gsa.filepath_Y,
#             gsa.num_params,
#             gsa.generate_gsa_indices,
#             gsa.gsa_label,
#             write_dir,
#             num_steps=100,
#         )
#         conv.run_convergence(parameter_inds=parameter_inds, fig_format=fig_format)

    if flag_eFAST:
        iterations = 3000 * num_params
        M = 4
        gsa = eFAST(
            M=M, iterations=iterations, model=model, write_dir=write_dir, seed=gsa_seed
        )
        S_dict = gsa.perform_gsa()
        first = S_dict["First order"]
        total = S_dict["Total order"]
        gsa.plot_sa_results(
            S_dict,
            S_dict_analytical=model.S_dict_analytical,
            fig_format=fig_format,
        )

        t0 = time.time()
        val = Validation(
            model=model,
            iterations=iterations_validation,
            seed=validation_seed,
            default_x_rescaled=None,
            write_dir=write_dir,
        )
        tag = "eFastTotalIndex"
        influential_Y = val.get_influential_Y_from_gsa(total, num_sinfluential, tag=tag)
        t1 = time.time()
        print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
        val.plot_histogram_Y_all_Y_inf(
            influential_Y, num_influential, tag=tag, fig_format=fig_format
        )
        
        conv = Convergence(
            gsa.filepath_Y,
            gsa.num_params,
            gsa.generate_gsa_indices,
            gsa.gsa_label,
            write_dir,
            num_steps=100,
            M=M,
        )
        conv.run_convergence(
            parameter_inds=parameter_inds, fig_format=fig_format
        )

    if flag_xgboost:
        gsa = GradientBoosting(iterations=iterations, model=model, write_dir=write_dir)
        S_dict = gsa.perform_gsa()
        fscores = S_dict["fscores"]
        gsa.plot_sa_results(S_dict, S_boolean=model.S_boolean)
        val = Validation(fscores, model, num_influential=model.num_influential)
        val.generate_plots(plot_histogram=True, plot_correlation=True)


In [92]:
import brightway2 as bw
import numpy as np
bw.projects.set_current("GSA for paper")
co = bw.Database("CH consumption 1.0")
act = [act for act in co if "food" in act['name']][0]
demand = {act: 1}
method = ("IPCC 2013", "climate change", "GTP 100a")
lca = bw.LCA(demand, method)
lca.lci()
lca.lcia()
lca.score

0.2937040305605457

In [93]:
lca.technosphere_matrix

<18326x18326 sparse matrix of type '<class 'numpy.float64'>'
	with 208807 stored elements in Compressed Sparse Row format>

In [5]:
from pathlib import Path
from gsa_framework.utils import read_pickle
path = Path('/data/user/kim_a/setac_gsa/LSA_scores/3/')

In [84]:
def get_lsa_scores_pickle(path):
    """Get LCIA scores stored in the ``path``, where each parameter was sampled only few (eg 3-10) times.

    Parameters
    ----------
    path : str
        Path that contains pickle files with LCIA scores, where each uncertain exchange in the technosphere
        is varied independently of all other exchanges but only few times. Needed to determine which exchanges
        do NOT change LCIA score of the given functional unit and method, and hence, can be disregarded in the GSA.
        Pickle files explicitly contain ``(database, code)`` tuples for input and output activities, in order to
        most certainly define exchanges.

    Returns
    -------
    scores : dict
        Keys are indices of the exchanges as they appear in the lca.tech_params, values are LCIA scores.

    """
    path = Path(path)
    files = [filepath.name for filepath in path.iterdir() if "LSA_scores_" in filepath.name and filepath.is_file()]
    starts = [int(filepath.split("_")[2]) for filepath in files]
    ind_sort = np.argsort(starts)
    files_sorted = [files[i] for i in ind_sort]

    scores, inputs, outputs = [], [], []
    for file in files_sorted:
        filepath = path / file
        temp = read_pickle(filepath)
        inputs  += [vals["input"] for vals in temp.values()]
        outputs += [vals["output"] for vals in temp.values()]
        scores  += [vals["scores"] for vals in temp.values()]
    num_exchanges = len(inputs)

    input_row_dict = {}
    for input_ in list(set(inputs)):
        input_row_dict[input_] = lca.activity_dict[input_]
    output_col_dict = {}
    for output_ in list(set(outputs)):
        output_col_dict[output_] = lca.activity_dict[output_]

    scores_dict = {}
    for i in range(num_exchanges):
        row = input_row_dict[inputs[i]]
        col = output_col_dict[outputs[i]]
        where_temp = np.where(
            np.logical_and(
                np.logical_and(
                    lca.tech_params['row'] == row,
                    lca.tech_params['col'] == col,
                ),
                lca.tech_params['uncertainty_type'] > 1,
            )
        )[0]
        assert len(where_temp) == 1
        scores_dict[where_temp[0]] = scores[i]
    return scores_dict

def get_nonzero_params_from_num_params(scores_dict, num_params):
    keys = np.array(list(scores_dict.keys()))
    vals = np.array(list(scores_dict.values()))
    vals = np.hstack([vals, np.tile(2484.925996825716, (len(vals), 1))])
    # Variance of LSA scores for each input / parameter
    var = np.var(vals, axis=1)
    where_var = np.argsort(var)[:num_params]
    print(len(var[where_var]>0))
#     assert np.all(var[where_var]>0)
    params_yes = keys[where_var]
    params_no = np.setdiff1d(keys, params_yes)
#     params_yes.sort(), params_no.sort()
    return params_yes, params_no, var

In [26]:
scores_dict = get_lsa_scores_pickle(path)

In [85]:
num_params = 10000
uncertain_tech_params_where_subset, _, aa = get_nonzero_params_from_num_params(scores_dict, num_params)
uncertain_tech_params_subset = lca.tech_params[uncertain_tech_params_where_subset]

10000


In [67]:
uncertain_tech_params_subset['scale'][560:580]

array([0.47958314, 0.81853527, 1.0049876 , 0.2295648 , 0.20493902,
       0.28284273, 0.20493902, 0.24617067, 0.11224972, 0.42261094,
       0.91104335, 0.20976177, 0.05196152, 0.4036087 , 0.45166358,
       0.05099019, 0.44944412, 0.2       , 0.41231057, 0.05656854],
      dtype=float32)

In [87]:
np.where(var>0)[0]

array([     0,      1,      2, ..., 162296, 162297, 162298])