# Setups

In [1]:
import bw2data as bd
import bw2calc as bc
import numpy as np
from copy import deepcopy
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import stats_arrays as sa
from scipy.stats import spearmanr

from gsa_framework.models.life_cycle_assessment import LCAModelBase
from gsa_framework.sensitivity_analysis.correlations import Correlations
from gsa_framework.sensitivity_analysis.saltelli_sobol import SaltelliSobol
from gsa_framework.utils import read_pickle, write_pickle, write_hdf5_array, read_hdf5_array
from setups_paper_gwp import setup_corr, setup_lca_model_protocol
# from gsa_framework.utils import read_hdf5_array, write_hdf5_array
# from gsa_framework.visualization.plotting import plot_correlation_Y1_Y2, plot_histogram_Y1_Y2

from decimal import Decimal

In [2]:
# Root directory handed to the project setup helpers below (absolute, machine-specific path).
path_base = Path('/data/user/kim_a')

# LCA model
# Select the Brightway project and take the single activity of the consumption
# database whose name contains "Food" as the functional unit.
bd.projects.set_current("GSA for protocol")
co = bd.Database("CH consumption 1.0")
demand_act = [act for act in co if "Food" in act["name"]]
assert len(demand_act) == 1
demand_act = demand_act[0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")

# Correlation-based GSA set up with `num_params` inputs and 4x as many iterations.
num_params = 20000
iter_corr = 4*num_params
gsa_corr = setup_corr(num_params, iter_corr, setup_lca_model_protocol, path_base)
S = gsa_corr.perform_gsa()
spearman = S['spearman']
# Positions ordered from the largest to the smallest absolute Spearman coefficient.
spearman_sorted = np.argsort(np.abs(spearman))[::-1]

# Full model; `num_params=None` presumably means "no preselection of inputs" -- TODO confirm.
model, write_dir, gsa_seed = setup_lca_model_protocol(
    path_base,
    num_params=None,
    write_dir=None,
)

write_dir_arr = write_dir / "arrays"
write_dir_sct = write_dir / "supply_chain"

# Index selections for biosphere and characterization inputs stored with the LSA scores.
path_lsa = model.write_dir / "LSA_scores"
path_lsa_include_inds_bio = path_lsa / "include_inds_bio.pickle"
include_inds_bio = read_pickle(path_lsa_include_inds_bio)
path_lsa_include_inds_cf = path_lsa / "include_inds_cf.pickle"
include_inds_cf = read_pickle(path_lsa_include_inds_cf)

# Convert per-type indices to positions in one flat input vector laid out as
# [tech | bio | cf]: bio is shifted by the tech length, cf by tech + bio lengths.
include_inds_tech_forX = np.arange(model.uncertain_exchange_lengths['tech'])
include_inds_bio_forX  = model.uncertain_exchange_lengths['tech'] + include_inds_bio
include_inds_cf_forX   = model.uncertain_exchange_lengths['tech'] + \
                         model.uncertain_exchange_lengths['bio']  + include_inds_cf
parameter_choice_rm_noninf = np.hstack(
    [include_inds_tech_forX, include_inds_bio_forX, include_inds_cf_forX]
)

# Keep only the entries that `get_where_high_var` selects from the LSA scores
# (presumably the `num_params_lsa` inputs with the highest variance -- TODO confirm).
scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores", model.uncertain_exchanges_types)
num_params_lsa = 20000 #10000
where_high_var = model.get_where_high_var(scores_dict, num_params_lsa)
parameter_choice_rm_lowinf = parameter_choice_rm_noninf[where_high_var]

# Inputs passed on to the ranking step: the `num_params_ranking` entries with the
# largest |Spearman|, mapped to flat input positions and sorted ascending.
num_params_ranking = 200
gsa_seed_ranking = 555666
iterations_ranking = num_params_ranking * 1600
parameter_choice_inf_lsa = parameter_choice_rm_lowinf[spearman_sorted[:num_params_ranking]]
parameter_choice_inf_lsa = np.sort(parameter_choice_inf_lsa)

# fp = write_dir / "arrays" / "parameter_choice_rm_lowinf.pickle"
# write_pickle(parameter_choice_rm_lowinf, fp)

# fp = write_dir / "arrays" / "model.pickle"
# write_pickle(model, fp)

Unitcube samples ->    0.022 s
Rescaled samples ->    0.001 s
Model outputs    ->    0.000 s
GSA indices      ->    0.021 s
Total GSA time   ->    0.045 s 

LCA score is 211.57670115973556
Total number of uncertain exchanges is 408722
   tech=186602, bio=222049, cf=71


# Graph traversal screening

In [3]:
import numpy as np
import bw2data as bd
import bw2calc as bc

from dev.utils_graph_traversal import \
    filter_uncertain_technosphere_exchanges, \
    filter_uncertain_biosphere_exchanges, \
    filter_uncertain_characterization_exchanges, \
    collect_uncertain_exchanges

# Same project, functional unit and method as in the setup cell, repeated here so
# that this cell also defines `demand` and `method` itself.
bd.projects.set_current("GSA for protocol")
co = bd.Database("CH consumption 1.0")
demand_act = [act for act in co if "Food" in act["name"]]
assert len(demand_act) == 1
demand_act = demand_act[0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
# LCA object handed to the graph traversal screening; inventory and impact
# assessment are computed up front.
lca = bc.LCA(demand, method)
lca.lci()
lca.lcia()

def graph_traversal_screening(lca, model, write_dir, cutoff, num_params_ranking):
    """Screen uncertain exchanges via the graph traversal helpers and map them to model inputs.

    Two pickles in ``write_dir`` act as caches: one with the filtered exchanges for
    ``cutoff`` and one with the final result for ``(cutoff, num_params_ranking)``.
    If the final result file exists it is returned as is.

    Parameters
    ----------
    lca
        LCA object passed to the ``filter_uncertain_*`` helpers; its ``cf_params``
        array is read directly for the characterization factors.
    model
        Object exposing ``uncertain_params`` and ``uncertain_exchange_lengths``,
        both keyed by ``'tech'``, ``'bio'`` and ``'cf'``.
    write_dir : pathlib.Path
        Directory holding the cache files.
    cutoff : float
        Cutoff forwarded to the technosphere and biosphere filters; also part of
        the cache file names.
    num_params_ranking : int
        Number of exchanges requested from ``collect_uncertain_exchanges`` for the
        ``'ranking'`` subset.

    Returns
    -------
    dict
        ``{'ranking': ..., 'all': ...}``; each value holds
        ``"parameter_choice_dict"`` (sorted per-type positions in
        ``model.uncertain_params``) and ``"parameter_choice_inf_graph"`` (the same
        positions in the flat ``[tech | bio | cf]`` input vector).

    Raises
    ------
    AssertionError
        If a selected exchange does not match exactly one row of
        ``model.uncertain_params``.
    """
    cutoff_str = '%.2E' % Decimal(cutoff)
    filename = "cutoff{}.params{}.pickle".format(cutoff_str, num_params_ranking)
    filepath = write_dir / filename

    if filepath.exists():
        return read_pickle(filepath)

    exchanges_dict_filename = "exchanges_dict.cutoff{}.pickle".format(cutoff_str)
    exchanges_dict_filepath = write_dir / exchanges_dict_filename
    if exchanges_dict_filepath.exists():
        exchanges_dict = read_pickle(exchanges_dict_filepath)
    else:
        # NOTE(review): the meaning of the third argument (1e8) is defined by the
        # helper in dev.utils_graph_traversal -- confirm there.
        tech_inds_uncertain = filter_uncertain_technosphere_exchanges(lca, cutoff, 1e8)
        bio_inds_uncertain = filter_uncertain_biosphere_exchanges(lca, cutoff)
        # All uncertain characterization factors are kept (no cutoff), each as a
        # (row, None, inf) tuple so that it has the same layout as the other types.
        cf_rows = np.sort(lca.cf_params[lca.cf_params['uncertainty_type'] > 1]['row'])
        cf_inds_uncertain = [(row, None, np.inf) for row in cf_rows]
        exchanges_dict = {
            'tech': tech_inds_uncertain,
            'bio': bio_inds_uncertain,
            'cf': cf_inds_uncertain,
        }
        write_pickle(exchanges_dict, exchanges_dict_filepath)

    num_params_all = sum(len(exchanges_dict[key]) for key in ('tech', 'bio', 'cf'))
    subset_exchanges_dict_all = collect_uncertain_exchanges(exchanges_dict, num_params_all)
    subset_exchanges_dict = collect_uncertain_exchanges(exchanges_dict, num_params_ranking)

    # Positions of the row / column index inside each exchange tuple.
    row_ind = 0
    col_ind = 1
    len_tech = model.uncertain_exchange_lengths['tech']
    len_bio = model.uncertain_exchange_lengths['bio']
    sdicts = {
        'ranking': subset_exchanges_dict,
        'all': subset_exchanges_dict_all,
    }
    data = {}
    for sname, sdict in sdicts.items():
        parameter_choice_dict = {}
        for uncertain_exchange_type, exc_list in sdict.items():
            params = model.uncertain_params[uncertain_exchange_type]
            where_list = []
            for exc in exc_list:
                matches = params['row'] == exc[row_ind]
                # Characterization factors are identified by their row only.
                if uncertain_exchange_type != 'cf':
                    matches = np.logical_and(matches, params['col'] == exc[col_ind])
                where = np.where(matches)[0]
                assert len(where) == 1
                where_list.append(where[0])
            # Explicit integer dtype: an empty selection would otherwise become a
            # float64 array and turn the stacked index array below into floats.
            parameter_choice_dict[uncertain_exchange_type] = np.array(
                sorted(where_list), dtype=np.intp
            )

        # Shift per-type positions into the flat [tech | bio | cf] input vector.
        parameter_choice_inf_graph = np.hstack(
            [
                parameter_choice_dict['tech'],
                parameter_choice_dict['bio'] + len_tech,
                parameter_choice_dict['cf'] + len_tech + len_bio,
            ]
        )
        data[sname] = {
            "parameter_choice_dict": parameter_choice_dict,
            "parameter_choice_inf_graph": parameter_choice_inf_graph
        }

    # Written once, after both subsets succeeded, so that a failure midway never
    # leaves a partial result file that later calls would load as complete.
    write_pickle(data, filepath)
    return data

In [4]:
# Choose the cache directory for the supply chain traversal results.
option_step3 = "without_3"
write_dir_sct_with_3 = write_dir / 'supply_chain'
write_dir_sct_without_3 = write_dir / 'supply_chain_without_step_3'
if option_step3 == "with_3":
    write_dir_sct = write_dir_sct_with_3
elif option_step3 == "without_3":
    write_dir_sct = write_dir_sct_without_3
else:
    # Fail loudly instead of silently reusing whatever `write_dir_sct` was before.
    raise ValueError("Unknown option_step3: {!r}".format(option_step3))

data_all = {}
# Cutoffs are fractions of the total score, e.g. cutoff=0.005 means 0.5 percent.
cutoffs = np.array([1e-2, 1e-3, 1e-4])
nums_params_ranking = np.array([100,200,400,800,1600])

for cutoff in cutoffs:
    print("--> {}".format(cutoff))
    data_all[cutoff] = {}
    for num_params_ranking in nums_params_ranking:
        data_current = graph_traversal_screening(lca, model, write_dir_sct, cutoff, num_params_ranking)
        # Keep every result; previously it was computed and then dropped, which
        # left `data_all` filled with empty dictionaries.
        data_all[cutoff][num_params_ranking] = data_current

--> 0.01
71
71
71
71
71
--> 0.001
71
71
71
71
71
--> 0.0001
71
71
71
71
71


# Validation of screening for LSA and SCT with different num_params_ranking

In [None]:
# from dask.distributed import Client, LocalCluster
# from dask_jobqueue import SLURMCluster
# from pathlib import Path
# import os
# import dask

# which_pc = "merlin_protocol_gsa"
# if 'merlin' in which_pc:
#     path_dask_logs = Path('/data/user/kim_a/dask_logs')
#     path_dask_logs.mkdir(parents=True, exist_ok=True)
#     cluster = SLURMCluster(cores     = 8,
#                            memory    ="40GB", 
#                            walltime  = '02:00:00',
#                            interface ='ib0',
#                            local_directory = path_dask_logs.as_posix(),
#                            log_directory   = path_dask_logs.as_posix(),
#                            queue="daily",
#                            ) 
# elif 'local' in which_pc:
#     cluster = LocalCluster(memory_limit='7GB') 

In [None]:
# client = Client(cluster)

In [None]:
# n_workers = 10
# cluster.scale(n_workers)

In [None]:
# client

In [None]:
# cluster.close()
# client.close()

In [None]:
# task_per_worker = dask.delayed(val.get_influential_Y_from_parameter_choice)

In [5]:
from gsa_framework.convergence_robustness_validation import Validation

# Settings of the validation runs.
iterations_validation = 2000
validation_seed = 100023423
lca_scores_axis_title = r"$\text{LCIA scores, [kg CO}_2\text{-eq}]$"

# Default amounts of all uncertain inputs, concatenated in the iteration order
# of `model.default_uncertain_amounts`.
default_uncertain_amounts = np.hstack(list(model.default_uncertain_amounts.values()))

if __name__ == "__main__":
    # Validation helper used by the cells below as `val`.
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=validation_seed,
        default_x_rescaled=default_uncertain_amounts,
        write_dir=write_dir,
        model_output_name=lca_scores_axis_title,
    )
    

In [6]:
%%time
# For each screening size, load (or compute) the supply chain traversal selection
# at the given cutoff and run the validation with only those inputs varying.
nums_params_ranking = [100,200,400,800,1600]
cutoff = 1e-4
    
# Only filled by the commented-out dask variant below; stays empty otherwise.
model_evals = []
for num_params_ranking in nums_params_ranking:
    print(num_params_ranking)
#     parameter_choice_inf_lsa = parameter_choice_rm_lowinf[inf_sorted[:num_params_ranking]]
#     parameter_choice_inf_lsa = np.sort(parameter_choice_inf_lsa)
#     tag = "localSA"
#     model_eval = task_per_worker(influential_inputs=parameter_choice_inf_lsa, tag=tag)
#     model_evals.append(model_eval)
#     Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_inf_lsa, tag=tag)
    
    # NOTE(review): the tag is fixed to 'sct_without_3' while `write_dir_sct` follows
    # `option_step3` from the previous cell -- keep the two in sync when switching.
    tag = 'sct_without_3'
    data = graph_traversal_screening(lca, model, write_dir_sct, cutoff, num_params_ranking)
    parameter_choice_inf_sct = data['ranking']["parameter_choice_inf_graph"]
#     model_eval = task_per_worker(influential_inputs=parameter_choice_inf_lsa, tag=tag)
#     model_evals.append(model_eval)
    # `Y_subset` is overwritten on every iteration; only the last result stays in memory.
    Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_inf_sct, tag=tag)

100
200
400
800
1600
CPU times: user 19h 52min 38s, sys: 35min 28s, total: 20h 28min 7s
Wall time: 1h 12min 22s


In [None]:
# %%time
# dask.compute(model_evals)

# GSA results for paper2

In [None]:
# Supply chain traversal selection used for the paper results.
cutoff = 1e-4
num_params_ranking = 200
data = graph_traversal_screening(lca, model, write_dir_sct, cutoff, num_params_ranking)
parameter_choice_inf_sct = data['ranking']['parameter_choice_inf_graph']

# Compare the sizes of the two selections and (as the cell output) how many
# input positions they have in common.
print(parameter_choice_inf_sct.shape, parameter_choice_inf_lsa.shape)
np.intersect1d(parameter_choice_inf_sct, parameter_choice_inf_lsa).shape

In [None]:
# Screening whose selected inputs feed the ranking step: "sct" (supply chain
# traversal) or "localSA". Any other value leaves `parameter_choice_inf_use` unset.
option = "sct"
if option == "sct":
    parameter_choice_inf_use = parameter_choice_inf_sct
elif option == "localSA":
    parameter_choice_inf_use = parameter_choice_inf_lsa

def get_parameters_tech_bio_cf(parameter_choice, model):
    """Split flat input positions into per-type parameter records and their locations.

    ``parameter_choice`` holds positions in the flat ``[tech | bio | cf]`` input
    vector. Positions below the tech length are tech inputs, positions from there up
    to tech + bio lengths are bio inputs, and all remaining ones are cf inputs.

    Parameters
    ----------
    parameter_choice : numpy.ndarray
        Integer positions in the flat input vector.
    model
        Object exposing ``uncertain_exchange_lengths``, ``uncertain_params`` and
        ``uncertain_params_selected_where_dict``, each keyed by 'tech', 'bio', 'cf'.

    Returns
    -------
    tuple of dict
        ``(uncertain_params, uncertain_params_selected_where_dict)`` restricted to the
        chosen inputs, both keyed by 'tech', 'bio' and 'cf'.
    """
    lengths = model.uncertain_exchange_lengths
    bio_start = lengths['tech']
    cf_start = lengths['bio'] + lengths['tech']

    is_bio = (parameter_choice >= bio_start) & (parameter_choice < cf_start)
    # Per-type positions: the flat position minus the offset of the type's segment.
    local_inds = {
        'tech': parameter_choice[parameter_choice < bio_start],
        'bio': parameter_choice[is_bio] - bio_start,
        'cf': parameter_choice[parameter_choice >= cf_start] - cf_start,
    }

    uncertain_params = {}
    uncertain_params_selected_where_dict = {}
    for kind, inds in local_inds.items():
        uncertain_params[kind] = model.uncertain_params[kind][inds]
        uncertain_params_selected_where_dict[kind] = (
            model.uncertain_params_selected_where_dict[kind][inds]
        )
    return uncertain_params, uncertain_params_selected_where_dict

# Build a reduced LCA model in which only the selected inputs are uncertain, and
# the Saltelli/Sobol GSA object that operates on it.
if __name__ == "__main__":  
    uncertain_params, uncertain_params_selected_where_dict = get_parameters_tech_bio_cf(
        parameter_choice_inf_use, model
    )
    model_ranking = LCAModelBase(
        demand, 
        method, 
        uncertain_params, 
        uncertain_params_selected_where_dict,
    )
    gsa = SaltelliSobol(iterations=iterations_ranking, model=model_ranking, write_dir=write_dir)
    # The GSA itself is not run here; later cells read precomputed outputs from disk.
#     S_salt = gsa.perform_gsa()
#     fig = gsa.plot_sa_results(S_salt)

In [None]:
# Precomputed model outputs of the Saltelli sampling for both screening options.
# NOTE(review): 319968 is hard-coded; it equals the largest multiple of
# (num_params_ranking + 2) = 202 not exceeding iterations_ranking = 320000, which
# presumably is the number of Saltelli samples actually drawn -- confirm.
filepath_Y_saltelli_sct = write_dir_arr / "Y.saltelliSampling.319968.None.sct.hdf5"
filepath_Y_saltelli_lsa = write_dir_arr / "Y.saltelliSampling.319968.None.localSA.hdf5"

In [None]:
from gsa_framework.sensitivity_methods.saltelli_sobol import sobol_indices

# Sobol indices computed from the stored outputs; the results are read below via
# the 'First order' and 'Total order' keys.
S_sct = sobol_indices(filepath_Y_saltelli_sct, num_params_ranking)
S_lsa = sobol_indices(filepath_Y_saltelli_lsa, num_params_ranking)

In [None]:
# %%time

# filename_Y = "{}.{}{}".format(gsa.filepath_Y.stem, option, gsa.filepath_Y.suffix)
# filepath_Y = gsa.filepath_Y.parent / filename_Y
# filepath_Y

# X = gsa.generate_unitcube_samples(iterations_ranking)
# Xr = model_ranking.rescale(X)
# Y = model_ranking(Xr)
# write_hdf5_array(Y, filepath_Y)

## 1. Validation for 1 to 20 inputs, LSA

In [None]:
%%time
# option = 'sct'
# Pick the total order indices and the matching input positions for the active
# `option`; any other value leaves `total` and `parameter_choice_inf` untouched.
if option == 'localSA':
    total = S_lsa['Total order']
    parameter_choice_inf = parameter_choice_inf_lsa
elif option == 'sct':
    total = S_sct['Total order']
    parameter_choice_inf = parameter_choice_inf_sct
    
# Positions ordered from the largest to the smallest total order index.
total_argsort = np.argsort(total)[::-1]

# spearman_yy = []
# num_ranked_max = 20
# num_ranked_arr = np.arange(1,num_ranked_max+1)
# tag = "TotalRanked.{}".format(option)
# if __name__ == "__main__":
#     for num_ranked in num_ranked_arr:
#         print(num_ranked)
#         parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]
#         Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_ranked, tag=tag)
#         s, _ = spearmanr(val.Y_all, Y_subset)
#         spearman_yy.append(s)
        
# fig = go.Figure()
# fig.add_trace(
#     go.Scatter(
#         x=num_ranked_arr,
#         y=spearman_yy,
#         mode="markers+lines",
#         showlegend=False,
# #         marker_color = color_blue_rgb,
#     ),
# )
# fig.update_xaxes(title='Number of varying influential inputs')
# fig.update_yaxes(title='Spearman correlation between Y_all and Y_inf')



# Uncertainty reduction

In [None]:
# num_ranked = 20
# parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]
# uparams_ranked, uparams_where_dict_ranked = get_parameters_tech_bio_cf(
#     parameter_choice_ranked, model
# )

## Exp 1: Change uncertainties of bio flows to lognormal

In [None]:
# bio_to_lognormal_inds = [0,1,4,6]
# # 1. Bio_params
# dt = model.lca.bio_params.dtype
# uncertain_bio_params = model.lca.bio_params[model.lca.bio_params['uncertainty_type']>1]
# bio_params_copy = deepcopy(np.array([a for a in model.lca.bio_params], dtype = dt))
# bio_params_modified = deepcopy(np.array([a for a in model.lca.bio_params], dtype = dt))

# ## change normal to lognormal
# for ind in bio_to_lognormal_inds:
#     bio_ind = uparams_where_dict_ranked['bio'][ind]
#     print(bio_params_modified[bio_ind])
#     bio_params_modified[bio_ind]["uncertainty_type"] = sa.LognormalUncertainty.id
#     bio_params_modified[bio_ind]['loc'] = np.log(bio_params_copy[bio_ind]['loc'])
#     print(bio_params_modified[bio_ind])
    
# uncertain_bio_params_modified_where = np.where(bio_params_modified['uncertainty_type']>1)[0]
# uncertain_bio_params_modified  = bio_params_modified[uncertain_bio_params_modified_where]

# uncertain_tech_params_modified_where = model.uncertain_params_selected_where_dict['tech']
# uncertain_tech_params_modified = model.lca.tech_params[uncertain_tech_params_modified_where]

# uncertain_cf_params_modified_where = model.uncertain_params_selected_where_dict['cf']
# uncertain_cf_params_modified = model.lca.cf_params[uncertain_cf_params_modified_where]

## Exp 2: Change uncertainties of bio flows to lognormal and reduce scale 2 times in all others

In [None]:
# reduce_tech_inds = np.arange(uparams_ranked['tech'].shape[0])
# # 1. Tech_params
# dt = model.lca.tech_params.dtype
# uncertain_tech_params = model.lca.tech_params[model.lca.tech_params['uncertainty_type']>1]
# tech_params_copy = deepcopy(np.array([a for a in model.lca.tech_params], dtype = dt))
# tech_params_modified = deepcopy(np.array([a for a in model.lca.tech_params], dtype = dt))

# ## Reduce twice
# for ind in reduce_tech_inds:
#     tech_ind = uparams_where_dict_ranked['tech'][ind]
#     print(tech_params_modified[tech_ind])
#     assert tech_params_modified[tech_ind]["uncertainty_type"] == sa.LognormalUncertainty.id
#     tech_params_modified[tech_ind]['scale'] = tech_params_copy[tech_ind]['scale'] / 2
#     print(tech_params_modified[tech_ind])
    
# uncertain_tech_params_modified_where = np.where(tech_params_modified['uncertainty_type']>1)[0]
# uncertain_tech_params_modified  = tech_params_modified[uncertain_tech_params_modified_where]

# reduce_cf_inds = np.arange(uparams_ranked['cf'].shape[0])
# # 3. CF_params
# dt = model.lca.cf_params.dtype
# uncertain_cf_params = model.lca.cf_params[model.lca.cf_params['uncertainty_type']>1]
# cf_params_copy = deepcopy(np.array([a for a in model.lca.cf_params], dtype = dt))
# cf_params_modified = deepcopy(np.array([a for a in model.lca.cf_params], dtype = dt))

# ## Reduce twice
# for ind in reduce_cf_inds:
#     cf_ind = uparams_where_dict_ranked['cf'][ind]
#     print(cf_params_modified[cf_ind])
#     assert cf_params_modified[cf_ind]["uncertainty_type"] == sa.NormalUncertainty.id
#     cf_params_modified[cf_ind]['scale'] = cf_params_copy[cf_ind]['scale'] / 2
#     print(cf_params_modified[cf_ind])
    
# uncertain_cf_params_modified_where = np.where(cf_params_modified['uncertainty_type']>1)[0]
# uncertain_cf_params_modified  = cf_params_modified[uncertain_cf_params_modified_where]

In [None]:
# # 1. Bio_params
# dt = model.lca.bio_params.dtype
# uncertain_bio_params = model.lca.bio_params[model.lca.bio_params['uncertainty_type']>1]
# bio_params_copy = deepcopy(np.array([a for a in model.lca.bio_params], dtype = dt))
# bio_params_modified = deepcopy(np.array([a for a in model.lca.bio_params], dtype = dt))

# ## change normal to lognormal
# bio_to_lognormal_inds = [0,1,4,6]
# for ind in bio_to_lognormal_inds:
#     bio_ind = uparams_where_dict_ranked['bio'][ind]
#     print(bio_params_modified[bio_ind])
#     bio_params_modified[bio_ind]["uncertainty_type"] = sa.LognormalUncertainty.id
#     bio_params_modified[bio_ind]['loc'] = np.log(bio_params_copy[bio_ind]['loc'])
#     print(bio_params_modified[bio_ind])

# reduce_bio_inds = [2,3,5,7,8,9]
# ## Reduce twice
# for ind in reduce_bio_inds:
#     bio_ind = uparams_where_dict_ranked['bio'][ind]
#     print(bio_params_modified[bio_ind])
#     assert bio_params_modified[bio_ind]["uncertainty_type"] == sa.LognormalUncertainty.id
#     bio_params_modified[bio_ind]['scale'] = bio_params_copy[bio_ind]['scale'] / 2
#     print(bio_params_modified[bio_ind])
    
# uncertain_bio_params_modified_where = np.where(bio_params_modified['uncertainty_type']>1)[0]
# uncertain_bio_params_modified  = bio_params_modified[uncertain_bio_params_modified_where]

# Exp: Run MC with narrowed params

In [None]:
# %%time
# option = 'exp2'
# if option == 'exp1':
#     tag = 'BioModified_normal.exp1'
# elif option == 'exp2':
#     tag = '20Reduced_scale2.exp2'

# uparams_narrow = {
#     'tech': uncertain_tech_params_modified,
#     'bio': uncertain_bio_params_modified,
#     'cf': uncertain_cf_params_modified,
# }
# uparams_where_dict_narrow = {
#     'tech':  uncertain_tech_params_modified_where,
#     'bio':   uncertain_bio_params_modified_where,
#     'cf':   uncertain_cf_params_modified_where,
# }

# model_narrow = LCAModelBase(
#     demand,
#     method,
#     uncertain_params = uparams_narrow,
#     uncertain_params_selected_where_dict=uparams_where_dict_narrow,
# )

# fp_Xr = "/data/user/kim_a/protocol_gsa/arrays/validation.X.rescaled.all.{}.{}.hdf5".format(
#     iterations_validation, validation_seed
# )
# fp_Y_narrow = "/data/user/kim_a/protocol_gsa/arrays/validation.Y.narrow.{}.{}.{}.hdf5".format(
#     iterations_validation, validation_seed, tag
# )
# fp_Y_narrow = Path(fp_Y_narrow)
# Xr_prev = read_hdf5_array(fp_Xr)

# if not fp_Y_narrow.exists():
#     np.random.seed(validation_seed)
#     X = np.random.rand(iterations_validation, model_narrow.num_params)
#     Xr = model_narrow.rescale(X)
#     del X
#     Y_narrow = model_narrow(Xr)
#     write_hdf5_array(Y_narrow, fp_Y_narrow)
# else:
#     Y_narrow = read_hdf5_array(fp_Y_narrow).flatten()

In [None]:
# from gsa_framework.visualization.plotting import plot_histogram_Y1_Y2

# ii = parameter_choice_ranked[19]
# Y1 = Xr_prev[:,ii]
# Y2 = Xr[:,ii]
# plot_histogram_Y1_Y2(Y1, Y2)
# plot_histogram_Y1_Y2(val.Y_all, Y_narrow)
# np.std(val.Y_all) / np.std(Y_narrow)

# GSA results for SI of paper2

# Convergence, robustness

In [None]:
# Output file of the active option: the GSA's default file name with `option`
# inserted after every "None" in the name (str.replace substitutes all occurrences).
filepath_Y = gsa.filepath_Y.parent / gsa.filepath_Y.name.replace("None", "None.{}".format(option))

In [None]:
from gsa_framework.sensitivity_methods.saltelli_sobol import sobol_indices_stability
from gsa_framework.convergence_robustness_validation import Convergence, Robustness
from gsa_framework.utils import read_hdf5_array, read_pickle, write_pickle

# Bootstrap settings for the stability (robustness) analysis of the Sobol indices.
num_bootstrap = 1000
num_steps = 50

# Convergence class
conv = Convergence(
    filepath_Y,
    gsa.num_params,
    gsa.generate_gsa_indices,
    gsa.gsa_label,
    gsa.write_dir,
    num_steps=num_steps,
)
# One seed per (convergence step, bootstrap replicate), drawn reproducibly from
# the GSA seed through numpy's global random state.
np.random.seed(gsa.seed)
stability_seeds = np.random.randint(
    low=0,
    high=2147483647,
    size=(len(conv.iterations_for_convergence), num_bootstrap),
)

# Cache file name encodes the GSA settings, the bootstrap size and the active option.
filename_S = "stability.S.{}.{}.{}Step{}.{}.{}.{}.pickle".format(
    gsa.gsa_label,
    gsa.sampling_label,
    gsa.iterations,
    conv.iterations_step,
    num_bootstrap,
    gsa.seed,
    option,
)
filepath_S = gsa.write_dir / "arrays" / filename_S
if filepath_S.exists():
    print("--> {} already exists".format(filename_S))
    S_dict_stability = read_pickle(filepath_S)
else:
    # Compute the stability results from the stored model outputs and cache them.
    Y = read_hdf5_array(filepath_Y).flatten()
    S_dict_stability = sobol_indices_stability(
        Y,
        gsa.num_params,
        conv.iterations_for_convergence,
        num_bootstrap,
        stability_seeds,
    )
    write_pickle(S_dict_stability, filepath_S)

### 1. For convergence, total order should be higher than first order, first order should be non-negative

In [None]:
# Sobol indices of the active option.
if option == 'localSA':
    S_salt = S_lsa
elif option == 'sct':
    S_salt = S_sct

# One x position per ranked input.
x = np.arange(num_params_ranking)

fig = go.Figure()

# First order indices: blue default markers.
fig.add_trace(
    go.Scatter(
        x=x,
        y=S_salt['First order'],
        mode="markers",
        name="First order",
        marker=dict(
            color="blue",
        ),
        showlegend=True,
    ),
)

# Total order indices: red crosses, drawn over the first order markers.
fig.add_trace(
    go.Scatter(
        x=x,
        y=S_salt['Total order'],
        mode="markers",
        name="Total order",
        marker=dict(
            color="red",
            symbol="x",
        ),
        showlegend=True,
    ),
)
fig.update_xaxes(title='Model inputs')
# Last expression of the cell: its return value is what the notebook displays.
fig.update_yaxes(title='Sobol indices')

### 2. Confidence intervals at last step for all inputs

In [None]:
from dev.utils_paper_plotting import *

# Robustness object built from the stability results of the active option; the
# cells below read its `confidence_intervals`, `confidence_intervals_max` and
# `iterations` attributes.
num_ranks = 20
stability_dicts = [S_dict_stability]
st = Robustness(
    stability_dicts,
    write_dir,
    num_ranks=num_ranks,
    # 90 percent of the ranked inputs, truncated to an integer.
    num_params_screening=int(0.90 * num_params_ranking),
)

In [None]:
# Sobol indices per input with error bars from the last row of the confidence
# interval arrays; one subplot row for first order, one for total order.
x = np.arange(num_params_ranking)

sa_names = ['first', 'total']
# Also used by the next plotting cell; both entries currently map to the same color.
colors = {
    'first': color_blue_tuple, 
    'total': color_blue_tuple,
}
# Not used in this cell (markers and error bars below use fixed colors).
opacity = 0.65

fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    subplot_titles = sa_names,
)

col = 1

for row, sa_name in enumerate(sa_names):
    # Subplot rows are 1-based.
    row += 1
    # Assigned but not referenced again inside this loop.
    color = colors[sa_name]
    if sa_name == 'first':
        y = S_salt['First order']
    elif sa_name ==  'total':
        y = S_salt['Total order']
    # Interval width per input at the last step; half is drawn on each side.
    width = st.confidence_intervals[sa_name][-1,:]
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode="markers",
            name=sa_name,
            marker=dict(
                color=color_orange_rgb,
            ),
            showlegend=False,
            error_y=dict(
                type='data', # value of error bar given in data coordinates
                symmetric=False,
                array=width/2,
                arrayminus=width/2,
                visible=True,
                color=color_blue_rgb,
            )
        ),
        row=row,
        col=col
    )

fig.update_xaxes(title='Model inputs')
# Last expression of the cell: its return value is what the notebook displays.
fig.update_yaxes(title='Sobol indices')

### 3. Max confidence intervals at all steps, $Stat_{indices}$

In [None]:
# Band of the maximum confidence interval width versus the number of iterations,
# centered on zero; one subplot row per index type. Relies on `sa_names` and
# `colors` defined in the previous plotting cell.
opacity = 0.65

fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    subplot_titles = sa_names,
)

col = 1
for row, sa_name in enumerate(sa_names):
    # Subplot rows are 1-based.
    row += 1
    showlegend = False
    # Indexed as color[0..2] below, i.e. used as an (r, g, b) triple.
    color = colors[sa_name]
    x = st.iterations[sa_name]
    # The band is drawn around a zero line, so only its width carries information.
    y = np.zeros(len(x))
    width = st.confidence_intervals_max[sa_name]
    lower = y - width / 2
    upper = y + width / 2
    # Center line.
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode="lines",
            opacity=1,
            showlegend=showlegend,
            name = sa_name,
            marker=dict(
                color="rgba({},{},{},{})".format(
                    color[0],
                    color[1],
                    color[2],
                    1,
                ),
            ),
        ),
        row=row,
        col=col,
    )
    showlegend = False
    # Invisible lower bound (line width 0); it is the reference for the fill below.
    fig.add_trace(
        go.Scatter(
            x=x,
            y=lower,
            mode="lines",
            opacity=opacity,
            showlegend=False,
            marker=dict(
                color="rgba({},{},{},{})".format(
                    color[0],
                    color[1],
                    color[2],
                    opacity,
                ),
            ),
            line=dict(width=0),
        ),
        row=row,
        col=col,
    )
    # Invisible upper bound; "tonexty" fills the area down to the previous trace.
    fig.add_trace(
        go.Scatter(
            x=x,
            y=upper,
            showlegend=False,
            line=dict(width=0),
            mode="lines",
            fillcolor="rgba({},{},{},{})".format(
                color[0],
                color[1],
                color[2],
                opacity,
            ),
            fill="tonexty",
        ),
        row=row,
        col=col,
    )
fig.update_xaxes(title='MC iterations')
# Last expression of the cell: its return value is what the notebook displays.
fig.update_yaxes(title='Max confidence interval')

# Prioritized list

In [None]:
# NOTE(review): the indices are taken from the localSA run unconditionally, while
# `st` (stability results) and `parameter_choice_inf` used below were derived from
# `option`. With option == "sct" the three would describe different input sets --
# confirm that this is intended.
S_salt = S_lsa

# Inputs ordered by decreasing total order index, with the first order indices and
# the last-step confidence interval widths reordered the same way.
total = S_salt['Total order']
total_argsort = np.argsort(total)[::-1]
total_sorted = total[total_argsort]
first_sorted = S_salt['First order'][total_argsort]
width = st.confidence_intervals['total'][-1,:]
width_sorted = width[total_argsort]

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=np.arange(len(total_sorted)),
        y=total_sorted,
        mode="markers",
        name="Total order",
        showlegend=False,
        marker_color = color_orange_rgb,
        error_y=dict(
            type='data', # value of error bar given in data coordinates
            symmetric=False,
            array=width_sorted/2,
            arrayminus=width_sorted/2,
            visible=True,
            color=color_blue_rgb,
        )
    ),
)
fig.update_xaxes(title='Model inputs')
# Last expression of the cell: its return value is what the notebook displays.
fig.update_yaxes(title='Total order indices')

In [None]:
# Print the `num_ranked` inputs with the highest total order index together with
# the activities / flows they belong to.
num_ranked = 23

# Flat input positions of the top-ranked inputs, best first.
parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]

# Split the ranked positions by input type. The `where_*` arrays hold the rank of
# each input and the `ind_*` arrays its position inside the per-type parameters.
len_tech = model.uncertain_exchange_lengths['tech']
where_tech = np.where(parameter_choice_ranked<len_tech)[0]
ind_tech = parameter_choice_ranked[where_tech]
inf_tech_params = {
    where_tech[i]: {
        "type": "tech",
        "param": model.uncertain_params['tech'][ind_tech[i]]
    }
    for i in range(len(where_tech))
}

len_bio = model.uncertain_exchange_lengths['bio']
where_bio = np.where(np.logical_and(
    parameter_choice_ranked>=len_tech,
    parameter_choice_ranked<len_tech+len_bio,
))[0]
ind_bio = parameter_choice_ranked[where_bio]-len_tech
inf_bio_params = {
    where_bio[i]: {
        "type": "bio",
        "param": model.uncertain_params['bio'][ind_bio[i]]
    }
    for i in range(len(where_bio))
}

len_cf = model.uncertain_exchange_lengths['cf']
where_cf = np.where(np.logical_and(
    parameter_choice_ranked>=len_tech+len_bio,
    parameter_choice_ranked<len_tech+len_bio+len_cf,
))[0]
ind_cf = parameter_choice_ranked[where_cf]-len_tech-len_bio
inf_cf_params = {
    where_cf[i]: {
        "type": "cf",
        "param": model.uncertain_params['cf'][ind_cf[i]]
    }
    for i in range(len(where_cf))
}

# Merge the three groups and order them by rank.
params = {**inf_tech_params, **inf_bio_params, **inf_cf_params}
params = {k : params[k] for k in sorted(params)}

# Positions of the activity and biosphere dictionaries in `reverse_dict()`'s result.
TECH_IND = 0
BIO_IND = 2
distributions = {
    sa.NormalUncertainty.id: 'normal',
    sa.LognormalUncertainty.id: 'lognml',
    sa.UniformUncertainty.id: 'unifrm',
}

# Build the reversed index dictionaries once instead of on every lookup in the loop.
reverse_dicts = model.lca.reverse_dict()

for rank, dict_ in params.items():
    exchange_type = dict_['type']
    param = dict_['param']
    row = param['row']
    col = param['col']
    print(
        "{:2d}. total={:5.3f}, {}, amount={:8.5f}, scale={:5.3f}".format(
            rank, 
            total_sorted[rank],
            distributions[param['uncertainty_type']],
            param['amount'],
            param['scale'],
        )
    )      
    if exchange_type=='tech':
        act_in = bd.get_activity(reverse_dicts[TECH_IND][row])
        act_out = bd.get_activity(reverse_dicts[TECH_IND][col])
        print("act out:    {}, {}".format(act_out['name'], act_out['location']))
        print("act  in:    {}, {}, {} \n".format(act_in['name'], act_in['unit'], act_in['location']))
    elif exchange_type=='bio':
        act_in = bd.get_activity(reverse_dicts[BIO_IND][row])
        act_out = bd.get_activity(reverse_dicts[TECH_IND][col])
        print("act out:    {}, {}".format(act_out['name'], act_out['location']))
        print("act  in:    {}, {} \n".format(act_in['name'], act_in['unit']))
    elif exchange_type=='cf':
        act_in = bd.get_activity(reverse_dicts[BIO_IND][row])
        print("GWP of:    {} \n".format(act_in['name'])) 

In [None]:
# Validation with only the `num_ranked` top-ranked inputs varying, followed by the
# correlation and histogram plots against the run where all inputs vary.
num_ranked = 24
parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]

if __name__ == "__main__":
    # `tag` and `Y_subset` are also needed by the plotting calls below, which sit
    # outside this guard.
    tag = "TotalRanked.graph"
    Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_ranked, tag=tag)

fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_ranked.shape[0], tag=tag)
fig.show()

fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_ranked.shape[0], tag=tag)
fig.show()

In [None]:
from gsa_framework.visualization.plotting import *

def plot_hist_val(Y_subset, parameter_choice_ranked, fig_tag=None):
    """Plot overlaid histograms of ``val.Y_all`` and ``Y_subset`` and save them as PDF.

    Both histograms share 60 bins spanning the range of ``val.Y_all``. The
    y-axis is fixed to ``[0, 25]``. The output path is obtained from
    ``val.create_figure_Y_all_Y_inf_histogram_filepath``.

    Parameters
    ----------
    Y_subset
        Second set of values passed to ``plot_histogram_Y1_Y2``, drawn with
        the trace name "Only influential vary".
    parameter_choice_ranked
        Array of the selected inputs; only ``shape[0]`` is used, as the first
        argument when building the output file path.
    fig_tag
        Tag used when building the output file path. When ``None`` (default)
        the notebook-level variable ``tag`` is used, so existing calls keep
        working unchanged.

    Returns
    -------
    The figure returned by ``plot_histogram_Y1_Y2``, after the axis update and
    after it has been written to disk.

    Notes
    -----
    Relies on the notebook-level objects ``val`` and ``plot_histogram_Y1_Y2``.
    """
    if fig_tag is None:
        # Backward-compatible fallback to the notebook-global `tag`.
        fig_tag = tag
    # Use the range of Y_all for both traces so the histograms are comparable.
    bin_min = min(val.Y_all)
    bin_max = max(val.Y_all)
    fig = plot_histogram_Y1_Y2(
        val.Y_all,
        Y_subset,
        bin_min=bin_min,
        bin_max=bin_max,
        num_bins=60,
        trace_name1="All parameters vary",
        trace_name2="Only influential vary",
        color1="#636EFA",
        color2="#EF553B",
        opacity=0.65,
        xaxes_title_text=val.model_output_name,
        showtitle=True,
    )
    # Fixed y-range so figures saved for different subset sizes share one scale.
    fig.update_yaxes(range=[0,25])
    fig.write_image(
        val.create_figure_Y_all_Y_inf_histogram_filepath(
            parameter_choice_ranked.shape[0], fig_tag, "pdf"
        ).as_posix()
    )
    return fig

In [None]:
%%time
# Validation sweep for the "graph" tag: for a growing number of selected
# inputs, compute Y_subset, record its Spearman correlation with val.Y_all,
# and write the correlation and histogram figures.
fig_format = ["pdf"]
# One Spearman coefficient per entry of `num_ranked_arr`, appended in loop order.
spearman_yy = []
num_ranked_max = 25
# Subset sizes to test: 1..9 in steps of one, then every second value from 10
# up to (but excluding) `num_ranked_max`.
num_ranked_arr = np.hstack(
    [
        np.arange(1,10),
        np.arange(10,num_ranked_max,2)
    ]
)
tag = "TotalRanked.{}".format("graph")
# NOTE(review): the summary figure below is built outside this guard and reads
# `spearman_yy`, which is filled only inside it.
if __name__ == "__main__":
    for num_ranked in num_ranked_arr:
        print(num_ranked)
        # First `num_ranked` entries of `parameter_choice_inf` reordered by `total_argsort`.
        parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]
        Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_ranked, tag=tag)
        # spearmanr returns (correlation, p-value); only the correlation is kept.
        s, _ = spearmanr(val.Y_all, Y_subset)
        spearman_yy.append(s)
        # The figure returned here is not used further; `fig` is rebound after the loop.
        fig=val.plot_correlation_Y_all_Y_inf(
            Y_subset, num_influential=parameter_choice_ranked.shape[0], tag=tag, fig_format=fig_format,
        )
        # plot_hist_val is called without a tag and picks up the notebook-level `tag` set above.
        plot_hist_val(Y_subset, parameter_choice_ranked)

# Summary figure: Spearman correlation as a function of the number of selected inputs.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=num_ranked_arr,
        y=spearman_yy,
        mode="markers+lines",
        showlegend=False,
        # `color_blue_rgb` is not defined in this cell; presumably it comes from
        # the wildcard plotting import -- confirm.
        marker_color = color_blue_rgb,
    ),
)
fig.update_xaxes(title='Number of varying influential inputs')
fig.update_yaxes(title='Spearman correlation between Y_all and Y_inf')

In [None]:
%%time
# Validation sweep for the "protocol" tag. The statements are the same as in
# the "graph" sweep cell; only the value formatted into `tag` differs.
# NOTE(review): both sweeps index with the same `total_argsort`; make sure it
# holds the ranking that belongs to this tag when the cell is run.
fig_format = ["pdf"]
# One Spearman coefficient per entry of `num_ranked_arr`, appended in loop order.
spearman_yy = []
num_ranked_max = 25
# Subset sizes to test: 1..9 in steps of one, then every second value from 10
# up to (but excluding) `num_ranked_max`.
num_ranked_arr = np.hstack(
    [
        np.arange(1,10),
        np.arange(10,num_ranked_max,2)
    ]
)
tag = "TotalRanked.{}".format("protocol")
# NOTE(review): the summary figure below is built outside this guard and reads
# `spearman_yy`, which is filled only inside it.
if __name__ == "__main__":
    for num_ranked in num_ranked_arr:
        print(num_ranked)
        # First `num_ranked` entries of `parameter_choice_inf` reordered by `total_argsort`.
        parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]
        Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_ranked, tag=tag)
        # spearmanr returns (correlation, p-value); only the correlation is kept.
        s, _ = spearmanr(val.Y_all, Y_subset)
        spearman_yy.append(s)
        # The figure returned here is not used further; `fig` is rebound after the loop.
        fig=val.plot_correlation_Y_all_Y_inf(
            Y_subset, num_influential=parameter_choice_ranked.shape[0], tag=tag, fig_format=fig_format,
        )
        # plot_hist_val is called without a tag and picks up the notebook-level `tag` set above.
        plot_hist_val(Y_subset, parameter_choice_ranked)

# Summary figure: Spearman correlation as a function of the number of selected inputs.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=num_ranked_arr,
        y=spearman_yy,
        mode="markers+lines",
        showlegend=False,
        # `color_blue_rgb` is not defined in this cell; presumably it comes from
        # the wildcard plotting import -- confirm.
        marker_color = color_blue_rgb,
    ),
)
fig.update_xaxes(title='Number of varying influential inputs')
fig.update_yaxes(title='Spearman correlation between Y_all and Y_inf')

# Inverted validation plots

In [None]:
%%time
# Inverted validation: select every parameter index except the first
# `num_ranked` entries of the ranked selection.
num_ranked = 9
parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]
# All indices in 0..model.num_params-1 that are NOT in `parameter_choice_ranked`
# (np.setdiff1d returns them sorted and unique).
parameter_choice_ranked_inv = np.setdiff1d(np.arange(model.num_params), parameter_choice_ranked)

# NOTE(review): `tag` and `Y_subset` are assigned only inside this guard, while
# the plotting calls below read them unconditionally.
if __name__ == "__main__":
    tag = "TotalRankedInv"
    Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_ranked_inv, tag=tag)

# NOTE(review): `num_influential` is the size of the non-inverted selection
# (`parameter_choice_ranked`), not of `parameter_choice_ranked_inv` that was
# used to compute Y_subset -- confirm this labelling is intended.
fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_ranked.shape[0], tag=tag)
fig.show()

fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_ranked.shape[0], tag=tag)
fig.show()

In [None]:
%%time
# Inverted validation with a larger excluded set: select every parameter index
# except the first `num_ranked` entries of the ranked selection.
num_ranked = 23
parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]
# All indices in 0..model.num_params-1 that are NOT in `parameter_choice_ranked`
# (np.setdiff1d returns them sorted and unique).
parameter_choice_ranked_inv = np.setdiff1d(np.arange(model.num_params), parameter_choice_ranked)

# NOTE(review): `tag` and `Y_subset` are assigned only inside this guard, while
# the plotting calls below read them unconditionally.
if __name__ == "__main__":
    tag = "TotalRankedInv"
    Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_ranked_inv, tag=tag)

# NOTE(review): `num_influential` is the size of the non-inverted selection
# (`parameter_choice_ranked`), not of `parameter_choice_ranked_inv` that was
# used to compute Y_subset -- confirm this labelling is intended.
fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_ranked.shape[0], tag=tag)
fig.show()

fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_ranked.shape[0], tag=tag)
fig.show()