In [None]:
import bw2data as bd
import bw2calc as bc
import numpy as np
from copy import deepcopy
from pathlib import Path
from gsa_framework.models.life_cycle_assessment import LCAModelBase
from gsa_framework.sensitivity_analysis.correlations import Correlations
from gsa_framework.sensitivity_analysis.saltelli_sobol import SaltelliSobol
from gsa_framework.utils import read_pickle, write_hdf5_array
from setups_paper_gwp import setup_corr, setup_lca_model_protocol
# from gsa_framework.utils import read_hdf5_array, write_hdf5_array
# from gsa_framework.visualization.plotting import plot_correlation_Y1_Y2, plot_histogram_Y1_Y2

In [None]:
# Base directory handed to the setup helpers below.
path_base = Path('/data/user/kim_a')

# LCA model: the demand is one unit of the single activity in
# "CH consumption 1.0" whose name contains "Food".
bd.projects.set_current("GSA for protocol")
co = bd.Database("CH consumption 1.0")
food_activities = [act for act in co if "Food" in act["name"]]
assert len(food_activities) == 1
demand_act = food_activities[0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")

# Correlation-based GSA; parameters are then ordered by decreasing
# absolute Spearman coefficient.
num_params = 20000
iter_corr = 4 * num_params
gsa_corr = setup_corr(num_params, iter_corr, setup_lca_model_protocol, path_base)
S = gsa_corr.perform_gsa()
spearman = S['spearman']
inf_sorted = np.abs(spearman).argsort()[::-1]

model, write_dir, gsa_seed = setup_lca_model_protocol(path_base, num_params=None, write_dir=None)

# Index selections for biosphere and characterization exchanges, read from
# the pickles stored in the model's "LSA_scores" directory.
path_lsa = model.write_dir / "LSA_scores"
path_lsa_include_inds_bio = path_lsa / "include_inds_bio.pickle"
path_lsa_include_inds_cf = path_lsa / "include_inds_cf.pickle"
include_inds_bio = read_pickle(path_lsa_include_inds_bio)
include_inds_cf = read_pickle(path_lsa_include_inds_cf)

# Shift the per-type indices so they address one stacked vector laid out
# as [tech | bio | cf].
num_tech = model.uncertain_exchange_lengths['tech']
num_bio = model.uncertain_exchange_lengths['bio']
include_inds_tech_forX = np.arange(num_tech)
include_inds_bio_forX = num_tech + include_inds_bio
include_inds_cf_forX = num_tech + num_bio + include_inds_cf
parameter_choice_rm_noninf = np.hstack(
    [include_inds_tech_forX, include_inds_bio_forX, include_inds_cf_forX]
)

# Keep only the positions reported by model.get_where_high_var for the
# stored LSA scores.
scores_dict = model.get_lsa_scores_pickle(path_lsa, model.uncertain_exchanges_types)
num_params_lsa = 20000  # 10000
where_high_var = model.get_where_high_var(scores_dict, num_params_lsa)
parameter_choice_rm_lowinf = parameter_choice_rm_noninf[where_high_var]

# Settings for the ranking step: take the num_params_ranking parameters with
# the largest absolute Spearman coefficient and store their indices sorted.
num_params_ranking = 200
gsa_seed_ranking = 555666
iterations_ranking = num_params_ranking * 400
parameter_choice_inf = np.sort(
    parameter_choice_rm_lowinf[inf_sorted[:num_params_ranking]]
)

In [None]:
# Sanity check: number of parameter indices kept for the ranking step.
len(parameter_choice_inf)

# Graph traversal screening

In [None]:
import numpy as np
import bw2data as bd
import bw2calc as bc

from dev.utils_graph_traversal import \
    filter_uncertain_technosphere_exchanges, \
    filter_uncertain_biosphere_exchanges, \
    filter_uncertain_characterization_exchanges, \
    collect_uncertain_exchanges


# Number of uncertain exchanges requested from the graph traversal screening.
num_params = 200

# Same project, demand and method as for the correlation-based screening.
bd.projects.set_current("GSA for protocol")
co = bd.Database("CH consumption 1.0")
food_activities = [act for act in co if "Food" in act["name"]]
assert len(food_activities) == 1
demand_act = food_activities[0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
lca = bc.LCA(demand, method)
lca.lci()
lca.lcia()

# Cutoff is a fraction of the total score (e.g. 0.005 means 0.5 percent).
cutoff = 0.007
score_cutoff = cutoff * lca.score

# Uncertain exchanges of each type returned by the traversal filters.
tech_inds_uncertain = filter_uncertain_technosphere_exchanges(lca, cutoff)
bio_inds_uncertain = filter_uncertain_biosphere_exchanges(lca, cutoff)
cf_inds_uncertain = filter_uncertain_characterization_exchanges(lca, cutoff)

exchanges_dict = {
    'tech': tech_inds_uncertain,
    'bio': bio_inds_uncertain,
    'cf': cf_inds_uncertain,
}

subset_exchanges_dict = collect_uncertain_exchanges(exchanges_dict, num_params)

# Positions of the row and column entries inside each exchange record.
row_ind = 0
col_ind = 1

# For every selected exchange, find its position in the model's uncertain
# parameter array of the same type. 'cf' entries are matched on row only,
# the other types on both row and column. Exactly one match is required.
parameter_choice_dict = {}
for uncertain_exchange_type, exc_list in subset_exchanges_dict.items():
    params = model.uncertain_params[uncertain_exchange_type]
    match_on_col = uncertain_exchange_type != 'cf'
    positions = []
    for exc in exc_list:
        mask = params['row'] == exc[row_ind]
        if match_on_col:
            mask = np.logical_and(mask, params['col'] == exc[col_ind])
        hits = np.where(mask)[0]
        assert len(hits) == 1
        positions.append(hits[0])
    parameter_choice_dict[uncertain_exchange_type] = sorted(positions)

In [None]:
# Convert the per-type positions found by the graph traversal into positions
# in the stacked parameter vector, which is laid out as [tech | bio | cf]
# (the same layout used for parameter_choice_rm_noninf).
#
# The entries of parameter_choice_dict are plain Python lists (they come from
# sorted()), so they are converted to integer arrays before shifting:
# `list + scalar` is not an element-wise offset, and `+` between the three
# collections would not concatenate them once they are arrays.
parameter_choice_inf_tech = np.asarray(parameter_choice_dict['tech'], dtype=int)
parameter_choice_inf_bio = (
    np.asarray(parameter_choice_dict['bio'], dtype=int)
    + model.uncertain_exchange_lengths['tech']
)
parameter_choice_inf_cf = (
    np.asarray(parameter_choice_dict['cf'], dtype=int)
    + model.uncertain_exchange_lengths['tech']
    + model.uncertain_exchange_lengths['bio']
)

# Join the three index groups into one flat index array.
parameter_choice_inf_graph = np.hstack(
    [parameter_choice_inf_tech, parameter_choice_inf_bio, parameter_choice_inf_cf]
)

# Compare our screening with graph traversal

In [None]:
# Show both index selections together: the Spearman-based choice and the
# graph-traversal choice.
parameter_choice_inf, parameter_choice_inf_graph

# GSA for prioritized list

In [None]:
# parameter_choice_inf indexes a stacked vector laid out as [tech | bio | cf].
# The two boundaries below mark where the tech range and the bio range end.
tech_end = model.uncertain_exchange_lengths['tech']
bio_end = model.uncertain_exchange_lengths['bio'] + model.uncertain_exchange_lengths['tech']

is_tech = parameter_choice_inf < tech_end
is_bio = (parameter_choice_inf >= tech_end) & (parameter_choice_inf < bio_end)
is_cf = parameter_choice_inf >= bio_end

# Technosphere: stacked indices already equal the per-type indices.
parameter_choice_inf_tech = parameter_choice_inf[is_tech]
tech_inds = parameter_choice_inf_tech
tech_params = model.uncertain_params['tech'][tech_inds]
tech_where = model.uncertain_params_selected_where_dict['tech'][tech_inds]

# Biosphere: remove the tech offset to get per-type indices.
parameter_choice_inf_bio = parameter_choice_inf[is_bio]
bio_inds = parameter_choice_inf_bio - tech_end
bio_params = model.uncertain_params['bio'][bio_inds]
bio_where = model.uncertain_params_selected_where_dict['bio'][bio_inds]

# Characterization factors: remove the tech and bio offsets.
parameter_choice_inf_cf = parameter_choice_inf[is_cf]
cf_inds = parameter_choice_inf_cf - bio_end
cf_params = model.uncertain_params['cf'][cf_inds]
cf_where = model.uncertain_params_selected_where_dict['cf'][cf_inds]

# Reduced inputs, keyed by exchange type, for the ranking model.
uncertain_params = {'tech': tech_params, 'bio': bio_params, 'cf': cf_params}
uncertain_params_selected_where_dict = {'tech': tech_where, 'bio': bio_where, 'cf': cf_where}

In [None]:
# Build the reduced LCA model from the selected uncertain parameters and the
# SaltelliSobol GSA object that uses it.
# NOTE(review): later cells use `model_ranking` and `gsa` outside this guard,
# so they only exist when this cell runs with __name__ == "__main__".
# NOTE(review): gsa_seed_ranking is defined earlier but is not passed to
# SaltelliSobol here — confirm whether a seed was intended.
if __name__ == "__main__":    
    model_ranking = LCAModelBase(
        demand, 
        method, 
        uncertain_params, 
        uncertain_params_selected_where_dict,
    )
    gsa = SaltelliSobol(iterations=iterations_ranking, model=model_ranking, write_dir=write_dir)
# Disabled: running the GSA and plotting its results directly from this cell.
#     S_salt = gsa.perform_gsa()
#     fig = gsa.plot_sa_results(S_salt)

In [None]:
# Show the number of iterations and the file path used for the model outputs Y.
iterations_ranking, gsa.filepath_Y

In [None]:
%%time
# Generate unit-cube samples, pass them through model_ranking.rescale, evaluate
# the ranking model on the rescaled inputs, and write the outputs to
# gsa.filepath_Y.
X = gsa.generate_unitcube_samples(iterations_ranking)
Xr = model_ranking.rescale(X)
Y = model_ranking(Xr)
write_hdf5_array(Y, gsa.filepath_Y)