# Setups

In [None]:
import bw2data as bd
import bw2calc as bc
import numpy as np
from copy import deepcopy
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import stats_arrays as sa
from scipy.stats import spearmanr

from gsa_framework.models.life_cycle_assessment import LCAModelBase
from gsa_framework.sensitivity_analysis.correlations import Correlations
from gsa_framework.sensitivity_analysis.saltelli_sobol import SaltelliSobol
from gsa_framework.utils import read_pickle, write_pickle, write_hdf5_array, read_hdf5_array
from setups_paper_gwp import setup_corr, setup_lca_model_protocol
# from gsa_framework.utils import read_hdf5_array, write_hdf5_array
# from gsa_framework.visualization.plotting import plot_correlation_Y1_Y2, plot_histogram_Y1_Y2

from decimal import Decimal

In [None]:
# Base directory handed to the project setup helpers below.
path_base = Path('/data/user/kim_a')

# LCA model
# Select the Brightway project and locate the single activity in
# "CH consumption 1.0" whose name contains "Food"; it is the functional unit.
bd.projects.set_current("GSA for protocol")
co = bd.Database("CH consumption 1.0")
demand_act = [act for act in co if "Food" in act["name"]]
assert len(demand_act) == 1
demand_act = demand_act[0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")

# Correlation-based GSA: take the Spearman coefficients and order the
# parameters by decreasing absolute value.
num_params = 20000
iter_corr = 4*num_params
gsa_corr = setup_corr(num_params, iter_corr, setup_lca_model_protocol, path_base)
S = gsa_corr.perform_gsa()
spearman = S['spearman']
spearman_sorted = np.argsort(np.abs(spearman))[::-1]

# Model object plus its output directory and seed, as returned by the setup helper.
model, write_dir, gsa_seed = setup_lca_model_protocol(
    path_base,
    num_params=None,
    write_dir=None,
)

write_dir_arr = write_dir / "arrays"
write_dir_sct = write_dir / "supply_chain"

# Pickled index arrays of the biosphere and characterization parameters that
# were kept by the local sensitivity analysis (LSA) step.
path_lsa = model.write_dir / "LSA_scores"
path_lsa_include_inds_bio = path_lsa / "include_inds_bio.pickle"
include_inds_bio = read_pickle(path_lsa_include_inds_bio)
path_lsa_include_inds_cf = path_lsa / "include_inds_cf.pickle"
include_inds_cf = read_pickle(path_lsa_include_inds_cf)

# Translate per-type indices into positions in the concatenated parameter
# vector, which is laid out as [tech | bio | cf]: bio indices are shifted by
# the number of tech parameters, cf indices by the number of tech + bio ones.
include_inds_tech_forX = np.arange(model.uncertain_exchange_lengths['tech'])
include_inds_bio_forX  = model.uncertain_exchange_lengths['tech'] + include_inds_bio
include_inds_cf_forX   = model.uncertain_exchange_lengths['tech'] + \
                         model.uncertain_exchange_lengths['bio']  + include_inds_cf
parameter_choice_rm_noninf = np.hstack(
    [include_inds_tech_forX, include_inds_bio_forX, include_inds_cf_forX]
)

# Narrow the selection further to the entries flagged by get_where_high_var.
scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores", model.uncertain_exchanges_types)
num_params_lsa = 20000 #10000
where_high_var = model.get_where_high_var(scores_dict, num_params_lsa)
parameter_choice_rm_lowinf = parameter_choice_rm_noninf[where_high_var]

# Keep the num_params_ranking parameters with the largest |Spearman| and store
# their indices in ascending order.
# NOTE(review): this assumes `spearman` is aligned element-wise with
# `parameter_choice_rm_lowinf` — confirm against setup_corr.
num_params_ranking = 200
gsa_seed_ranking = 555666
iterations_ranking = num_params_ranking * 1600
parameter_choice_inf_lsa = parameter_choice_rm_lowinf[spearman_sorted[:num_params_ranking]]
parameter_choice_inf_lsa = np.sort(parameter_choice_inf_lsa)

# Graph traversal screening

In [None]:
import numpy as np
import bw2data as bd
import bw2calc as bc

from dev.utils_graph_traversal import \
    filter_uncertain_technosphere_exchanges, \
    filter_uncertain_biosphere_exchanges, \
    filter_uncertain_characterization_exchanges, \
    collect_uncertain_exchanges

# Directory where the graph-traversal screening results are cached.
write_dir_sct = write_dir / 'supply_chain'

# Same project, functional unit and method as in the setup cell, repeated here.
bd.projects.set_current("GSA for protocol")
co = bd.Database("CH consumption 1.0")
demand_act = [act for act in co if "Food" in act["name"]]
assert len(demand_act) == 1
demand_act = demand_act[0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
# Build the LCA and compute the inventory and the impact score; `lca.score`
# serves as the reference total in the exploratory cells further down.
lca = bc.LCA(demand, method)
lca.lci()
lca.lcia()

def _locate_parameter_indices(params, exchanges, match_col):
    """Return the sorted positions in ``params`` that correspond to ``exchanges``.

    Each exchange is matched on ``params['row']`` (element 0 of the exchange)
    and, when ``match_col`` is true, also on ``params['col']`` (element 1).
    Exactly one match per exchange is required.
    """
    row_ind, col_ind = 0, 1
    positions = []
    for exc in exchanges:
        mask = params['row'] == exc[row_ind]
        if match_col:
            mask = np.logical_and(mask, params['col'] == exc[col_ind])
        where = np.where(mask)[0]
        assert len(where) == 1, "expected exactly one parameter per exchange, found {}".format(len(where))
        positions.append(where[0])
    # Explicit integer dtype so an empty selection stays usable as an index array.
    return np.array(sorted(positions), dtype=np.intp)


def graph_traversal_screening(lca, model, write_dir, cutoff, num_params_ranking):
    """Screen uncertain exchanges by graph traversal and map them to model parameters.

    Results are cached in ``write_dir``: the final result under
    ``cutoff<cutoff>.params<num_params_ranking>.pickle`` and the intermediate
    filtered exchanges under ``exchanges_dict.cutoff<cutoff>.pickle``. Existing
    cache files are loaded instead of being recomputed.

    Parameters
    ----------
    lca
        LCA object forwarded to the ``filter_uncertain_*_exchanges`` helpers.
    model
        Object exposing ``uncertain_params`` and ``uncertain_exchange_lengths``,
        both indexed by 'tech', 'bio' and 'cf'.
    write_dir
        Path-like directory used for the cache files.
    cutoff
        Cutoff forwarded to the filter helpers; also part of the cache file names.
    num_params_ranking
        Number of exchanges requested from ``collect_uncertain_exchanges`` for
        the 'ranking' subset.

    Returns
    -------
    dict
        Keys 'ranking' and 'all'. Each value holds "parameter_choice_dict"
        (per-type sorted parameter positions) and "parameter_choice_inf_graph"
        (positions in the concatenated [tech | bio | cf] parameter vector).
    """
    cutoff_str = '%.2E' % Decimal(cutoff)
    filepath = write_dir / "cutoff{}.params{}.pickle".format(cutoff_str, num_params_ranking)
    if filepath.exists():
        return read_pickle(filepath)

    exchanges_dict_filepath = write_dir / "exchanges_dict.cutoff{}.pickle".format(cutoff_str)
    if exchanges_dict_filepath.exists():
        exchanges_dict = read_pickle(exchanges_dict_filepath)
    else:
        exchanges_dict = {
            'tech': filter_uncertain_technosphere_exchanges(lca, cutoff, 1e8),
            'bio': filter_uncertain_biosphere_exchanges(lca, cutoff),
            'cf': filter_uncertain_characterization_exchanges(lca, cutoff),
        }
        write_pickle(exchanges_dict, exchanges_dict_filepath)

    # 'all' asks for every filtered exchange, 'ranking' only for num_params_ranking.
    num_params_all = sum(len(exchanges_dict[key]) for key in ('tech', 'bio', 'cf'))
    subset_exchanges_dict_all = collect_uncertain_exchanges(exchanges_dict, num_params_all)
    subset_exchanges_dict = collect_uncertain_exchanges(exchanges_dict, num_params_ranking)
    sdicts = {
        'ranking': subset_exchanges_dict,
        'all': subset_exchanges_dict_all,
    }

    len_tech = model.uncertain_exchange_lengths['tech']
    len_bio = model.uncertain_exchange_lengths['bio']

    data = {}
    for sname, sdict in sdicts.items():
        # Characterization factors are matched on row only; tech/bio on (row, col).
        parameter_choice_dict = {
            exchange_type: _locate_parameter_indices(
                model.uncertain_params[exchange_type],
                exc_list,
                match_col=exchange_type != 'cf',
            )
            for exchange_type, exc_list in sdict.items()
        }
        # Shift per-type positions into the concatenated [tech | bio | cf] vector.
        parameter_choice_inf_graph = np.hstack([
            parameter_choice_dict['tech'],
            parameter_choice_dict['bio'] + len_tech,
            parameter_choice_dict['cf'] + len_tech + len_bio,
        ])
        data[sname] = {
            "parameter_choice_dict": parameter_choice_dict,
            "parameter_choice_inf_graph": parameter_choice_inf_graph,
        }

    # Write the cache once, after every subset is complete, so that an
    # interrupted run can never leave a partial file that later calls would load.
    write_pickle(data, filepath)
    return data

# GSA results for paper2, prioritized list

In [None]:
# Run the graph-traversal screening (or load its cached result) and take the
# concatenated parameter positions of the 'ranking' subset.
cutoff = 1e-4
num_params_ranking = 200
data = graph_traversal_screening(lca, model, write_dir_sct, cutoff, num_params_ranking)
parameter_choice_inf_sct = data['ranking']['parameter_choice_inf_graph']

# Sizes of the two selections, then the number of parameters chosen by both.
print(parameter_choice_inf_sct.shape, parameter_choice_inf_lsa.shape)
np.intersect1d(parameter_choice_inf_sct, parameter_choice_inf_lsa).shape

In [None]:
from gsa_framework.sensitivity_methods.saltelli_sobol import sobol_indices

# Stored model outputs for the Saltelli samples of the two screening variants.
# NOTE(review): 319968 in the file names is presumably the number of Saltelli
# iterations used when the files were written — confirm before changing
# num_params_ranking.
filepath_Y_saltelli_sct = write_dir_arr / "Y.saltelliSampling.319968.None.sct.hdf5"
filepath_Y_saltelli_lsa = write_dir_arr / "Y.saltelliSampling.319968.None.lsa.hdf5"

# Sobol indices for each variant; the result is indexed below by
# 'Total order' and 'First order'.
S_sct = sobol_indices(filepath_Y_saltelli_sct, num_params_ranking)
S_lsa = sobol_indices(filepath_Y_saltelli_lsa, num_params_ranking)

# Prioritized list

In [None]:
# Choose which screening variant to rank: 'localSA' (local sensitivity
# analysis) or 'sct' (graph-traversal screening).
option = 'localSA'
if option == 'localSA':
    S_salt = S_lsa
    parameter_choice_inf = parameter_choice_inf_lsa
    method_name = "Local SA rank"
    gsa_name = "Local SA Sobol total"
elif option == "sct":
    S_salt = S_sct
    parameter_choice_inf = parameter_choice_inf_sct
    method_name = "Contributions rank"
    gsa_name = "Contributions Sobol total"
else:
    # Fail fast: otherwise the cells below would silently reuse S_salt and
    # parameter_choice_inf left over from an earlier run of this cell.
    raise ValueError("Unknown option {!r}; expected 'localSA' or 'sct'".format(option))

# Order the parameters by decreasing Sobol total-order index and carry the
# first-order indices along in the same order.
total = S_salt['Total order']
total_argsort = np.argsort(total)[::-1]
total_sorted = total[total_argsort]
first_sorted = S_salt['First order'][total_argsort]


In [None]:
# How many of the top-ranked parameters to report.
num_ranked = 200

# Parameter positions (in the concatenated [tech | bio | cf] vector) ordered
# by decreasing Sobol total-order index.
parameter_choice_ranked = parameter_choice_inf[total_argsort][:num_ranked]

len_tech = model.uncertain_exchange_lengths['tech']
len_bio = model.uncertain_exchange_lengths['bio']
len_cf = model.uncertain_exchange_lengths['cf']

# Technosphere parameters occupy positions [0, len_tech). Dictionary keys are
# the ranks (positions in parameter_choice_ranked), values describe the parameter.
where_tech = np.where(parameter_choice_ranked < len_tech)[0]
ind_tech = parameter_choice_ranked[where_tech]
inf_tech_params = {
    rank_position: {
        "type": "tech",
        "param": model.uncertain_params['tech'][param_index],
    }
    for rank_position, param_index in zip(where_tech, ind_tech)
}

# Biosphere parameters occupy [len_tech, len_tech + len_bio).
where_bio = np.where(
    (parameter_choice_ranked >= len_tech)
    & (parameter_choice_ranked < len_tech + len_bio)
)[0]
ind_bio = parameter_choice_ranked[where_bio] - len_tech
inf_bio_params = {
    rank_position: {
        "type": "bio",
        "param": model.uncertain_params['bio'][param_index],
    }
    for rank_position, param_index in zip(where_bio, ind_bio)
}

# Characterization factors occupy [len_tech + len_bio, len_tech + len_bio + len_cf).
where_cf = np.where(
    (parameter_choice_ranked >= len_tech + len_bio)
    & (parameter_choice_ranked < len_tech + len_bio + len_cf)
)[0]
ind_cf = parameter_choice_ranked[where_cf] - len_tech - len_bio
inf_cf_params = {
    rank_position: {
        "type": "cf",
        "param": model.uncertain_params['cf'][param_index],
    }
    for rank_position, param_index in zip(where_cf, ind_cf)
}

# Merge the three groups and restore rank order.
params = {**inf_tech_params, **inf_bio_params, **inf_cf_params}
params = dict(sorted(params.items(), key=lambda item: item[0]))

# Positions of the activity and biosphere mappings in the tuple returned by
# lca.reverse_dict().
# NOTE(review): technosphere rows are looked up in the activity mapping
# (index 0); this presumably relies on products and activities sharing
# indices — confirm for multi-output databases.
TECH_IND = 0
BIO_IND = 2
# Short fixed-width labels for uncertainty types.
distributions = {
    sa.NormalUncertainty.id: 'normal',
    sa.LognormalUncertainty.id: 'lognml',
    sa.UniformUncertainty.id: 'unifrm',
}
# Full distribution names written to the table; unknown ids are reported as-is.
distribution_names = {
    sa.LognormalUncertainty.id: "lognormal",
    sa.NormalUncertainty.id: "normal",
    sa.UniformUncertainty.id: "uniform",
    sa.TriangularUncertainty.id: "triangular",
}

# Build the index -> key mappings once instead of on every loop iteration.
reverse_dicts = model.lca.reverse_dict()

# One table row per ranked parameter.
data = []
for rank, dict_ in params.items():
    exchange_type = dict_['type']
    param = dict_['param']
    row = param['row']
    col = param['col']

    if exchange_type == 'tech':
        act_in = bd.get_activity(reverse_dicts[TECH_IND][row])
        act_out = bd.get_activity(reverse_dicts[TECH_IND][col])
        unit = act_in['unit']
        act_in_str = "{:4s}   {}, {}".format("from", act_in['name'], act_in['location'])
        act_out_str = "{:4s}   {}, {}".format("to", act_out['name'], act_out['location'])
        exchange = "{} {}".format(act_in_str, act_out_str)
    elif exchange_type == 'bio':
        act_in = bd.get_activity(reverse_dicts[BIO_IND][row])
        act_out = bd.get_activity(reverse_dicts[TECH_IND][col])
        unit = act_in['unit']
        act_in_str = "{:4s}   {}".format("from", act_in['name'])
        act_out_str = "{:4s}   {}, {}".format("to", act_out['name'], act_out['location'])
        exchange = "{} {}".format(act_in_str, act_out_str)
    elif exchange_type == 'cf':
        act_in = bd.get_activity(reverse_dicts[BIO_IND][row])
        unit = "kg CO2e"
        act_in_str = "{:4s}   {}".format("", act_in['name'])
        exchange = act_in_str
    else:
        # Without this, `unit` and `exchange` from the previous iteration
        # would be written into the row of an unrecognised type.
        raise ValueError("Unknown exchange type {!r}".format(exchange_type))

    distr = distribution_names.get(param['uncertainty_type'], param['uncertainty_type'])

    data.append({
        # Ranks are zero-based positions; the table shows them one-based.
        method_name: int(rank + 1),
        'Type': exchange_type,
        "Exchange": exchange,
        "Amount": "{:6.5e} {}".format(param['amount'], unit),
        "Distribution": distr,
        "Standard deviation": "{:e}".format(param['scale']),
        "Shape": "{:6.5e}".format(param['shape']),
        "Minimum": "{:6.5e}".format(param['minimum']),
        "Maximum": "{:6.5e}".format(param['maximum']),
        gsa_name: "{:6.5e}".format(total_sorted[rank]),
    })
    

In [None]:
import pandas as pd
# Tabulate the rows collected in `data`; missing values become empty strings.
df = pd.DataFrame(data).fillna("")
df

In [None]:
# Export the table to an Excel file in the current working directory.
df.to_excel('b.xlsx')

In [None]:
# Database handles used by the exploratory cells below.
ei_name = 'ecoinvent 3.7.1 cutoff'
bi = bd.Database("biosphere3")
ei = bd.Database(ei_name)
co = bd.Database('CH consumption 1.0')

In [None]:
# Show all exchanges of the demand activity.
list(demand_act.exchanges())

# 0. Methane non-fossil

In [None]:
# Biosphere flows whose name contains 'Methane, non-fossil'.
[act for act in bi if 'Methane, non-fossil' in act['name']]

In [None]:
# Collect the names of the activities (columns) that have a biosphere
# exchange in row 878.
# NOTE(review): 878 is presumably the biosphere row of a
# 'Methane, non-fossil' flow — confirm, the index is hard-coded.
cols = lca.bio_params[lca.bio_params['row']==878]['col']
a = []
for col in cols:
    act = bd.get_activity(lca.reverse_dict()[0][col])
    a.append(act['name'])
# list(set(a))

# 1. Dinitrogen monoxide

In [None]:
# Unique names of the activities (columns) that have a biosphere exchange
# in row 772.
# NOTE(review): 772 is presumably the biosphere row of a dinitrogen monoxide
# flow — confirm, the index is hard-coded.
cols = lca.bio_params[lca.bio_params['row']==772]['col']
a = []
for col in cols:
    act = bd.get_activity(lca.reverse_dict()[0][col])
    a.append(act['name'])
list(set(a))

# 2. Onion seedling

In [None]:
# Find the single RoW activity whose name contains
# "onion seedling production, for planting".
onion_seedling = [act for act in ei if "onion seedling production, for planting" in act['name'] and 'RoW'==act['location']]
assert len(onion_seedling)==1
onion_seedling = onion_seedling[0]
onion_seedling

In [None]:
# Matrix index assigned to the onion seedling activity in this LCA.
lca.activity_dict[onion_seedling.key]

In [None]:
# Technosphere exchanges in row 16002 and the columns they belong to.
# NOTE(review): 16002 is presumably the index shown by the previous cell —
# confirm, it is hard-coded.
cols = lca.tech_params[lca.tech_params['row']==16002]['col']
lca.tech_params[lca.tech_params['row']==16002]

In [None]:
# Print the activity behind each column collected in `cols`.
for col in cols:
    act = bd.get_activity(lca.reverse_dict()[0][col])
    print(act)

In [None]:
# Exchanges of the activity at matrix index 19345.
# NOTE(review): hard-coded index, presumably one of the columns printed
# above — confirm.
list(bd.get_activity(lca.reverse_dict()[0][19345]).exchanges())

In [None]:
# Score of the first consumption activity whose name contains "Onions",
# expressed as a percentage of the total score of `lca`.
# NOTE(review): the demand amount is presumably the corresponding exchange
# amount of the Food activity — confirm, it is hard-coded.
onions_co = [act for act in co if "Onions" in act['name']][0]
lca_onions = bc.LCA({onions_co: 0.6488975239934204}, method)
lca_onions.lci()
lca_onions.lcia()
lca_onions.score / lca.score * 100

In [None]:
# First matches for the consumption activity and the bell pepper market,
# followed by the inputs of that market.
other_fruitv = [act for act in co if "Other fruiting ve" in act['name']][0]
bell_pepper = [act for act in ei if "market for bell pepper" in act['name']][0]
for exc in bell_pepper.exchanges():
    print(exc.input, exc.amount)
bell_pepper_inup = [act for act in ei if "bell pepper production" in act['name'] and 'IN-UP' in act['location']][0]
bell_pepper_inmh = [act for act in ei if "bell pepper production" in act['name'] and 'IN-MH' in act['location']][0]

# Share (in percent of the total score of `lca`) of the consumption activity.
# Printed explicitly: a notebook only displays the last expression of a cell,
# so a bare expression here would be computed and discarded.
# NOTE(review): the demand amounts below are hard-coded, presumably copied
# from exchange amounts shown in earlier output — confirm.
lca_other_fruitv = bc.LCA({other_fruitv: 1.8643742157872023}, method)
lca_other_fruitv.lci()
lca_other_fruitv.lcia()
print(lca_other_fruitv.score / lca.score * 100)

# Share (in percent) of the bell pepper market.
lca_bell_pepper = bc.LCA({bell_pepper: 1.8643742157872023*0.098767731120006}, method)
lca_bell_pepper.lci()
lca_bell_pepper.lcia()
print(lca_bell_pepper.score / lca.score * 100)

# 3. Maize silage

In [None]:
# First RoW activity named exactly 'maize silage production' and its matrix index.
maize = [ act for act in ei if 'maize silage production' == act['name'] and 'RoW'==act['location']][0]
lca.activity_dict[maize.key]

In [None]:
# Technosphere exchanges in row 4425 and the columns they belong to.
# NOTE(review): 4425 is presumably the index shown by the previous cell —
# confirm, it is hard-coded.
cols = lca.tech_params[lca.tech_params['row']==4425]['col']
lca.tech_params[lca.tech_params['row']==4425]

In [None]:
# Print the activity behind each column collected in `cols`.
for col in cols:
    act = bd.get_activity(lca.reverse_dict()[0][col])
    print(act)

# 4. China

In [None]:
# First CN activity named exactly 'peach production' and its matrix index.
peach = [ act for act in ei if 'peach production' == act['name'] and 'CN'==act['location']][0]
lca.activity_dict[peach.key]

In [None]:
# Technosphere exchanges in column 7685 and the rows they come from.
# NOTE(review): 7685 is presumably the index shown by the previous cell —
# confirm, it is hard-coded.
rows = lca.tech_params[lca.tech_params['col']==7685]['row']
lca.tech_params[lca.tech_params['col']==7685]

In [None]:
# Print the activity behind each row collected in `rows`.
for row in rows:
    act = bd.get_activity(lca.reverse_dict()[0][row])
    print(act)

# 7. Oranges

In [None]:
# First RoW activity whose name contains "orange production, fresh grade".
orange = [act for act in ei if "orange production, fresh grade" in act['name'] and 'RoW'==act['location']][0]

In [None]:
# Matrix index assigned to the orange production activity in this LCA.
lca.activity_dict[orange.key]

In [None]:
# Technosphere exchanges in row 1312 and the columns they belong to.
# NOTE(review): 1312 is presumably the index shown by the previous cell —
# confirm, it is hard-coded.
cols = lca.tech_params[lca.tech_params['row']==1312]['col']
lca.tech_params[lca.tech_params['row']==1312]

In [None]:
# Print the activity behind each column collected in `cols`.
for col in cols:
    act = bd.get_activity(lca.reverse_dict()[0][col])
    print(act)

In [None]:
# First matching consumption activities for citrus fruits and for fruit juices.
oranges_citrus = [act for act in co if 'Oranges and other citrus fruits' in act['name']][0]
fruit_juices = [act for act in co if 'Fruit juices' in act['name']][0]

In [None]:
# Score of each consumption activity as a percentage of the total score of `lca`.
# NOTE(review): the demand amounts are hard-coded, presumably the exchange
# amounts of the Food activity — confirm.
lca_oranges_citrus = bc.LCA({oranges_citrus: 2.1714586942450325}, method)
lca_oranges_citrus.lci()
lca_oranges_citrus.lcia()
print(lca_oranges_citrus.score / lca.score * 100)

lca_fruit_juices = bc.LCA({fruit_juices: 3.9009085829780012}, method)
lca_fruit_juices.lci()
lca_fruit_juices.lcia()
print(lca_fruit_juices.score / lca.score * 100)

In [None]:
# Sum of two hard-coded values.
# NOTE(review): presumably the two percentages printed by the previous cell — confirm.
0.19721621980302698 + 0.3843171769770443

In [None]:
# First activity whose name contains "milk production, from cow" and whose
# location contains 'RoW'.
# NOTE(review): the variable is called `cheese` although the query selects
# milk production — confirm the intended activity.
cheese = [act for act in ei if "milk production, from cow" in act['name'] and 'RoW' in act['location']][0]
cheese

In [None]:
# Show all exchanges of the activity selected above.
list(cheese.exchanges())

In [None]:
# Fresh accumulator; print_recursive_calculation appends one row per visited activity.
data = []

In [None]:
def print_recursive_calculation(data, activity, lcia_method, lca_obj=None, total_score=None, amount=1, level=0, max_level=3, cutoff=1e-2):
    """Walk the technosphere inputs of ``activity`` and record their scores in ``data``.

    On the initial call (``lca_obj`` is None) a new LCA is built for
    ``{activity: amount}`` and its score becomes the reference total. On
    recursive calls the shared ``lca_obj`` is re-evaluated for the current
    activity, and the branch is dropped when its absolute score does not
    exceed ``cutoff`` times the absolute total.

    Each recorded row is a dict holding the score relative to the total under
    the key "level <level>", the absolute score, and the activity's name, unit
    and location. Recursion stops once ``level`` reaches ``max_level``.

    Raises ValueError when ``lca_obj`` is supplied without ``total_score``.
    Nothing is returned; ``data`` is extended in place.
    """
    if lca_obj is not None and total_score is None:
        raise ValueError

    if lca_obj is None:
        # Initial call: this activity defines the reference score.
        lca_obj = bc.LCA({activity: amount}, lcia_method)
        lca_obj.lci()
        lca_obj.lcia()
        total_score = lca_obj.score
    else:
        lca_obj.redo_lcia({activity: amount})
        if abs(lca_obj.score) <= abs(total_score * cutoff):
            return

    # Read the score before recursing: child calls overwrite the shared LCA.
    score = lca_obj.score
    data.append({
        "level {}".format(level): score / total_score,
        "LCA score": score,
        "activity name": activity['name'],
        "activity unit": activity['unit'],
        "activity location": activity['location'],
    })

    if level >= max_level:
        return

    for exc in activity.technosphere():
        print_recursive_calculation(
            data=data,
            activity=exc.input,
            lcia_method=lcia_method,
            lca_obj=lca_obj,
            total_score=total_score,
            amount=amount * exc['amount'],
            level=level + 1,
            max_level=max_level,
            cutoff=cutoff,
        )
            
# Walk the supply chain of the demand activity down to 12 levels, keeping
# branches whose absolute score exceeds 1e-4 of the total. Every row appended
# to `data` stores the share of the total score (a fraction, not a percentage)
# under "level <n>" and the absolute score under "LCA score".
print_recursive_calculation(data, demand_act, model.method, max_level=12, cutoff=1e-4)

In [None]:
# import pandas as pd
# df = pd.DataFrame(data).fillna('')
# columns = list(df.columns)
# df = df[columns[0:1] + columns[5:] + columns[1:5]]
# df.to_excel("a.xlsx")