In [None]:
import bw2data as bd
import bw2calc as bc
import numpy as np
from pathlib import Path

from gsa_framework.models import LCAModel
from gsa_framework.convergence_robustness_validation import Validation
from gsa_framework.utils import read_pickle

from setups_paper_gwp import setup_lca_model_protocol

In [None]:
# Base data directory handed to the setup helper.
path_base = Path('/data/user/kim_a')
# Forwarded as-is to the helper (presumably None means "all uncertain inputs" — confirm).
num_params = None
# The helper returns three values: the model, its write directory and the GSA seed.
model, write_dir, gsa_seed = setup_lca_model_protocol(path_base, num_params=num_params, write_dir=None)

# 1. Run MC when everything varies, 408k inputs
### tech=186k, bio=222k, cf=71 

In [None]:
# Horizontally stack every array stored in model.default_uncertain_amounts
# (in dict order) into one array.
default_uncertain_amounts = np.hstack(list(model.default_uncertain_amounts.values()))

In [None]:
%%time
# Number of iterations and random seed handed to Validation; both are reused
# further down to build a file name and to seed NumPy.
iterations_validation = 300
validation_seed = 100023423
# LaTeX-formatted label passed to Validation as `model_output_name`.
lca_scores_axis_title = r"$\text{LCA scores, [kg CO}_2\text{-eq}]$"
# `val` is only created when this code runs as the main module
# (which is the case inside a notebook kernel).
if __name__ == "__main__":
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=validation_seed,
        default_x_rescaled=default_uncertain_amounts,
        write_dir=write_dir,
        model_output_name=lca_scores_axis_title,
    )    

# 2. Run MC after LSA step, where NON-influential inputs are removed, 198k inputs
### tech=186k, bio=12238, cf=68

In [None]:
# Paths of the pickled index arrays stored under <write_dir>/LSA_scores.
path_lsa = model.write_dir / "LSA_scores"
path_lsa_include_inds_bio = path_lsa / "include_inds_bio.pickle"
path_lsa_include_inds_cf = path_lsa / "include_inds_cf.pickle"

# Indices to keep, expressed relative to the start of the bio / cf groups.
include_inds_bio = read_pickle(path_lsa_include_inds_bio)
include_inds_cf = read_pickle(path_lsa_include_inds_cf)

# Translate the per-group indices into positions of the concatenated input
# vector, whose layout is [tech | bio | cf]: every tech input is kept, bio
# indices are shifted by the tech length, cf indices by tech + bio lengths.
include_inds_tech_forX = np.arange(model.uncertain_exchange_lengths['tech'])
include_inds_bio_forX = model.uncertain_exchange_lengths['tech'] + include_inds_bio
include_inds_cf_forX = (
    model.uncertain_exchange_lengths['tech'] + model.uncertain_exchange_lengths['bio']
) + include_inds_cf

parameter_choice_rm_noninf = np.hstack(
    (include_inds_tech_forX, include_inds_bio_forX, include_inds_cf_forX)
)

In [None]:
%%time
# Ask the Validation object for the model outputs that correspond to the
# `parameter_choice_rm_noninf` subset of inputs; `tag` labels this run.
# Relies on `val`, which is itself only defined under the same guard.
if __name__ == "__main__":
    tag = "LocalSA_rm_noninf"
    Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_rm_noninf, tag=tag)
    
# fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_rm_noninf.shape[0], tag=tag)
# fig.show()

# fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_rm_noninf.shape[0], tag=tag)
# fig.show()

# 3. Run MC after LSA step, where LOWly influential inputs are removed: 10k, 15k and 20k inputs


In [None]:
# # Make sure  that the chosen num_params in LCA are appropriate
# scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores")
# num_params_lsa = 10000 #10000
# where_high_var = model.get_where_high_var(scores_dict, num_params_lsa)
# parameter_choice_rm_lowinf = parameter_choice_rm_noninf[where_high_var]

# if __name__ == "__main__":
#     tag = "LocalSA_rm_lowinf"
#     Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_rm_lowinf, tag=tag)
    
# fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_rm_lowinf.shape[0], tag=tag)
# fig.show()

# fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_rm_lowinf.shape[0], tag=tag)
# fig.show()

In [None]:
# # Make sure  that the chosen num_params in LCA are appropriate
# scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores")
# num_params_lsa = 15000 #10000
# where_high_var = model.get_where_high_var(scores_dict, num_params_lsa)
# parameter_choice_rm_lowinf = parameter_choice_rm_noninf[where_high_var]

# if __name__ == "__main__":
#     tag = "LocalSA_rm_lowinf"
#     Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_rm_lowinf, tag=tag)
    
# fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_rm_lowinf.shape[0], tag=tag)
# fig.show()

# fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_rm_lowinf.shape[0], tag=tag)
# fig.show()

In [None]:
# Make sure that the chosen num_params in LCA are appropriate.
# Load the stored LSA scores and ask the model which of the non-removed inputs
# to retain for the requested number of parameters.
scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores")
num_params_lsa = 20000 #10000
where_high_var = model.get_where_high_var(scores_dict, num_params_lsa)
# `where_high_var` indexes into `parameter_choice_rm_noninf`, so the result is
# again expressed as positions in the full input vector.
parameter_choice_rm_lowinf = parameter_choice_rm_noninf[where_high_var]

# Model outputs for this reduced subset; `tag` labels the run.
if __name__ == "__main__":
    tag = "LocalSA_rm_lowinf"
    Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_rm_lowinf, tag=tag)
    
# fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_rm_lowinf.shape[0], tag=tag)
# fig.show()

# fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_rm_lowinf.shape[0], tag=tag)
# fig.show()

# 4. Run MC for GSA

### DASK

In [None]:
# import dask
# from dask.distributed import Client, LocalCluster
# from dask_jobqueue import SLURMCluster
# from pathlib import Path

In [None]:
# which_pc = "merlin_protocol_gsa"
# if 'merlin' in which_pc:
#     path_dask_logs = Path('/data/user/kim_a/dask_logs')
#     path_dask_logs.mkdir(parents=True, exist_ok=True)
#     cluster = SLURMCluster(cores     = 8, 
#                            memory    ='30GB', 
#                            walltime  = '10:00:00',
#                            interface ='ib0',
#                            local_directory = path_dask_logs.as_posix(),
#                            log_directory   = path_dask_logs.as_posix(),
#                            queue="daily",
#                            ) 
# elif 'local' in which_pc:
#     cluster = LocalCluster(memory_limit='7GB') 

In [None]:
# client = Client(cluster)

In [None]:
# n_workers = 20
# cluster.scale(n_workers)

In [None]:
# cluster.close()


### GSA

In [None]:
import bw2data as bd
import bw2calc as bc
import numpy as np
from pathlib import Path

from gsa_framework.models import LCAModel
from gsa_framework.convergence_robustness_validation import Validation
from gsa_framework.utils import read_pickle

from setups_paper_gwp import *

In [None]:
# Number of workers; the same value is used for the correlation-based GSA.
n_workers_corr = n_workers = 20

# NOTE: `path_base` and `num_params` are re-assigned here for the GSA stage.
path_base = Path('/data/user/kim_a')
num_params = 20000
# Four model runs per parameter.
iter_corr = num_params * 4
gsa_corr = setup_corr(num_params, iter_corr, setup_lca_model_protocol, path_base)

# Per-method run settings, keyed by GSA method name.
options = {'corr': {"iterations": iter_corr, "n_workers": n_workers_corr}}

In [None]:
# write_X_chunks(gsa_corr, n_workers_corr)

In [None]:
# # Compute model outputs
# task_per_worker = dask.delayed(compute_scores_per_worker)
# model_evals = []
# for option,dict_ in options.items():
#     iterations = dict_["iterations"]
#     n_workers = dict_["n_workers"]
#     for i in range(n_workers):
#         print(option, num_params, iterations, i, n_workers)
#         model_eval = task_per_worker(option, num_params, iterations, i, n_workers, setup_lca_model_protocol, path_base)
#         model_evals.append(model_eval)
# model_evals

In [None]:
# %%time
# dask.compute(model_evals)

# 5. Perform GSA on 20k parameters
### tech=17'199, bio=2'771, cf=30

### Degree of linearity = 0.98/1, SRC coefficients

In [None]:
# %%time
# from sklearn.linear_model import LinearRegression
# from gsa_framework.utils import read_hdf5_array

# X = read_hdf5_array(gsa_corr.filepath_X_rescaled)
# Y = generate_model_output_from_chunks(gsa_corr, n_workers_corr)
# reg_model = LinearRegression()
# reg_model.fit(X, Y)

In [None]:
# src_4x = reg_model.coef_ * np.std(X, axis=0) / np.std(Y)
# np.sum(src_4x**2)

### Spearman correlations

In [None]:
# len_tech = model.uncertain_exchange_lengths['tech']
# where_tech = np.where(parameter_choice_rm_lowinf<len_tech)[0]

# len_bio = model.uncertain_exchange_lengths['bio']
# where_bio = np.where(np.logical_and(
#     parameter_choice_rm_lowinf>=len_tech,
#     parameter_choice_rm_lowinf<len_tech+len_bio,
# ))[0]

# len_cf = model.uncertain_exchange_lengths['cf']
# where_cf = np.where(np.logical_and(
#     parameter_choice_rm_lowinf>=len_tech+len_bio,
#     parameter_choice_rm_lowinf<len_tech+len_bio+len_cf,
# ))[0]
# where_tech.shape, where_bio.shape, where_cf.shape

In [None]:
# Run the GSA on `gsa_corr`; the result is indexed by name and the
# 'spearman' entry is kept for the ranking done further below.
S = gsa_corr.perform_gsa()
spearman = S['spearman']

In [None]:
# fig = gsa_corr.plot_sa_results({'Spearman': np.abs(spearman)})

# 6. Run MC with 100 and 50 most influential

In [None]:
# scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores")
# num_params_lsa = 20000 #10000
# where_high_var = model.get_where_high_var(scores_dict, num_params_lsa)
# parameter_choice_rm_lowinf = parameter_choice_rm_noninf[where_high_var]

# num_influential = 100
# inf_sorted = np.argsort(np.abs(spearman))[::-1]
# parameter_choice_inf = parameter_choice_rm_lowinf[inf_sorted[:num_influential]]

# if __name__ == "__main__":
#     tag = "SpearmanIndex"
#     Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_inf, tag=tag)

# # fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_inf.shape[0], tag=tag)
# # fig.show()

# # fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_inf.shape[0], tag=tag)
# # fig.show()

In [None]:
# Rebuild the 20k-input subset the GSA was run on, so that positions in
# `spearman` can be mapped back to positions in the full input vector.
scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores")
num_params_lsa = 20000 #10000
where_high_var = model.get_where_high_var(scores_dict, num_params_lsa)
parameter_choice_rm_lowinf = parameter_choice_rm_noninf[where_high_var]

num_influential = 50
# Rank inputs by |Spearman|, most influential first. The absolute values are
# computed once, and the sorted scores are derived from the same permutation as
# the sorted indices, so `inf_spearman[k]` always belongs to `inf_sorted[k]`.
abs_spearman = np.abs(spearman)
# NaN coefficients sort to the end in argsort and would therefore come FIRST
# after the reversal; treat them as "no influence" instead.
abs_spearman = np.where(np.isnan(abs_spearman), 0.0, abs_spearman)
inf_sorted = np.argsort(abs_spearman)[::-1]
inf_spearman = abs_spearman[inf_sorted]
parameter_choice_inf = parameter_choice_rm_lowinf[inf_sorted[:num_influential]]

# The plots need `Y_subset` (and `val`), which only exist under this guard,
# so they are produced inside it as well.
if __name__ == "__main__":
    tag = "SpearmanIndex"
    Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_inf, tag=tag)

    fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_inf.shape[0], tag=tag)
    fig.show()

    fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_inf.shape[0], tag=tag)
    fig.show()

# 7. Influential exchanges

In [None]:
import stats_arrays as sa

# Sizes of the three consecutive groups of the input vector: [tech | bio | cf].
len_tech = model.uncertain_exchange_lengths['tech']
len_bio = model.uncertain_exchange_lengths['bio']
len_cf = model.uncertain_exchange_lengths['cf']

# For each group: `where_*` are the ranks (positions in `parameter_choice_inf`)
# of the inputs that fall into the group, `ind_*` the corresponding indices
# relative to the start of the group, and `inf_*_params` maps rank -> record.

# --- technosphere ---
where_tech = np.flatnonzero(parameter_choice_inf < len_tech)
ind_tech = parameter_choice_inf[where_tech]
inf_tech_params = {
    rank: {"type": "tech", "param": model.uncertain_params['tech'][ind]}
    for rank, ind in zip(where_tech, ind_tech)
}

# --- biosphere ---
where_bio = np.flatnonzero(
    (parameter_choice_inf >= len_tech) & (parameter_choice_inf < len_tech+len_bio)
)
ind_bio = parameter_choice_inf[where_bio]-len_tech
inf_bio_params = {
    rank: {"type": "bio", "param": model.uncertain_params['bio'][ind]}
    for rank, ind in zip(where_bio, ind_bio)
}

# --- characterization factors ---
where_cf = np.flatnonzero(
    (parameter_choice_inf >= len_tech+len_bio)
    & (parameter_choice_inf < len_tech+len_bio+len_cf)
)
ind_cf = parameter_choice_inf[where_cf]-len_tech-len_bio
inf_cf_params = {
    rank: {"type": "cf", "param": model.uncertain_params['cf'][ind]}
    for rank, ind in zip(where_cf, ind_cf)
}

# Merge the three groups and order the result by rank.
params = {**inf_tech_params, **inf_bio_params, **inf_cf_params}
params = {k : params[k] for k in sorted(params)}

# Positions used to index the tuple returned by `model.lca.reverse_dict()`.
TECH_IND = 0
BIO_IND = 2
# Short fixed-width labels for the uncertainty distributions expected here.
distributions = {
    sa.NormalUncertainty.id: 'normal',
    sa.LognormalUncertainty.id: 'lognml',
    sa.UniformUncertainty.id: 'unifrm',
}

# Build the reverse (matrix index -> key) mappings once instead of on every
# loop iteration.
reverse_dicts = model.lca.reverse_dict()
rev_tech = reverse_dicts[TECH_IND]
rev_bio = reverse_dicts[BIO_IND]

# Print one summary per influential input, ordered by rank.
for rank, dict_ in params.items():
    exchange_type = dict_['type']
    param = dict_['param']
    row = param['row']
    col = param['col']
    uncertainty_type = param['uncertainty_type']
    print(
        "{:2d}. spearman={:5.3f}, {}, amount={:8.5f}, scale={:5.3f}".format(
            rank, 
            inf_spearman[rank],
            # Fall back to the numeric id for distributions without a label
            # rather than aborting the whole listing with a KeyError.
            distributions.get(uncertainty_type, "type{}".format(uncertainty_type)),
            param['amount'],
            param['scale'],
        )
    )      
    # `bd` (bw2data) is the module imported by this notebook; the previous
    # `bw.get_activity` referred to a name that is never imported here.
    if exchange_type=='tech':
        act_in = bd.get_activity(rev_tech[row])
        act_out = bd.get_activity(rev_tech[col])
        print("act out:    {}, {}".format(act_out['name'], act_out['location']))
        print("act  in:    {}, {}, {} \n".format(act_in['name'], act_in['unit'], act_in['location']))
    elif exchange_type=='bio':
        act_in = bd.get_activity(rev_bio[row])
        act_out = bd.get_activity(rev_tech[col])
        print("act out:    {}, {}".format(act_out['name'], act_out['location']))
        print("act  in:    {}, {} \n".format(act_in['name'], act_in['unit']))
    elif exchange_type=='cf':
        act_in = bd.get_activity(rev_bio[row])
        print("GWP of:    {} \n".format(act_in['name'])) 

In [None]:
# Full information on influential params
params

In [None]:
# ei = bd.Database('ecoinvent 3.7.1 cutoff')
# onion_row = [act for act in ei if "onion seedling production, for planting" in act['name'].lower() 
#              and 'RoW'==act['location']][0]
# onion_nz  = [act for act in ei if "onion seedling production, for planting" in act['name'].lower() 
#              and 'NZ'==act['location']][0]

# 8. LCA scores if std in onions is reduced

In [None]:
import bw2data as bd
import bw2calc as bc
import numpy as np
from gsa_framework.models.life_cycle_assessment import LCAModelCall
from gsa_framework.utils import read_hdf5_array, write_hdf5_array

In [None]:
# Select the project and locate the single activity whose name contains "Food"
# in the consumption database; it becomes the functional unit.
bd.projects.set_current("GSA for protocol")
co = bd.Database("CH consumption 1.0")
demand_act = [act for act in co if "Food" in act["name"]]
assert len(demand_act) == 1
demand_act = demand_act[0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")

# Among the influential inputs, pick biosphere parameters with
# uncertainty_type 3 and scale ~0.7383766, and keep only the first match.
bio_to_modify = []
for p in params.values():
    param = p['param']
    if p['type']=='bio' and param['uncertainty_type']==3 and np.allclose(param['scale'], 0.7383766):
        bio_to_modify.append(param)
bio_to_modify = bio_to_modify[:1]

dt = model.lca.bio_params.dtype
# Independent copies of the biosphere parameter array. `np.array` copies by
# default, which avoids the element-by-element Python list round trip.
bio_params_temp = np.array(model.lca.bio_params, dtype=dt)
bio_params_uncertain = model.lca.bio_params[model.lca.bio_params['uncertainty_type']>1]
bio_params_modified = np.array(model.lca.bio_params, dtype=dt)

# For every selected parameter record its position in the full array (`inds`)
# and in the uncertain-only subset (`inds_uncertain`), matching on (row, col).
# The first match is used; an IndexError is raised if there is none.
inds = []
inds_uncertain = []
for b in bio_to_modify:
    ind = np.where(np.logical_and(
        bio_params_temp['row']==b['row'],
        bio_params_temp['col']==b['col'],
    ))[0][0]
    inds.append(ind)
    indu = np.where(np.logical_and(
        bio_params_uncertain['row']==b['row'],
        bio_params_uncertain['col']==b['col'],
    ))[0][0]
    inds_uncertain.append(indu)

# Switch the selected parameters to uncertainty_type 0 in the modified copy.
# NOTE(review): `loc` is also overwritten with log(loc); confirm this is still
# intended now that the type is set to 0 rather than to the lognormal id.
for ind in inds:
    bio_params_modified['loc'][ind] = np.log(bio_params_temp['loc'][ind])
    bio_params_modified['uncertainty_type'][ind] = 0#sa.LognormalUncertainty.id
#     bio_params_modified[ind]['scale'] = 0

# iterations = 20

In [None]:
inds

In [None]:
# Parameter arrays for the second model: technosphere and characterization
# parameters are taken unchanged from `model.lca`, the biosphere ones come from
# the modified copy.
modified_params = {
    'tech': model.lca.tech_params,
    'bio': bio_params_modified,
    'cf': model.lca.cf_params,
}
model2 = LCAModelCall(
    demand, 
    method, 
    modified_params,
)
# NOTE: re-assigns `num_params` to the length reported by `model2`.
num_params = len(model2)

In [None]:
%%time
tag = 'BioId0'
# Cache file for the model outputs, keyed by iterations, seed and tag.
fp_Y_narrow = Path(
    "/data/user/kim_a/protocol_gsa/arrays/validation.Y.narrow.{}.{}.{}.hdf5".format(
        iterations_validation, validation_seed, tag
    )
)

# The input sample is always regenerated (it is needed by later cells),
# using the validation seed on NumPy's global random state.
np.random.seed(validation_seed)
X = np.random.rand(iterations_validation, num_params)
Xr = model2.rescale(X)

# Only evaluate the model when no cached result exists yet.
if not fp_Y_narrow.exists():
    Y_narrow = model2(Xr)
    write_hdf5_array(Y_narrow, fp_Y_narrow)
else:
    Y_narrow = read_hdf5_array(fp_Y_narrow).flatten()

In [None]:
# Correlation plot and histogram produced by the Validation object for
# `Y_narrow`, labelled with the current `tag`.
fig=val.plot_correlation_Y_all_Y_inf(Y_narrow, num_influential=parameter_choice_inf.shape[0], tag=tag)
fig.show()

fig=val.plot_histogram_Y_all_Y_inf(Y_narrow, num_influential=parameter_choice_inf.shape[0], tag=tag)
fig.show()

In [None]:
# NOTE(review): this cell repeats the same two plotting calls on `Y_narrow`
# with the same arguments; it looks like a leftover duplicate.
fig=val.plot_correlation_Y_all_Y_inf(Y_narrow, num_influential=parameter_choice_inf.shape[0], tag=tag)
fig.show()

fig=val.plot_histogram_Y_all_Y_inf(Y_narrow, num_influential=parameter_choice_inf.shape[0], tag=tag)
fig.show()

In [None]:
# NOTE(review): another repetition of the same two plotting calls on
# `Y_narrow`; it looks like a leftover duplicate.
fig=val.plot_correlation_Y_all_Y_inf(Y_narrow, num_influential=parameter_choice_inf.shape[0], tag=tag)
fig.show()

fig=val.plot_histogram_Y_all_Y_inf(Y_narrow, num_influential=parameter_choice_inf.shape[0], tag=tag)
fig.show()

In [None]:
# Load a stored array of rescaled validation inputs from disk.
# NOTE(review): the file name contains "2000" while `iterations_validation` is
# set to 300 in this notebook — confirm that this array has the same number of
# rows as `Xr` before comparing the two.
fp_Xr = "/data/user/kim_a/protocol_gsa/arrays/validation.X.rescaled.all.2000.100023423.hdf5"
Xr_ = read_hdf5_array(fp_Xr)

In [None]:
# Compare the freshly generated and the stored inputs, skipping the first row.
np.allclose(Xr[1:], Xr_[1:])

In [None]:
# Columns of the input matrix that hold the modified biosphere parameters:
# their positions among the uncertain bio parameters, shifted by the number of
# tech inputs reported by `model2`.
xinds = model2.uncertain_exchange_lengths['tech'] + np.array(inds_uncertain)
xinds

In [None]:
# Compare only the columns selected by `xinds` between the two input arrays.
np.allclose(Xr[:,xinds], Xr_[:,xinds])

In [None]:
# Pick the ii-th of the selected columns from each input array.
ii = 0
arr1 = Xr[:, xinds[ii]]
arr2 = Xr_[:, xinds[ii]]

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# freq, bins = np.histogram(arr1, bins=range(0, 60, 5))

# Histogram of the values in `arr1`.
fig = go.Figure(data=[go.Histogram(x=arr1)])
fig.show()

# Histogram of the values in `arr2`, as a separate figure.
fig = go.Figure(data=[go.Histogram(x=arr2)])
fig.show()

In [None]:
# Collect every flow of the 'biosphere3' database whose lower-cased name
# contains the given CO2 string; `co2` is a (possibly empty) list.
bio = bd.Database('biosphere3')
co2 = [act for act in bio if "carbon dioxide, from soil or biomass stock" in act['name'].lower()]

In [None]:
co2

In [None]:
# `co2` is a list of flows, so `as_dict()` has to be called on each element
# (a list itself has no such method).
[act.as_dict() for act in co2]

In [None]:
# Biosphere parameters located in matrix row 1815, then only those with
# uncertainty_type == 3.
# NOTE(review): 1815 is presumably the row of the CO2 flow looked up above — confirm.
aa = model.lca.bio_params[model.lca.bio_params['row']==1815]
aa[aa['uncertainty_type']==3]

# 9. Contribution analysis

In [None]:
# Take the first key of the model's demand dict as the demand activity and
# wrap the model's LCIA method in a list.
demand_act = list(model.lca.demand.keys())[0]
list_methods = [model.method]

## from [Chris' notebook](https://github.com/brightway-lca/brightway2/blob/master/notebooks/Contribution%20analysis%20and%20comparison.ipynb)

In [None]:
def print_recursive_calculation(activity, lcia_method, lca_obj=None, total_score=None, amount=1, level=0, max_level=3, cutoff=1e-2):
    """Print an indented tree of LCIA score contributions along the supply chain.

    Each printed line shows the share of the total score, the score itself and
    the (truncated) string form of the activity, indented by two spaces per level.

    Parameters
    ----------
    activity
        Activity to evaluate; must provide ``technosphere()`` whose items expose
        ``.input`` and ``['amount']``.
    lcia_method
        LCIA method passed to ``bc.LCA`` when a new LCA object is created.
    lca_obj
        Existing LCA object to reuse. If ``None`` (top-level call), a new one is
        built and solved for ``{activity: amount}``, and its score becomes
        ``total_score``.
    total_score
        Score the shares are computed against; required whenever ``lca_obj``
        is given.
    amount
        Amount of ``activity`` demanded.
    level
        Current depth in the tree (controls indentation).
    max_level
        Depth at which the recursion stops.
    cutoff
        Branches with ``|score| <= |total_score * cutoff|`` are neither printed
        nor explored further.

    Raises
    ------
    ValueError
        If ``lca_obj`` is given without ``total_score``.
    """
    if lca_obj is None:
        lca_obj = bc.LCA({activity: amount}, lcia_method)
        lca_obj.lci()
        lca_obj.lcia()
        total_score = lca_obj.score
    elif total_score is None:
        raise ValueError("`total_score` must be provided when `lca_obj` is given")
    else:
        lca_obj.redo_lcia({activity: amount})
        if abs(lca_obj.score) <= abs(total_score * cutoff):
            return
    # A zero total score would otherwise raise ZeroDivisionError; report the
    # share as NaN in that case.
    share = lca_obj.score / total_score if total_score else float("nan")
    print("{}{:4.3f} ({:06.4f}): {:.70}".format("  " * level, share, lca_obj.score, str(activity)))
    if level < max_level:
        for exc in activity.technosphere():
            print_recursive_calculation(
                activity=exc.input, 
                lcia_method=lcia_method, 
                lca_obj=lca_obj, 
                total_score=total_score, 
                # Scale the demanded amount by the exchange amount.
                amount=amount * exc['amount'], 
                level=level + 1, 
                max_level=max_level, 
                cutoff=cutoff
            )
            
# First number is the share of the total score (as a fraction), second number is the actual LCA score.
# The tree is explored down to 8 levels below the demand activity.
print_recursive_calculation(demand_act, model.method, max_level=8)

## for activities in the food sector

In [None]:
import pandas as pd
import plotly.graph_objects as go

In [None]:
# One functional unit ({input key: amount}) per non-production exchange of the
# demand activity.
list_fus = [
    {exc['input']: exc['amount']}
    for exc in demand_act.exchanges()
    if exc['type'] != 'production'
]

# Register the calculation setup and evaluate all functional units at once.
bd.calculation_setups['food_sector_contribution_analysis'] = {'inv':list_fus, 'ia':list_methods}
myMultiLCA = bc.MultiLCA('food_sector_contribution_analysis')
lcia_unit = bd.Method(list_methods[0]).metadata['unit']

# Resolve each functional unit's activity once and reuse it for name and unit.
fu_activities = [bd.get_activity(next(iter(fu))) for fu in list_fus]
fus = [act['name'] for act in fu_activities]

# One row per functional unit; the score column is named after the LCIA unit.
df = pd.DataFrame(index=fus, columns=[lcia_unit], data=myMultiLCA.results)
df['exchange_amount'] = [next(iter(fu.values())) for fu in list_fus]
df['exchange_unit'] = [act['unit'] for act in fu_activities]
df = df.sort_values(lcia_unit, ascending=False)

# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     print(df)

# Plot at most the 40 largest contributions; capped at the number of rows so
# that quantities derived from `n_exchanges` match what is actually plotted.
# n_exchanges = len(list_fus)
n_exchanges = min(40, len(df))
df_plot = df.iloc[:n_exchanges]
y = df_plot.index.values
# Use the column name derived from the method instead of a hard-coded unit.
x = df_plot[lcia_unit].values

In [None]:
# Horizontal bar chart of the selected contributions (x: scores, y: names).
fig = go.Figure()
fig.add_trace(
    go.Bar(x=x,y=y,orientation='h',)
)
# Height grows with the number of bars; the reversed y axis puts the first
# entry at the top. NOTE: the x-axis label hard-codes the unit.
fig.update_layout(
    height=20*n_exchanges,
    width=650,
    margin=dict(t=0,b=0,l=250,r=0),
    yaxis=dict(autorange="reversed"),
    xaxis_title='LCIA scores, [kg CO2-eq]'
)


In [None]:
# Show all rows. The fully qualified option name is used because the bare
# "max_rows" pattern can match more than one pandas option and then raises
# an OptionError.
pd.set_option("display.max_rows", None)
df