In [None]:
import numpy as np
from pathlib import Path
import bw2data as bd
import bw2calc as bc
import stats_arrays as sa
import scipy.stats as stats
from gsa_framework.utils import read_pickle, write_pickle
from gsa_framework.models.life_cycle_assessment import LCAModelBase
from gsa_framework.convergence_robustness_validation import Validation
from gsa_framework.sensitivity_analysis.correlations import Correlations
from dev.utils_graph_traversal import filter_uncertain_technosphere_exchanges

from dev.utils_local_sa import get_bio_params_local_sa, get_cf_params_local_sa, get_tech_params_local_sa
from dev.setups_paper_gwp import setup_lca_model_protocol

import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
if __name__ == "__main__":

    # Root directory for everything this notebook reads and writes.
    # NOTE(review): absolute, machine-specific path. A previously used local alternative was
    #     "/Users/akim/PycharmProjects/gsa-framework-master/dev/write_files/"
    path_base = Path('/data/user/kim_a')
    write_dir = path_base / "realistic_gsa"
    write_dir.mkdir(parents=True, exist_ok=True)

    # Functional unit: one unit of the single activity whose name contains "Food".
    bd.projects.set_current("GSA for protocol")
    co = bd.Database("CH consumption 1.0")
    demand_act = [act for act in co if "Food" in act["name"]]
    assert len(demand_act) == 1
    (demand_act,) = demand_act
    demand = {demand_act: 1}

    # Deterministic LCA score for the chosen method, printed for reference.
    uncertain_method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
    lca = bc.LCA(demand, uncertain_method)
    lca.lci()
    lca.lcia()
    print(lca.score)

    # Technosphere: exchanges returned by filter_uncertain_technosphere_exchanges.
    # The result is cached as a pickle whose name encodes cutoff and max_calc.
    max_calc = 1e+16
    cutoff = 1e-16
    tech_params_sct_filename = "tech_params_cutoff{:1.0e}_maxcalc{:1.0e}.pickle".format(cutoff, int(max_calc))
    tech_params_sct_filepath = write_dir / tech_params_sct_filename
    if tech_params_sct_filepath.exists():
        tech_params_sct = read_pickle(tech_params_sct_filepath)
    else:
        tech_params_sct = filter_uncertain_technosphere_exchanges(lca, cutoff=cutoff, max_calc=max_calc)
        write_pickle(tech_params_sct, tech_params_sct_filepath)

In [None]:
def add_uncertainty_measure(params, q_range=0.95):
    """Compute the relative dispersion |std / mean| of every parameter in ``params``.

    Parameters are grouped by uncertainty type with a
    ``stats_arrays.MCRandomNumberGenerator`` (its ``ordering``, ``choices`` and
    ``positions`` attributes). A standard deviation ``s`` and a reference value
    ``m`` are derived per distribution, and the records are returned in the
    same order as the input ``params``.

    Supported distributions are lognormal, normal, uniform and triangular.
    For every other uncertainty type ``std2mean`` is left at 0.

    Parameters
    ----------
    params : numpy structured array
        Read fields: ``row``, ``col``, ``uncertainty_type``, ``loc``,
        ``scale``, ``minimum`` and ``maximum``.
    q_range : float, optional
        Accepted for backward compatibility only. It parameterized a
        quantile-range measure that was computed but never stored, so it has
        no effect on the result.

    Returns
    -------
    numpy structured array
        One record per input parameter with ``row``, ``col``,
        ``uncertainty_type`` and ``std2mean`` filled in. The fields
        ``std2mean_maxnormalized``, ``contribution`` and
        ``contribution_maxnormalized`` are initialized to 0.
    """

    mc = sa.MCRandomNumberGenerator(params)
    # Work on a copy grouped by uncertainty type; the input order is restored on return.
    params = params[mc.ordering]

    dt = np.dtype([
        ('row', '<u4'),
        ('col', '<u4'),
        ('std2mean', '<f4'),
        ('std2mean_maxnormalized', '<f4'),
        ('uncertainty_type', 'u1'),
        ('contribution', '<f4'),
        ('contribution_maxnormalized', '<f4'),
    ])

    params_uncertainty_measure = np.zeros(len(params), dtype=dt)

    offset = 0
    for uncertainty_type in mc.choices:
        numparams = mc.positions[uncertainty_type]
        if not numparams:
            continue
        stop = offset + numparams
        current_params = params[offset:stop]
        # Basic slice of the output array: field assignments below write through to it.
        current_measure = params_uncertainty_measure[offset:stop]
        current_measure['row'] = current_params['row']
        current_measure['col'] = current_params['col']
        current_measure['uncertainty_type'] = current_params['uncertainty_type']
        # Advance now so that unsupported types below can simply be skipped.
        offset = stop

        if uncertainty_type == sa.LognormalUncertainty:
            # 'loc' and 'scale' describe the underlying normal distribution.
            s_normal = current_params['scale']
            m_normal = current_params['loc']
            m = np.exp(m_normal + (s_normal**2)/2)
            # Standard deviation of the lognormal itself. It was previously overwritten
            # by s_normal, which mixed a log-space scale with a linear-space mean.
            s = np.sqrt((np.exp(s_normal**2) - 1) * np.exp(2*m_normal + s_normal**2))
        elif uncertainty_type == sa.NormalUncertainty:
            s = current_params['scale']
            m = current_params['loc']
        elif uncertainty_type == sa.UniformUncertainty:
            s = (current_params['maximum'] - current_params['minimum']) / np.sqrt(12)
            # The reference value is 'loc', not the midpoint of [minimum, maximum].
            m = current_params['loc']
        elif uncertainty_type == sa.TriangularUncertainty:
            a = current_params['minimum']
            b = current_params['maximum']
            c = current_params['loc']
            m = (a+b+c)/3
            s = np.sqrt((a**2 + b**2 + c**2 - a*b - b*c - a*c)/18)
        else:
            # No measure defined for this distribution: std2mean stays 0.
            continue

        current_measure['std2mean'] = np.abs(s/m)

    # Undo the grouping so that record i corresponds to input params[i].
    return params_uncertainty_measure[np.argsort(mc.ordering)]

In [None]:
# Structured (row, col) records let whole exchanges be compared as single elements.
dt = np.dtype([
    ('row', '<u4'),
    ('col', '<u4'),
])

# (row, col) of every technosphere parameter of the LCA ...
params_row_col = np.zeros(len(lca.tech_params), dt)
params_row_col[['row', 'col']] = [(p['row'], p['col']) for p in lca.tech_params]

# ... and of every entry of tech_params_sct (entries are indexed as row, col, contribution).
tech_params_sct_row_col = np.zeros(len(tech_params_sct), dt)
tech_params_sct_row_col[['row', 'col']] = [(p[0], p[1]) for p in tech_params_sct]

# Positions in lca.tech_params of the exchanges that also occur in tech_params_sct.
is_in_sct = np.in1d(params_row_col, tech_params_sct_row_col)
where_inds = np.where(is_in_sct)[0]
input_params = lca.tech_params[where_inds]

# Rank those exchanges by decreasing std2mean.
params_uncertain = add_uncertainty_measure(input_params)
order = np.argsort(params_uncertain['std2mean'])[::-1]

# num_params = -1
selected_tech_params = params_uncertain[order]

# Attach the contribution stored with each (row, col) in tech_params_sct.
tech_params_sct_dict = {(p[0], p[1]): p[2] for p in tech_params_sct}
for p in selected_tech_params:
    p['contribution'] = tech_params_sct_dict[(p['row'], p['col'])]

# Max-normalized versions of both measures.
max_std2mean = max(selected_tech_params['std2mean'])
selected_tech_params['std2mean_maxnormalized'] = selected_tech_params['std2mean'] / max_std2mean
max_contribution = max(selected_tech_params['contribution'])
selected_tech_params['contribution_maxnormalized'] = selected_tech_params['contribution'] / max_contribution

# Weighted combination of the two measures; with these weights only contribution counts.
# NOTE(review): 0*std2mean is NaN wherever std2mean is inf or NaN -- confirm that cannot occur.
uncertainty_measure = 0*selected_tech_params['std2mean'] + 1*selected_tech_params['contribution']

# Keep the num_params exchanges with the largest measure.
num_params = 2000
top_by_measure = np.argsort(uncertainty_measure)[::-1][:num_params]
selected_tech_params_sorted = selected_tech_params[top_by_measure]

# Same selection expressed as ascending indices into lca.tech_params.
where_selected_tech = where_inds[order][top_by_measure]
where_selected_tech.sort()

# Local SA

In [None]:
%%time 

# Factors handed to every local-SA helper below.
# NOTE(review): presumably each input is multiplied by each factor in turn -- confirm in dev.utils_local_sa.
const_factors = [1/10, 10]

# Technosphere wrt tech_params
# Restricted to the exchanges in where_selected_tech; the tag records how many were selected.
tech_params_c = get_tech_params_local_sa(
    where_selected_tech, 
    lca, 
    write_dir, 
    const_factors, 
    tag="sct{}".format(num_params),
)

# Biosphere wrt bio_params
bio_params_c = get_bio_params_local_sa(lca, write_dir, const_factors)

# Characterization wrt cf_params
cf_params_c = get_cf_params_local_sa(lca, write_dir, const_factors)

# Validation

In [None]:
# Keep only the 'scores' entry of every local-SA result, grouped by exchange type.
# Insertion order (tech, bio, cf) is preserved.
scores_dict = {
    exchange_type: {key: val['scores'] for key, val in params_c.items()}
    for exchange_type, params_c in (
        ('tech', tech_params_c),
        ('bio', bio_params_c),
        ('cf', cf_params_c),
    )
}

In [None]:
def get_where_high_var(scores_dict, lca_score, num_params):
    """Return the positions of the inputs whose local-SA scores vary the most.

    The score arrays of all exchange types are stacked in the iteration order
    of ``scores_dict`` (one row per input), the reference ``lca_score`` is
    appended to every row as an extra column, and the variance of each row is
    used as the ranking criterion.

    Parameters
    ----------
    scores_dict : dict
        ``{exchange_type: {input_key: scores}}``. Every input must provide the
        same number of scores; that number is no longer required to be 2.
        Exchange types without any input are skipped.
    lca_score : float
        Reference score appended to the scores of every input.
    num_params : int
        Maximum number of positions to return.

    Returns
    -------
    numpy.ndarray
        Ascending row positions (in the stacked order) of the ``num_params``
        inputs with the highest variance. Empty if there are no inputs.

    Raises
    ------
    AssertionError
        If any selected input has zero variance, i.e. fewer than
        ``num_params`` inputs actually change the score.
    """
    blocks = []
    for scores_dict_exchange_type in scores_dict.values():
        if len(scores_dict_exchange_type) == 0:
            # Nothing to stack; hstack cannot combine an empty 1-D array with a 2-D column.
            continue
        vals_temp = np.array(list(scores_dict_exchange_type.values()), dtype=float)
        # One row per input, however many scores each input carries.
        vals_temp = vals_temp.reshape(len(vals_temp), -1)
        blocks.append(
            np.hstack([vals_temp, np.tile(lca_score, (len(vals_temp), 1))])
        )
    if not blocks:
        return np.array([], dtype=np.intp)
    vals = np.vstack(blocks)
    # Variance of LSA scores for each input / parameter
    var = np.var(vals, axis=1)
    where_high_var = np.argsort(var)[::-1][:num_params]
    assert np.all(var[where_high_var] > 0)
    return np.sort(where_high_var)

In [None]:
# Number of inputs to keep after the local sensitivity screening.
num_params_inf = 5000
# Positions (rows of the stacked tech, bio, cf scores) of the inputs with the largest score variance.
where_high_var = get_where_high_var(scores_dict, lca.score, num_params_inf)

In [None]:
# Indices into the full lca parameter arrays of all parameters with uncertainty_type > 1.
# NOTE(review): presumably this excludes the undefined / no-uncertainty types -- confirm against stats_arrays ids.
where_tech = np.where(lca.tech_params['uncertainty_type'] > 1)[0]
where_bio = np.where(lca.bio_params['uncertainty_type'] > 1)[0]
where_cf = np.where(lca.cf_params['uncertainty_type'] > 1)[0]

len_tech, len_bio, len_cf = (len(scores_dict[kind]) for kind in ('tech', 'bio', 'cf'))

# where_high_var indexes the tech, bio and cf scores stacked in that order,
# so each exchange type occupies one contiguous range of positions.
bio_start = len_tech
cf_start = len_tech + len_bio

where_tech_inf = where_high_var[where_high_var < bio_start]
where_bio_inf = where_high_var[(where_high_var >= bio_start) & (where_high_var < cf_start)] - bio_start
where_cf_inf = where_high_var[where_high_var >= cf_start] - cf_start

# Translate positions within each scores dict back to its keys.
tech_keys = np.array(list(scores_dict['tech']))
bio_keys = np.array(list(scores_dict['bio']))
cf_keys = np.array(list(scores_dict['cf']))

where_params_tech_inf = tech_keys[where_tech_inf]
where_params_bio_inf = bio_keys[where_bio_inf]
where_params_cf_inf = cf_keys[where_cf_inf]

In [None]:
# Where the uncertain parameters sit in the full lca arrays ...
uncertain_params_selected_where_dict = dict(tech=where_tech, bio=where_bio, cf=where_cf)

# ... and the corresponding parameter records.
uncertain_params = dict(
    tech=lca.tech_params[where_tech],
    bio=lca.bio_params[where_bio],
    cf=lca.cf_params[where_cf],
)

# Model over all uncertain tech, bio and cf parameters.
model = LCAModelBase(demand, uncertain_method, uncertain_params, uncertain_params_selected_where_dict)

In [None]:
# For every influential parameter, find its position among the uncertain parameters
# of the same exchange type. Each must match exactly one uncertain parameter.

# Technosphere: matched on (row, col); positions need no offset.
tech_params_inf = lca.tech_params[where_params_tech_inf]
tech_uparams = lca.tech_params[where_tech]
include_inds_tech_inf_forX = []
for p in tech_params_inf:
    same_exchange = (tech_uparams['row'] == p['row']) & (tech_uparams['col'] == p['col'])
    w = np.where(same_exchange)[0]
    assert len(w) == 1
    tech_ind = w[0]
    include_inds_tech_inf_forX.append(tech_ind)
include_inds_tech_inf_forX = np.array(include_inds_tech_inf_forX)

# Biosphere: matched on (row, col); positions are shifted past the tech inputs.
bio_params_inf = lca.bio_params[where_params_bio_inf]
bio_uparams = lca.bio_params[where_bio]
include_inds_bio_inf_forX = []
for p in bio_params_inf:
    same_exchange = (bio_uparams['row'] == p['row']) & (bio_uparams['col'] == p['col'])
    w = np.where(same_exchange)[0]
    assert len(w) == 1
    bio_ind = w[0]
    include_inds_bio_inf_forX.append(bio_ind)
include_inds_bio_inf_forX = np.array(include_inds_bio_inf_forX) + model.uncertain_exchange_lengths['tech']

# Characterization: matched on row only; positions are shifted past tech and bio inputs.
cf_params_inf = lca.cf_params[where_params_cf_inf]
cf_uparams = lca.cf_params[where_cf]
include_inds_cf_inf_forX = []
for p in cf_params_inf:
    w = np.where(cf_uparams['row'] == p['row'])[0]
    assert len(w) == 1
    cf_ind = w[0]
    include_inds_cf_inf_forX.append(cf_ind)
include_inds_cf_inf_forX = (
    np.array(include_inds_cf_inf_forX)
    + model.uncertain_exchange_lengths['tech']
    + model.uncertain_exchange_lengths['bio']
)

# Influential inputs as positions in the concatenated (tech, bio, cf) input vector.
parameter_choice_inf = np.hstack(
    [include_inds_tech_inf_forX, include_inds_bio_inf_forX, include_inds_cf_inf_forX]
)

In [None]:
%%time

# Settings of the validation run.
iterations_validation = 2000
validation_seed = 200300400
lca_scores_axis_title = r"$\text{LCA scores, [kg CO}_2\text{-eq}]$"

# Default amounts of all uncertain inputs, concatenated in the model's own order.
default_uncertain_amounts = np.hstack(list(model.default_uncertain_amounts.values()))

if __name__ == "__main__":
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=validation_seed,
        default_x_rescaled=default_uncertain_amounts,
        write_dir=write_dir,
        model_output_name=lca_scores_axis_title,
    )

In [None]:
if __name__ == "__main__":
    # Model outputs obtained when only the local-SA selection of inputs is passed as influential.
    tag = "localSA"
    Y_subset = val.get_influential_Y_from_parameter_choice(
        influential_inputs=parameter_choice_inf,
        tag=tag,
    )

# Compare those outputs with the validation outputs for all inputs.
fig = val.plot_correlation_Y_all_Y_inf(
    Y_subset,
    num_influential=len(parameter_choice_inf),
    tag=tag,
)
fig.show()

# Spearman correlations with dask

### Dask

In [None]:
# import dask
# from dask.distributed import Client, LocalCluster
# from dask_jobqueue import SLURMCluster
# from pathlib import Path
# from dev.setups_paper_gwp import write_X_chunks

In [None]:
# which_pc = "merlin_protocol_gsa"
# if 'merlin' in which_pc:
#     path_dask_logs = Path('/data/user/kim_a/dask_logs')
#     path_dask_logs.mkdir(parents=True, exist_ok=True)
#     cluster = SLURMCluster(cores     = 8, 
#                            memory    ='20GB', 
#                            walltime  = '04:00:00',
#                            interface ='ib0',
#                            local_directory = path_dask_logs.as_posix(),
#                            log_directory   = path_dask_logs.as_posix(),
#                            queue="daily",
#                            ) 
# elif 'local' in which_pc:
#     cluster = LocalCluster(memory_limit='7GB') 

In [None]:
# client = Client(cluster)

In [None]:
# n_workers = 20
# cluster.scale(n_workers)

In [None]:
# client

In [None]:
# cluster.close()

### GSA

In [None]:
def setup_lca_model_realistic(
    i,
    n_workers,
    path_base, 
    num_params=None, 
    write_dir=None, 
):
    """Build the LCA model restricted to a previously stored parameter selection.

    The selection is read from
    ``<write_dir>/arrays/Y.randomSampling.20000.<seed>.None/uparams_where_<i>_<n_workers>.pickle``,
    which must already exist. The iteration count (20000) and the seed in that
    path are hard-coded.

    Parameters
    ----------
    i : int
        Worker index; used only in the pickle file name.
    n_workers : int
        Total number of workers; used only in the pickle file name.
    path_base : pathlib.Path
        Base directory; ``path_base / "realistic_gsa"`` is the default ``write_dir``.
    num_params : optional
        Not used by this function.
    write_dir : pathlib.Path, optional
        Directory that contains the ``arrays`` folder.

    Returns
    -------
    tuple
        ``(model, write_dir, gsa_seed)``.
    """
    # LCA for one unit of the single activity whose name contains "Food".
    bd.projects.set_current("GSA for protocol")
    consumption_db = bd.Database("CH consumption 1.0")
    food_activities = [act for act in consumption_db if "Food" in act["name"]]
    assert len(food_activities) == 1
    demand = {food_activities[0]: 1}
    method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
    lca = bc.LCA(demand, method)
    lca.lci()
    lca.lcia()

    if write_dir is None:
        write_dir = path_base / "realistic_gsa"
    gsa_seed = 700800900

    # Stored selection: indices into lca.tech_params / bio_params / cf_params.
    filepath_uparams = (
        write_dir
        / "arrays"
        / "Y.randomSampling.20000.{}.None".format(gsa_seed)
        / "uparams_where_{}_{}.pickle".format(i, n_workers)
    )
    uncertain_params_selected_where_dict = read_pickle(filepath_uparams)

    param_arrays = {
        'tech': lca.tech_params,
        'bio': lca.bio_params,
        'cf': lca.cf_params,
    }
    uncertain_params = {
        exchange_type: array[uncertain_params_selected_where_dict[exchange_type]]
        for exchange_type, array in param_arrays.items()
    }

    # NOTE(review): unlike the model built earlier in this notebook, the selection dict
    # itself is not passed to LCAModelBase here -- confirm that this is intended.
    model = LCAModelBase(demand, method, uncertain_params)

    return model, write_dir, gsa_seed

In [None]:
%%time

# Number of model runs for the correlation-based GSA: four per screened input.
iterations = 4*num_params_inf

# from gsa_framework.sensitivity_analysis.correlations import Correlations

# uncertain_params_selected_where_dict = {
#     'tech': where_params_tech_inf,
#     'bio': where_params_bio_inf,
#     'cf': where_params_cf_inf,
# }
# uncertain_params = {
#     'tech': lca.tech_params[where_params_tech_inf],
#     'bio': lca.bio_params[where_params_bio_inf],
#     'cf': lca.cf_params[where_params_cf_inf],
# }

# model_screening = LCAModelBase(
#     demand,
#     uncertain_method,
#     uncertain_params,
# )

n_workers = 20

# NOTE(review): setup_lca_model_realistic reads "uparams_where_0_20.pickle" from disk. In this
# notebook that file is written only by the commented-out write_uparams_where cell, and its
# directory name hard-codes 20000 iterations, so it matches `iterations` only while
# num_params_inf == 5000. A fresh Restart & Run All fails here if the file is missing.
# The call also rebinds write_dir to the value returned by the setup function.
model_screening, write_dir, gsa_seed = setup_lca_model_realistic(0, n_workers, path_base)

gsa_corr = Correlations(
    iterations=iterations,
    model=model_screening,
    write_dir=write_dir,
    seed=gsa_seed,
)

# Per-method run settings; only the commented-out dask cells read this dict.
options = {
    'corr': {
        "iterations": iterations,
        "n_workers":  n_workers,
    },
}

In [None]:
# def write_uparams_where(dirpath_Y, uncertain_params_selected_where_dict, n_workers):
#     for i in range(n_workers):
#         filepath_i = dirpath_Y / "uparams_where_{}_{}.pickle".format(
#             i, n_workers
#         )
#         write_pickle(uncertain_params_selected_where_dict, filepath_i)

# # gsa_corr.dirpath_Y.mkdir(parents=True, exist_ok=True)
# # write_X_chunks(gsa, n_workers_corr)
# write_uparams_where(gsa_corr.dirpath_Y, uncertain_params_selected_where_dict, n_workers)

In [None]:
# def compute_scores_per_worker(
#     option, num_params, iterations, i_worker, n_workers, setup_model, path_base
# ):
#     model_screening, write_dir, gsa_seed = setup_model(i_worker, n_workers, path_base, num_params)
#     from gsa_framework.sensitivity_analysis.correlations import Correlations
#     # Setup GSA
#     gsa = Correlations(
#         iterations=iterations,
#         model=model_screening,
#         write_dir=write_dir,
#         seed=gsa_seed,
#     )
#     gsa.dirpath_Y.mkdir(parents=True, exist_ok=True)
#     filepath_X_chunk = gsa.dirpath_Y / "X.unitcube.{}.{}.pickle".format(
#         i_worker, n_workers
#     )
#     X_chunk_unitcube = read_pickle(filepath_X_chunk)
#     X_chunk_rescaled = gsa.model.rescale(X_chunk_unitcube)
#     del X_chunk_unitcube
#     scores = gsa.model(X_chunk_rescaled)
#     Y_filename = "{}.{}.pickle".format(i_worker, n_workers)
#     filepath = gsa.dirpath_Y / Y_filename
#     write_pickle(scores, filepath)
#     return scores

In [None]:
# # Compute model outputs
# task_per_worker = dask.delayed(compute_scores_per_worker)
# model_evals = []
# for option,dict_ in options.items():
#     iterations = dict_["iterations"]
#     n_workers = dict_["n_workers"]
#     for i in range(n_workers):
#         print(option, num_params_inf, iterations, i, n_workers)
#         model_eval = task_per_worker(
#             option, 
#             num_params_inf, 
#             iterations, 
#             i, 
#             n_workers, 
#             setup_lca_model_realistic, 
#             path_base
#         )
#         model_evals.append(model_eval)

In [None]:
# %%time
# dask.compute(model_evals)

In [None]:
# from dev.setups_paper_gwp import generate_model_output_from_chunks
# generate_model_output_from_chunks(gsa_corr, n_workers)

# Spearman correlations without dask

In [None]:
# %%time
#
# filename_Y = "Y.randomSampling.{}.{}.700800900".format(num_params_inf, iterations, gsa_seed)
# filepath_Y = gsa.filepath_Y.parent / filename_Y
# print(filepath_Y)

# X = gsa.generate_unitcube_samples(iterations)
# Xr = model_screening.rescale(X)
# Y = model_screening(Xr)
# write_hdf5_array(Y, filepath_Y)

In [None]:
# Run the correlation-based GSA and keep the Spearman coefficients.
S = gsa_corr.perform_gsa()
spearman = S['spearman']
# Input positions ordered from the largest to the smallest coefficient.
# NOTE(review): this ranks by signed value; confirm that perform_gsa() returns magnitudes,
# otherwise strongly negative correlations end up last.
argsort_spearman = np.argsort(spearman)[::-1]

In [None]:
# Both shapes should agree: argsort_spearman is used to index parameter_choice_inf.
spearman.shape, parameter_choice_inf.shape

In [None]:
# Spearman coefficients in descending order.
spearman[argsort_spearman]

# Validation

In [None]:
# Number of inputs kept after ranking by Spearman coefficient.
num_params_after_spearman = 20
# Reorder the screened inputs by rank first, then truncate to the top entries.
# NOTE(review): this assumes spearman[k] belongs to parameter_choice_inf[k] -- confirm that the
# stored parameter selection used by model_screening has the same (tech, bio, cf) order.
parameter_choice_spearman = parameter_choice_inf[argsort_spearman][:num_params_after_spearman]
# In-place sort: ascending positions in the concatenated (tech, bio, cf) input vector.
parameter_choice_spearman.sort()
parameter_choice_spearman

In [None]:
if __name__ == "__main__":
    # Model outputs obtained when only the Spearman selection of inputs is passed as influential.
    tag = "spearman"
    Y_subset = val.get_influential_Y_from_parameter_choice(influential_inputs=parameter_choice_spearman, tag=tag)
    
# num_influential must describe the subset that produced Y_subset. It previously reported
# the size of the local-SA selection (parameter_choice_inf) instead.
fig=val.plot_correlation_Y_all_Y_inf(Y_subset, num_influential=parameter_choice_spearman.shape[0], tag=tag)
fig.show()

In [None]:
# Technosphere exchanges among the Spearman-selected inputs.
# Input positions are ordered tech, then bio, then cf, so tech inputs are those below n_tech.
# The boundary is derived from the model instead of a hard-coded count of 8.
n_tech = len(model.uncertain_params['tech'])
tech = model.uncertain_params['tech'][parameter_choice_spearman[parameter_choice_spearman < n_tech]]
rdict = lca.reverse_dict()
for p in tech:
    # NOTE(review): rows are looked up in rdict[0], the same mapping as columns -- confirm
    # that rows and columns share one index mapping in this project.
    act_in = bd.get_activity(rdict[0][p['row']])
    act_out = bd.get_activity(rdict[0][p['col']])
    print("FROM {}".format(act_in['name']))
    print("TO {}\n".format(act_out['name']))

In [None]:
# Biosphere exchanges among the Spearman-selected inputs.
# Bio inputs occupy positions [n_tech, n_tech + n_bio) of the concatenated input vector.
# The range is derived from the model instead of the hard-coded slice [8:17].
n_tech = len(model.uncertain_params['tech'])
n_bio = len(model.uncertain_params['bio'])
is_bio = (parameter_choice_spearman >= n_tech) & (parameter_choice_spearman < n_tech + n_bio)
bio = model.uncertain_params['bio'][parameter_choice_spearman[is_bio] - n_tech]
rdict = lca.reverse_dict()
for p in bio:
    act_in = bd.get_activity(rdict[2][p['row']])
    act_out = bd.get_activity(rdict[0][p['col']])
    print("FROM {}".format(act_in['name']))
    print("TO {}\n".format(act_out['name']))

In [None]:
# Characterization factors among the Spearman-selected inputs.
# CF inputs start after all tech and bio inputs in the concatenated input vector.
# The offset is derived from the model instead of the hard-coded slice [17:].
cf_start = len(model.uncertain_params['tech']) + len(model.uncertain_params['bio'])
cf = model.uncertain_params['cf'][parameter_choice_spearman[parameter_choice_spearman >= cf_start] - cf_start]
rdict = lca.reverse_dict()
for p in cf:
    act_in = bd.get_activity(rdict[2][p['row']])
    print("{}\n".format(act_in['name']))

In [None]:
# Display the last flow looked up by the preceding loop (the loop variable is still in scope).
act_in