In [None]:
import bw2data as bd
import bw2calc as bc
import bw2io as bi
import stats_arrays as sa
import numpy as np
from copy import deepcopy
from pathlib import Path

from gsa_framework.models import LCAModel
from gsa_framework.convergence_robustness_validation import Validation
from gsa_framework.utils import read_pickle, read_hdf5_array
from gsa_framework.visualization.plotting import *

from setups_paper_gwp import *

# Name of the Brightway project this notebook works in.
project = "GSA for protocol narrow bio"
# Uncomment to delete the project together with its directory and start over.
# bd.projects.delete_project(project, delete_dir=True)
# Make this project the current one.
bd.projects.set_current(project)

# 1. Narrow down all incorrect bio exchanges

In [None]:
# bi = bd.Database("biosphere3")
# ei = bd.Database("ecoinvent 3.7.1 cutoff")

# co = bd.Database("CH consumption 1.0")
# demand_act = [act for act in co if "Food" in act["name"]]
# assert len(demand_act) == 1
# demand_act = demand_act[0]
# demand = {demand_act: 1}
# method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
# lca = bc.LCA(demand, method)
# lca.lci()
# lca.lcia()
# lca.score

In [None]:
# bio_params_normal = lca.bio_params[np.logical_and(
#     lca.bio_params['uncertainty_type']==sa.NormalUncertainty.id,
#     lca.bio_params['scale']==0.7383766,
# )]

# cols = bio_params_normal['col']
# acts = [bd.get_activity(lca.reverse_dict()[0][col]) for col in cols]

In [None]:
# assert len(set(bio_params_normal['row'])) == 1
# co2_ind = bio_params_normal['row'][0]
# co2_act = bd.get_activity(lca.reverse_dict()[2][co2_ind])
# co2_act.key

In [None]:
# for act in acts:
#     print(act['name'])
#     count = 0
#     for exc in act.exchanges():
#         if exc.input == co2_act and exc['uncertainty type']==sa.NormalUncertainty.id:
#             dict_ = {
#                 "input": (co2_act['database'], co2_act['code']), 
#                 "amount": deepcopy(exc.amount),
#                 "type": "biosphere",
#                 "uncertainty type": sa.LognormalUncertainty.id,
#                 "loc": np.log(exc['loc']),
#                 "scale": exc['scale'],
#             }
#             count += 1
#             break
#     assert count == 1
#     act.new_exchange(**dict_).save()
#     print("   deleting {}".format(exc.input['name']))
#     exc.delete()

In [None]:
# lca2 = bc.LCA(demand, method)
# lca2.lci()
# lca2.lcia()
# print(lca2.score)

# bio_params_normal2 = lca2.bio_params[np.logical_and(
#     lca2.bio_params['uncertainty_type']==sa.NormalUncertainty.id,
#     lca2.bio_params['scale']==0.7383766,
# )]
# print(len(bio_params_normal2))

# bio_params_lognormal = lca2.bio_params[lca2.bio_params['scale']==0.7383766]
# len(bio_params_lognormal)

In [None]:
# bio_params_lognormal

In [None]:
# lca2.bio_params[lca2.bio_params['row']==co2_ind]

# Run MC when everything varies

In [None]:
# num_params=None is handed straight to the setup helper
# (presumably meaning "use every uncertain parameter" — confirm in the helper).
num_params = None
path_base = Path("/data/user/kim_a")
model, write_dir, gsa_seed = setup_lca_model_protocol_narrow_bio(
    path_base, num_params=num_params, write_dir=None
)

In [None]:
# Horizontally stack the model's per-key default amounts into a single array,
# following the iteration order of model.default_uncertain_amounts.
default_uncertain_amounts = np.hstack(
    list(model.default_uncertain_amounts.values())
)

In [None]:
%%time
# Settings of the validation Monte Carlo run: number of iterations and seed.
iterations_validation = 2000
validation_seed = 100023423
# LaTeX label handed to Validation as the model output name.
lca_scores_axis_title = r"$\text{LCA scores, [kg CO}_2\text{-eq}]$"
# NOTE(review): inside a notebook kernel this guard is always true; presumably
# kept because Validation may spawn worker processes — confirm before removing.
if __name__ == "__main__":
    # `val` is read by the histogram comparison cell through `val.Y_all`.
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=validation_seed,
        default_x_rescaled=default_uncertain_amounts,
        write_dir=write_dir,
        model_output_name=lca_scores_axis_title,
    )

In [None]:
# Reference scores stored under <path_base>/protocol_gsa/arrays (presumably the
# validation run of the original protocol model — confirm).
# The file name is built from iterations_validation and validation_seed, so the
# reference always matches the settings that produced `val` instead of
# repeating them as literals that can drift.
validation_file_name = "validation.Y.all.{}.{}.hdf5".format(
    iterations_validation, validation_seed
)
fp_original = path_base / "protocol_gsa" / "arrays" / validation_file_name
Y_all_original = read_hdf5_array(fp_original).flatten()

# Compare the reference score distribution with the one from this notebook.
fig = plot_histogram_Y1_Y2(Y_all_original, val.Y_all)
fig.show()

# Run MC for screening

In [None]:
# import dask
# from dask.distributed import Client, LocalCluster
# from dask_jobqueue import SLURMCluster
# from pathlib import Path

In [None]:
# which_pc = "merlin_gsa_protocol_narrow_bio"
# if 'merlin' in which_pc:
#     path_dask_logs = Path('/data/user/kim_a/dask_logs')
#     path_dask_logs.mkdir(parents=True, exist_ok=True)
#     cluster = SLURMCluster(cores     = 8, 
#                            memory    ='30GB', 
#                            walltime  = '10:00:00',
#                            interface ='ib0',
#                            local_directory = path_dask_logs.as_posix(),
#                            log_directory   = path_dask_logs.as_posix(),
#                            queue="daily",
#                            ) 
# elif 'local' in which_pc:
#     cluster = LocalCluster(memory_limit='7GB') 

In [None]:
# client = Client(cluster)

In [None]:
# n_workers = 80
# cluster.scale(n_workers)

In [None]:
# client

In [None]:
# cluster.close()

In [None]:
# Screening run: the number of iterations is four times the number of parameters.
path_base = Path("/data/user/kim_a")
num_params = 20000
iter_corr = num_params * 4

# Number of workers; the correlation-based run uses all of them.
n_workers = 80
n_workers_corr = n_workers

gsa_corr = setup_corr(
    num_params,
    iter_corr,
    setup_lca_model_protocol_narrow_bio,
    path_base,
)

# Run settings keyed by GSA option name.
options = dict(
    corr=dict(iterations=iter_corr, n_workers=n_workers_corr),
)

In [None]:
# Presumably writes the sampling input of gsa_corr as n_workers_corr chunks so
# each worker can process its own part — inferred from the helper's name,
# confirm against its definition.
write_X_chunks(gsa_corr, n_workers_corr)

In [None]:
# Compute model outputs
# Import dask explicitly: without this the name is only available if one of
# the notebook's star imports happens to expose it.
import dask

# Wrap the per-worker function so each call below only builds a lazy task;
# the tasks are evaluated later by dask.compute.
task_per_worker = dask.delayed(compute_scores_per_worker)
model_evals = []
for option, option_settings in options.items():
    iterations = option_settings["iterations"]
    # Deliberately a separate name: rebinding the notebook-wide `n_workers`
    # inside this loop would silently change what other cells read.
    n_chunks = option_settings["n_workers"]
    for i in range(n_chunks):
        print(option, num_params, iterations, i, n_chunks)
        model_eval = task_per_worker(
            option,
            num_params,
            iterations,
            i,
            n_chunks,
            setup_lca_model_protocol_narrow_bio,
            path_base,
        )
        model_evals.append(model_eval)

In [None]:
# %%time
# Evaluate every delayed task collected in model_evals.
dask.compute(model_evals)

In [None]:
# Read the chunks back with the same chunk count that write_X_chunks was given
# (n_workers_corr), so the read side cannot drift from the write side.
Y = generate_model_output_from_chunks(gsa_corr, n_workers_corr)

In [None]:
# Run the sensitivity analysis on gsa_corr and keep the entry stored under the
# 'spearman' key of its result.
S = gsa_corr.perform_gsa()
spearman = S['spearman']