In [1]:
import brightway2 as bw
from pathlib import Path
from gsa_framework import LCAModel
from gsa_framework.validation import Validation
from gsa_framework.utils_setac_lca import *
from gsa_framework.utils import read_hdf5_array

Using environment variable BRIGHTWAY2_DIR for data directory:
/data/user/kim_a/Brightway3


In [2]:
# Root folder of the analysis and its "merlin" subfolder holding the pickled inputs.
path_base = Path('/data/user/kim_a/setac_gsa/')
path_merlin = path_base.joinpath("merlin")

# Locations of the three pickle files loaded below.
filepath_scores_lsa_dict = path_merlin.joinpath("scores_lsa_dict.pickle")
filepath_params_yes_0 = path_merlin.joinpath("params_yes_0.pickle")
filepath_params_yes_6 = path_merlin.joinpath("params_yes_6.pickle")

# Each file is read fully into memory (read_bytes opens and closes it) and then
# unpickled. NOTE: unpickling executes arbitrary code, so these files must come
# from a trusted source.
scores_lsa_dict = pickle.loads(filepath_scores_lsa_dict.read_bytes())
params_yes_0 = pickle.loads(filepath_params_yes_0.read_bytes())
params_yes_6 = pickle.loads(filepath_params_yes_6.read_bytes())

In [5]:
def compute_influential_Y(scores_lca_dict, params_yes_0, params_yes_inf, iterations_validation=500):
    """Plot the validation histogram for a chosen set of influential parameters.

    Sets up the LCA model (first activity in "CH consumption 1.0" whose name
    contains "Food", IPCC 2013 GTP 100a method), creates a ``Validation``
    object, obtains the model outputs for the influential parameters (from a
    cached hdf5 file when it exists, otherwise by computing them) and plots
    them against ``validation.base_Y``.

    Parameters
    ----------
    scores_lca_dict
        Currently unused; kept so existing calls keep working.
    params_yes_0
        Array of parameter identifiers in which the influential parameters
        are looked up.
    params_yes_inf
        Array of influential parameter identifiers (must expose ``.shape``);
        each entry must be present in ``params_yes_0``.
    iterations_validation : int, optional
        Number of validation iterations passed to ``Validation``.

    Returns
    -------
    None
        The function only prints and plots.

    Raises
    ------
    IndexError
        If no activity name contains "Food", or if an entry of
        ``params_yes_inf`` does not occur in ``params_yes_0``.

    Notes
    -----
    Relies on the notebook-level ``path_base`` for the write directory and
    for the location of the cached array.
    """
    print("Number of influential parameters is {}".format(params_yes_inf.shape[0]))

    # LCA model
    bw.projects.set_current("GSA for setac")
    co = bw.Database("CH consumption 1.0")
    demand_act = [act for act in co if "Food" in act["name"]][0]
    demand = {demand_act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")
    write_dir = path_base
    model = LCAModel(demand, method, write_dir)

    # Fixed seed so the validation samples (and the cache file name) are reproducible.
    seed = 923458

    default_amounts, uncertain_tech_params_where = get_amounts_means(
        model.lca.tech_params
    )
    # NOTE(review): the returned score is not used below; the call is kept
    # because it may update the state of model.lca -- confirm before removing.
    static_mean_score = get_static_score(
        default_amounts, uncertain_tech_params_where, model.lca
    )
    validation = Validation(
        model,
        iterations=iterations_validation,
        seed=seed,
        default_x=default_amounts,
        write_dir=write_dir,
    )

    # Position (first occurrence) of every influential parameter inside params_yes_0.
    params_yes_inf_where = np.array([np.where(params_yes_0==p)[0][0] for p in params_yes_inf])

    filepath_influential_Y = path_base / "arrays" / \
        "validation.influential.Y.{}.{}.{}.hdf5".format(iterations_validation,params_yes_inf.shape[0],seed)
    if filepath_influential_Y.exists():
        # Cache hit: reuse the stored outputs.
        influential_Y = read_hdf5_array(filepath_influential_Y).flatten()
    else:
        # Cache miss: use the freshly computed outputs directly instead of
        # re-reading a file that may not have been written to this path.
        influential_Y = np.asarray(
            validation.get_influential_Y_from_parameter_choice(
                params_yes_inf_where
            )
        ).flatten()

    validation.plot_histogram(base_y=validation.base_Y, influential_y=influential_Y)

In [6]:
# iterations_validation = 2000
# compute_influential_Y(scores_lsa_dict, params_yes_0, params_yes_6, iterations_validation)

# Validation after regression

In [8]:
%%time
model_seed = 3333
path_model_dir = path_base / "regression" / "{}_model".format(model_seed)
model, params_yes_xgboost = get_xgboost_params(path_model_dir, params_yes_0)

params_yes_xgboost.sort()
iterations_validation = 2000
compute_influential_Y(scores_lsa_dict, params_yes_0, params_yes_xgboost, iterations_validation)

Number of influential parameters is 8722


CPU times: user 27min 39s, sys: 5.43 s, total: 27min 44s
Wall time: 28min 1s


# Validation after regression for only 60 parameters

In [None]:
%%time
model_seed = 3333
path_model_dir = path_base / "regression" / "{}_model".format(model_seed)
model, params_yes_xgboost = get_xgboost_params(path_model_dir, params_yes_0)

num_influential = 60
params_yes_inf = params_yes_xgboost[:num_influential_influential]

params_yes_inf.sort()
iterations_validation = 2000
compute_influential_Y(scores_lsa_dict, params_yes_0, params_yes_inf, iterations_validation)