# Run Monte Carlo (MC) with narrowed distributions: single configuration

In [None]:
from gsa_framework.utils import read_hdf5_array, write_hdf5_array
from pathlib import Path
import brightway2 as bw
from copy import deepcopy
from gsa_framework.utils_setac_lca import get_xgboost_params, plot_base_narrow_Y
import pickle
from gsa_framework.lca import LCAModelSetac
import numpy as np

# Narrowing configuration for this single run:
#   num_params_narrow - how many entries of the ranked parameter list get narrowed
#   scaling_factor    - divisor applied to the "scale" field of those parameters
#   model_seed        - selects the "<model_seed>_model" regression directory
num_params_narrow = 60
scaling_factor = 4
model_seed = 3333

# path_base = Path("/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/")
path_base = Path('/data/user/kim_a/')
# Fix: this directory used to be bound to the name `w`, while every later use
# refers to `path_setac`, so the cell failed with NameError.
path_setac = path_base / "setac_gsa"
path_merlin = path_setac / "merlin"

# 1. LCA setup
# LCA model
iterations = 500
bw.projects.set_current("GSA for setac")
co = bw.Database("CH consumption 1.0")
# First activity of the consumption database whose name contains "Food".
demand_act = [act for act in co if "Food" in act["name"]][0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GTP 100a")
lca = bw.LCA(demand, method)
lca.lci()
lca.lcia()
print(lca.score)

# Number of technosphere parameters whose uncertainty_type is greater than 1;
# this is also the column count of the unit-cube sample.
num_params = len(lca.tech_params[lca.tech_params["uncertainty_type"] > 1])
print(num_params)

# The cache file name encodes the whole configuration, so a different setting
# never picks up an array computed for another one.
seed = 923458
filename_Y_narrow = "validation.narrow.Y.{}.{}.{}.{}.hdf5".format(
    iterations, num_params_narrow, scaling_factor, seed
)
filepath_Y_narrow = path_setac / "arrays" / filename_Y_narrow


In [None]:
%%time
# Reuse the cached narrowed scores when they exist; otherwise build the
# narrowed model, sample it, and cache every intermediate array on the way.
if filepath_Y_narrow.exists():
    narrow_Y = read_hdf5_array(filepath_Y_narrow).flatten()
else:
    # Read params_yes from xgboost model
    filepath_params_yes_0 = path_merlin / "params_yes_0.pickle"
    with open(filepath_params_yes_0, "rb") as f:
        params_yes_0 = pickle.load(f)
    path_model_dir = path_setac / "regression" / "{}_model".format(model_seed)
    model, params_yes_xgboost, _ = get_xgboost_params(path_model_dir, params_yes_0)
    params_yes = params_yes_xgboost[:num_params_narrow]

    # Work on an independent copy of the technosphere parameters so that
    # lca.tech_params itself is left untouched. (A deepcopy that was
    # immediately overwritten by this array has been dropped.)
    dt = lca.tech_params.dtype
    tech_params_narrow = np.array(list(lca.tech_params), dtype=dt)
    # Narrow the selected parameters by dividing their "scale" field.
    for p in params_yes:
        tech_params_narrow[p]["scale"] = tech_params_narrow[p]["scale"] / scaling_factor

    lca_model = LCAModelSetac(demand, method, tech_params_narrow)

    filename_X_unitcube = "validation.base.X.unitcube.{}.{}.{}.hdf5".format(
        iterations, num_params, seed
    )
    filepath_base_X_unitcube = path_setac / "arrays" / filename_X_unitcube
    if filepath_base_X_unitcube.exists():
        X_unitcube = read_hdf5_array(filepath_base_X_unitcube)
    else:
        # Fix: the generator has to be seeded with np.random.seed. The previous
        # np.random.rand(seed) only drew `seed` throwaway numbers, so the
        # sample stored under this seed-tagged file name was not reproducible.
        np.random.seed(seed)
        X_unitcube = np.random.rand(iterations, num_params)
        write_hdf5_array(X_unitcube, filepath_base_X_unitcube)

    filename_X_rescaled_narrow = "validation.narrow.X.rescaled.{}.{}.{}.{}.hdf5".format(
        iterations, num_params_narrow, scaling_factor, seed
    )
    filepath_X_rescaled_narrow = path_setac / "arrays" / filename_X_rescaled_narrow
    if filepath_X_rescaled_narrow.exists():
        X_rescaled_narrow = read_hdf5_array(filepath_X_rescaled_narrow)
    else:
        # NOTE(review): the grid-search section of this notebook calls
        # rescale() with the unit-cube sample only; confirm that
        # LCAModelSetac.rescale really accepts a file path as second argument.
        X_rescaled_narrow = lca_model.rescale(X_unitcube, filepath_X_rescaled_narrow)
        write_hdf5_array(X_rescaled_narrow, filepath_X_rescaled_narrow)
    narrow_Y = lca_model(X_rescaled_narrow)
    write_hdf5_array(narrow_Y, filepath_Y_narrow)

# The base scores are only read here, never generated: the array must already
# be present on disk for the same iterations / num_params / seed.
filename_base_Y = "validation.base.Y.{}.{}.{}.hdf5".format(iterations, num_params, seed)
filepath_base_Y = path_setac / "arrays" / filename_base_Y
base_Y = read_hdf5_array(filepath_base_Y).flatten()
plot_base_narrow_Y(base_Y, narrow_Y)

# Run narrowed MC over a grid of parameter counts and scaling factors

In [None]:
import brightway2 as bw
from pathlib import Path
from gsa_framework.lca import LCAModel, LCAModelSetac
from gsa_framework.validation import Validation
from gsa_framework.utils_setac_lca import *
from gsa_framework.utils import read_hdf5_array, write_hdf5_array

In [None]:
# Number of Monte Carlo rows per run; part of every cache file name below.
iterations = 2000

# `seed` tags the sampled arrays, `model_seed` picks the regression model folder.
seed, model_seed = 923458, 3333

# Grid explored by the narrowed runs: every parameter count is combined with
# every scaling divisor.
num_params_narrow_list = [12, 36, 60, 84]
scaling_factor_list = [2, 4, 6, 8]

In [None]:
# Directory layout. `path_base` used to be assigned twice (first to the
# setac_gsa folder, then to its parent); the single definition below yields
# exactly the same final paths.
# path_base = Path(
#     "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
# )
path_base = Path('/data/user/kim_a/')
write_dir = path_base / "setac_gsa"
path_merlin = write_dir / "merlin"
path_model_dir = write_dir / "regression" / "{}_model".format(model_seed)

filepath_scores_lsa_dict = path_merlin / "scores_lsa_dict.pickle"
filepath_params_yes_0 = path_merlin / "params_yes_0.pickle"
filepath_params_yes_6 = path_merlin / "params_yes_6.pickle"

# Results of earlier screening steps, stored as pickles in the merlin folder.
with open(filepath_scores_lsa_dict, 'rb') as f:
    scores_lsa_dict = pickle.load(f)

with open(filepath_params_yes_0, 'rb') as f:
    params_yes_0 = pickle.load(f)

with open(filepath_params_yes_6, 'rb') as f:
    params_yes_6 = pickle.load(f)

# LCA model
bw.projects.set_current("GSA for setac")
co = bw.Database("CH consumption 1.0")
# First activity of the consumption database whose name contains "Food".
demand_act = [act for act in co if "Food" in act["name"]][0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GTP 100a")
lca = bw.LCA(demand, method)
lca.lci()
lca.lcia()
print(lca.score)

# Number of technosphere parameters whose uncertainty_type is greater than 1.
num_params = len(lca.tech_params[lca.tech_params["uncertainty_type"] > 1])
print(num_params)

# Read params_yes from xgboost model
model, params_yes_xgboost, _ = get_xgboost_params(path_model_dir, params_yes_0)

lca_model = LCAModel(demand, method, write_dir)
amounts = lca.tech_params[lca.tech_params["uncertainty_type"]>1]["amount"]
validation = Validation(
    lca_model,
    iterations=iterations,
    seed=seed,
    default_x_rescaled=amounts,  # TODO change??
    write_dir=write_dir,
)

# The unit-cube sample is shared by all grid points (its file name does not
# depend on the narrowing settings), so its path is computed once.
filename_X_unitcube = "validation.base.X.unitcube.{}.{}.{}.hdf5".format(
    iterations, num_params, seed
)
filepath_base_X_unitcube = write_dir / "arrays" / filename_X_unitcube

for num_params_narrow in num_params_narrow_list:
    for scaling_factor in scaling_factor_list:

        print(num_params_narrow, scaling_factor)

        filename_Y_narrow = "validation.narrow.Y.{}.{}.div{}.{}.hdf5".format(
            iterations, num_params_narrow, scaling_factor, seed
        )
        filepath_Y_narrow = write_dir / "arrays" / filename_Y_narrow

        filename_X_rescaled_narrow = "validation.narrow.X.rescaled.{}.{}.div{}.{}.hdf5".format(
            iterations, num_params_narrow, scaling_factor, seed
        )
        filepath_X_rescaled_narrow = write_dir / "arrays" / filename_X_rescaled_narrow

        # Cached scores: nothing to compute, so the narrowed model is not
        # built at all for this grid point.
        if filepath_Y_narrow.exists():
            narrow_Y = read_hdf5_array(filepath_Y_narrow).flatten()
            continue

        params_yes = params_yes_xgboost[:num_params_narrow]
        # Construct tech_params: independent copy of the technosphere
        # parameters with the "scale" of the selected entries divided by
        # scaling_factor.
        dtype = lca.tech_params.dtype
        tech_params_narrow = np.array(list(lca.tech_params), dtype=dtype)
        for p in params_yes:
            tech_params_narrow[p]["scale"] = tech_params_narrow[p]["scale"] / scaling_factor

        lca_model_setac = LCAModelSetac(demand, method, tech_params_narrow)

        if filepath_X_rescaled_narrow.exists():
            X_rescaled_narrow = read_hdf5_array(filepath_X_rescaled_narrow)
        else:
            if filepath_base_X_unitcube.exists():
                X_unitcube = read_hdf5_array(filepath_base_X_unitcube)
            else:
                # Fix: seed the generator with np.random.seed. The previous
                # np.random.rand(seed) only drew `seed` throwaway numbers and
                # left the sample non-reproducible.
                np.random.seed(seed)
                X_unitcube = np.random.rand(iterations, num_params)
                write_hdf5_array(X_unitcube, filepath_base_X_unitcube)
            X_rescaled_narrow = lca_model_setac.rescale(X_unitcube)
            write_hdf5_array(X_rescaled_narrow, filepath_X_rescaled_narrow)

        narrow_Y = lca_model_setac(X_rescaled_narrow)
        write_hdf5_array(narrow_Y, filepath_Y_narrow)

In [None]:
# Histogram of the narrowed scores via the Validation helper. `narrow_Y` and
# `num_params_narrow` are whatever the preceding cell left bound, i.e. the
# last grid combination when run directly after the loop.
histogram_options = dict(
    tag=num_params_narrow,
    save_fig=False,
    bin_min=2300,
    bin_max=3300,
)
validation.plot_histogram_base_Y_influential_Y(narrow_Y, **histogram_options)