# Model runs

In [None]:
from dask.distributed import Client, LocalCluster
from dask_jobqueue import SLURMCluster
import os
import dask

In [None]:
# Choose the dask cluster backend from a free-form machine tag:
# any tag containing 'merlin' uses a SLURM cluster, any tag containing
# 'local' uses an in-process LocalCluster.
which_pc = "merlin_paper_gsa"
if 'merlin' in which_pc:
    path_dask_logs = '/data/user/kim_a/dask_logs'
    # exist_ok=True replaces the separate exists() check and avoids failing
    # if the directory appears between the check and the creation.
    os.makedirs(path_dask_logs, exist_ok=True)
    cluster = SLURMCluster(cores     = 4,
                           processes = 4,
                           memory    ="80GB",
                           walltime  = '20:00:00',
                           interface ='ib0',
                           local_directory = path_dask_logs,
                           log_directory   = path_dask_logs,
                           queue="daily",
                           )
elif 'local' in which_pc:
    cluster = LocalCluster(memory_limit='7GB')
else:
    # Without this branch `cluster` would stay undefined and the failure
    # would only surface later as a NameError in the Client(cluster) cell.
    raise ValueError(
        "Unknown which_pc {!r}: expected a tag containing 'merlin' or 'local'".format(which_pc)
    )

In [None]:
# Attach a dask.distributed client to the cluster object created in the previous cell.
client = Client(cluster)

In [None]:
# Number of workers requested from the cluster.
# NOTE(review): the global name `n_workers` is rebound later by the loop that
# builds the delayed model-evaluation tasks, so re-running this cell after that
# loop is the only way to get the value 2 back.
n_workers = 2
cluster.scale(n_workers)

In [None]:
client

In [None]:
# client.close()
# cluster.close() 

# GSA

In [None]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.delta_moment import DeltaMoment
from gsa_framework.methods.gradient_boosting import GradientBoosting
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import read_hdf5_array, read_pickle, write_hdf5_array, write_pickle
import h5py

In [None]:
def compute_scores_per_worker(option, num_params, iterations, i_worker, n_workers):
    """Evaluate the model on one pre-written chunk of unit-cube samples.

    The chunk ``X.unitcube.{i_worker}.{n_workers}.pickle`` is read from the
    GSA object's ``dirpath_Y``, rescaled with ``gsa.model.rescale``, passed
    through ``gsa.model`` and the resulting scores are pickled next to it as
    ``{i_worker}.{n_workers}.pickle``.

    Parameters
    ----------
    option : str
        Sampling scheme; one of ``"random"``, ``"sobol"`` or ``"latin"``.
    num_params : int
        Forwarded to the matching ``setup_*`` function.
    iterations : int
        Forwarded to the matching ``setup_*`` function.
    i_worker : int
        Index of the chunk handled by this call.
    n_workers : int
        Total number of chunks; part of the chunk file names.

    Returns
    -------
    The scores returned by ``gsa.model`` for this chunk.

    Raises
    ------
    ValueError
        If ``option`` is not one of the supported sampling schemes.
    """
    if option == "random":
        gsa = setup_corr(num_params, iterations)
    elif option == "sobol":
        gsa = setup_sobol(num_params, iterations)
    elif option == 'latin':
        gsa = setup_latin(num_params, iterations)
    else:
        # Fail fast with a clear message instead of hitting an unbound `gsa` below.
        raise ValueError(
            "Unknown option {!r}: expected 'random', 'sobol' or 'latin'".format(option)
        )
    gsa.dirpath_Y.mkdir(parents=True, exist_ok=True)
    # Input chunk written beforehand by the sampling cells of this notebook.
    filepath_X_chunk = gsa.dirpath_Y / "X.unitcube.{}.{}.pickle".format(i_worker, n_workers)
    X_chunk_unitcube = read_pickle(filepath_X_chunk)
    X_chunk_rescaled = gsa.model.rescale(X_chunk_unitcube)
    scores = gsa.model(X_chunk_rescaled)
    # Persist the scores so the chunks can be merged later without recomputation.
    Y_filename = "{}.{}.pickle".format(i_worker, n_workers)
    filepath = gsa.dirpath_Y / Y_filename
    write_pickle(scores, filepath)
    return scores

In [None]:
def setup_lca_model(num_params):
    """Build the LCA model of the food sector for ``num_params`` parameters.

    Selects the brightway project "GSA for paper", takes the first activity of
    the "CH consumption 1.0" database whose name contains
    "Food and non-alcoholic beverages sector" as the demand, and creates an
    ``LCAModel`` that writes into ``lca_model_food_{num_params}`` under the
    base directory.

    Returns
    -------
    tuple
        ``(model, write_dir, gsa_seed)`` where ``gsa_seed`` is the fixed
        seed 3403.
    """
    bw.projects.set_current("GSA for paper")
    consumption_db = bw.Database("CH consumption 1.0")
    matching_activities = [
        activity
        for activity in consumption_db
        if "Food and non-alcoholic beverages sector" in activity['name']
    ]
    # The first match is the functional unit; one unit of it is demanded.
    functional_unit = {matching_activities[0]: 1}
    impact_method = ("IPCC 2013", "climate change", "GTP 100a")

    base_dir = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')
    write_dir = base_dir / "lca_model_food_{}".format(num_params)
    lca_model = LCAModel(functional_unit, impact_method, write_dir, num_params=num_params)
    return lca_model, write_dir, 3403

In [None]:
def setup_corr(num_params, iterations):
    """Create the ``CorrelationCoefficients`` GSA object.

    The LCA model, output directory and seed come from
    ``setup_lca_model(num_params)``; ``iterations`` is passed through unchanged.
    """
    lca_model, out_dir, seed = setup_lca_model(num_params)
    settings = dict(
        iterations=iterations,
        model=lca_model,
        write_dir=out_dir,
        seed=seed,
    )
    return CorrelationCoefficients(**settings)

In [None]:
def setup_sobol(num_params, iterations):
    """Create the ``SaltelliSobol`` GSA object.

    Only the model and output directory from ``setup_lca_model`` are used;
    the seed it returns is not passed on.
    """
    lca_model, out_dir, _unused_seed = setup_lca_model(num_params)
    return SaltelliSobol(iterations=iterations, model=lca_model, write_dir=out_dir)

In [None]:
def setup_latin(num_params, iterations):
    """Create the ``DeltaMoment`` GSA object with a single resample.

    The LCA model, output directory and seed come from
    ``setup_lca_model(num_params)``.
    """
    lca_model, out_dir, seed = setup_lca_model(num_params)
    settings = dict(
        iterations=iterations,
        model=lca_model,
        write_dir=out_dir,
        num_resamples=1,
        seed=seed,
    )
    return DeltaMoment(**settings)

In [None]:
def setup_xgboost(num_params, iterations):
    """Create the ``GradientBoosting`` GSA object with fixed boosting settings.

    Uses 400 boosting rounds, the tuning parameters listed below and no
    pre-existing booster (``xgb_model=None``). The LCA model, output directory
    and seed come from ``setup_lca_model(num_params)``.
    """
    lca_model, out_dir, seed = setup_lca_model(num_params)
    booster_settings = {
        'max_depth': 6,
        'eta': 0.1,
        'objective': 'reg:squarederror',
        'n_jobs': -1,
        'refresh_leaf': True,
        'subsample': 0.6,
        'min_child_weight': 0.5,
    }
    return GradientBoosting(
        iterations=iterations,
        model=lca_model,
        write_dir=out_dir,
        seed=seed,
        tuning_parameters=booster_settings,
        num_boost_round=400,
        xgb_model=None,
    )

In [None]:
# Number of model parameters passed to every setup_* function below.
num_params = 10000
# Sample sizes per method, expressed as multiples of num_params.
iter_corr = 4*num_params
iter_sobol = 40*num_params
iter_latin = 4*num_params
# XGBoost uses the same number of iterations as the correlation run.
iter_xgboost = iter_corr

# Number of chunks (one per dask task) the samples of each method are split into.
n_workers_corr = 20
# NOTE(review): 39 is presumably chosen so that gsa_sobol.iterations divides
# evenly into chunks — confirm against the value printed by the Sobol cell.
n_workers_sobol = 39
n_workers_latin = 20

# Per-method settings consumed by the loop that builds the delayed
# compute_scores_per_worker tasks; the keys are the `option` strings that
# function understands.
options = {
    'random': {
        "iterations": iter_corr,
        "n_workers": n_workers_corr,
    }, 
    'sobol': {
        "iterations": iter_sobol,
        "n_workers": n_workers_sobol,
    }, 
    'latin': {
        "iterations": iter_latin,
        "n_workers": n_workers_latin,
    }
}
# GSA objects used by the sampling, merging and "Run GSA" cells below.
# There is no 'xgboost' entry in `options`, so no separate model-evaluation
# tasks are created for gsa_xgboost.
gsa_corr = setup_corr(num_params, iter_corr)
gsa_sobol = setup_sobol(num_params, iter_sobol)
gsa_latin = setup_latin(num_params, iter_latin)
gsa_xgboost = setup_xgboost(num_params, iter_xgboost)

## 1. Correlation coefficients and XGBoost

In [None]:

# Generate the unit-cube samples for the correlation run and write one pickle
# per worker into gsa_corr.dirpath_Y; compute_scores_per_worker reads them back.
X = gsa_corr.generate_unitcube_samples_based_on_method(gsa_corr.iterations)
gsa_corr.create_model_output_dir()
print(X.shape, gsa_corr.dirpath_Y)
iter_corr_chunk = gsa_corr.iterations//n_workers_corr
for i in range(n_workers_corr):
    start = iter_corr_chunk*i
    if i == n_workers_corr - 1:
        # The last chunk also takes the remainder, so no samples are silently
        # dropped when iterations is not a multiple of n_workers_corr.
        end = gsa_corr.iterations
    else:
        end = iter_corr_chunk*(i+1)
    print(i,start,end)
    X_chunk = X[start:end,:]
    filepath_X_chunk = gsa_corr.dirpath_Y / "X.unitcube.{}.{}.pickle".format(i, n_workers_corr)
    write_pickle(X_chunk, filepath_X_chunk)

# Release the full sample matrix; only the chunk files are needed from here on.
del X

## 2. Sobol iterations

In [None]:
# Generate the unit-cube samples for the Sobol run and write one pickle per
# worker into gsa_sobol.dirpath_Y; compute_scores_per_worker reads them back.
X = gsa_sobol.generate_unitcube_samples_based_on_method(gsa_sobol.iterations)
gsa_sobol.create_model_output_dir()
print(X.shape, gsa_sobol.dirpath_Y)
iter_sobol_chunk = gsa_sobol.iterations//(n_workers_sobol)
for i in range(n_workers_sobol):
    start = iter_sobol_chunk*i
    if i == n_workers_sobol - 1:
        # chunk*(i+1) can never exceed iterations with floor division, so
        # clipping with min() does not help; instead the last chunk explicitly
        # extends to the end so a remainder is not silently dropped.
        end = gsa_sobol.iterations
    else:
        end = iter_sobol_chunk*(i+1)
    print(i,start,end)
    X_chunk = X[start:end,:]
    filepath_X_chunk = gsa_sobol.dirpath_Y / "X.unitcube.{}.{}.pickle".format(i, n_workers_sobol)
    write_pickle(X_chunk, filepath_X_chunk)

# Release the full sample matrix; only the chunk files are needed from here on.
del X

## 3. Latin sampling

In [None]:
# Generate the unit-cube samples for the Latin-sampling run and write one pickle
# per worker into gsa_latin.dirpath_Y; compute_scores_per_worker reads them back.
X = gsa_latin.generate_unitcube_samples_based_on_method(gsa_latin.iterations)
gsa_latin.create_model_output_dir()
print(X.shape, gsa_latin.dirpath_Y)
iter_latin_chunk = gsa_latin.iterations//n_workers_latin
for i in range(n_workers_latin):
    start = iter_latin_chunk*i
    if i == n_workers_latin - 1:
        # The last chunk also takes the remainder, so no samples are silently
        # dropped when iterations is not a multiple of n_workers_latin.
        end = gsa_latin.iterations
    else:
        end = iter_latin_chunk*(i+1)
    print(i,start,end)
    X_chunk = X[start:end,:]
    filepath_X_chunk = gsa_latin.dirpath_Y / "X.unitcube.{}.{}.pickle".format(i, n_workers_latin)
    write_pickle(X_chunk, filepath_X_chunk)

# Release the full sample matrix; only the chunk files are needed from here on.
del X

# Compute model outputs for all sampling methods

In [None]:
# Build one lazy dask task per (sampling option, chunk index) pair.
# Nothing is evaluated here; the tasks only run when passed to dask.compute.
task_per_worker = dask.delayed(compute_scores_per_worker)
model_evals = []
for option,dict_ in options.items():
    # NOTE: these assignments rebind the notebook-level names `iterations`
    # and `n_workers` (the latter was also used to scale the cluster).
    iterations = dict_["iterations"]
    n_workers = dict_["n_workers"]
    for i in range(n_workers):
        print(option, num_params, iterations, i, n_workers)
        model_eval = task_per_worker(option, num_params, iterations, i, n_workers)
        model_evals.append(model_eval)

In [None]:
# %%time
# dask.compute(model_evals)

In [None]:
def generate_model_output_from_chunks(gsa, n_workers):
    """Merge the per-worker score pickles into one array and store it.

    Reads ``{i}.{n_workers}.pickle`` from ``gsa.dirpath_Y`` for every ``i`` in
    ``range(n_workers)``, joins the chunks in worker order with ``np.hstack``
    and writes the result to ``gsa.filepath_Y`` via ``write_hdf5_array``.

    Parameters
    ----------
    gsa
        Object providing ``dirpath_Y`` (directory of the chunk files) and
        ``filepath_Y`` (destination of the merged array).
    n_workers : int
        Number of chunk files to merge; also part of their file names.

    Returns
    -------
    numpy.ndarray
        The merged scores; an empty array of shape ``(0,)`` if ``n_workers`` is 0.
    """
    # Start from an empty float array so the result is well defined (and keeps
    # the same dtype promotion) even when there are no chunks at all.
    chunks = [np.zeros(shape=(0,))]
    for i in range(n_workers):
        filepath_Y_chunk = gsa.dirpath_Y / "{}.{}.pickle".format(i, n_workers)
        chunks.append(read_pickle(filepath_Y_chunk))
    # Stack once at the end instead of re-copying the growing array per chunk.
    Y = np.hstack(chunks)  # TODO change to vstack for multidimensional output
    write_hdf5_array(Y, gsa.filepath_Y)
    return Y

In [None]:
# Merge the per-worker score files of each sampling method into a single array
# (also written to the corresponding gsa.filepath_Y).
# NOTE(review): this reads the "{i}.{n}.pickle" files produced by
# compute_scores_per_worker; the cell that runs dask.compute(model_evals) is
# commented out, so those files must already exist on disk from an earlier run.
Ycorr = generate_model_output_from_chunks(gsa_corr, n_workers_corr)
Ysobol = generate_model_output_from_chunks(gsa_sobol, n_workers_sobol)
Ylatin = generate_model_output_from_chunks(gsa_latin, n_workers_latin)

# Run GSA

In [None]:
# Wrap the Latin-sampling and XGBoost analyses as lazy dask tasks; they run
# only when passed to dask.compute.
# NOTE(review): if these tasks execute on remote workers, attributes set on the
# gsa objects inside perform_gsa may not be visible on the local objects —
# confirm how results are returned or persisted.
worker_latin = dask.delayed(gsa_latin.perform_gsa)
model_eval_latin = worker_latin()
worker_xgboost = dask.delayed(gsa_xgboost.perform_gsa)
model_eval_xgboost = worker_xgboost()
# Rebinds `model_evals`, replacing the list of model-evaluation tasks built earlier.
model_evals = [model_eval_latin, model_eval_xgboost]

In [None]:
%%time
dask.compute(model_evals)

In [None]:
# Use the `S` attribute of the XGBoost GSA object to index the model's
# uncertain technosphere parameters and display the selection.
# NOTE(review): this assumes `gsa_xgboost.S` is a valid index (integer positions
# or a boolean mask) and that it is populated on this local object — confirm.
inds = gsa_xgboost.S
gsa_xgboost.model.uncertain_tech_params[inds]

# Construct LCA model

In [None]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.methods.extended_FAST import eFAST
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.methods.gradient_boosting import GradientBoosting
from gsa_framework.validation import Validation
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.plotting import histogram_Y1_Y2
from gsa_framework.utils import read_hdf5_array

# Build the full LCA model and a Validation object, then translate the
# parameter subset selected for the paper into positions within the model's
# uncertain technosphere parameters. The next cell reads `val`,
# `parameter_choice`, `tag` and `num_params_paper` from here.
# NOTE: this cell rebinds notebook-level names set earlier (`num_params`,
# `model`, `write_dir`, `method`, ...).
if __name__ == "__main__":

#     path_base = Path(
#         "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
#     )
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # LCA model: demand one unit of the first activity whose name matches the food sector
    bw.projects.set_current("GSA for paper")
    co = bw.Database("CH consumption 1.0")
    demand_act = [act for act in co if "Food and non-alcoholic beverages sector" in act['name']][0]
    print(demand_act)
    demand = {demand_act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")

    # Define some variables
    # num_params is only used for the output directory name here; it is not
    # passed to LCAModel (see the TODO below).
    num_params = 162299
    iterations_validation = 2000
    write_dir = path_base / "lca_model_food_{}".format(num_params)
    model = LCAModel(demand, method, write_dir) # TODO add num_params later
    # NOTE(review): gsa_seed, validation_seed and fig_format are not used in
    # this cell; Validation below is seeded with the literal 4444 rather than
    # validation_seed — confirm which seed is intended.
    gsa_seed = 3403
    validation_seed = 7043
    fig_format = ["html", "pickle"]

    # Make sure that the chosen num_params in LCA are appropriate
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=4444,
        default_x_rescaled=model.default_uncertain_amounts,
        write_dir=write_dir,
    )
    # Size of the parameter subset to validate; `tag` labels the outputs for it.
    num_params_paper = 10000
    tag = "numParams{}".format(num_params_paper)
    scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores")
    uncertain_tech_params_where_subset, _ = model.get_nonzero_params_from_num_params(scores_dict, num_params_paper)
    # Map every selected entry to its position in model.uncertain_tech_params_where.
    parameter_choice = []
    for u in uncertain_tech_params_where_subset:
        where_temp = np.where(model.uncertain_tech_params_where == u)[0]
        # Each selected entry must occur exactly once among the uncertain parameters.
        assert len(where_temp) == 1
        parameter_choice.append(where_temp[0])
    parameter_choice.sort()

In [None]:
# Get the model output for the chosen parameter subset from the Validation
# object and plot it via plot_histogram_Y_all_Y_inf.
# Relies on `val`, `parameter_choice`, `tag` and `num_params_paper` from the previous cell.
Y_subset = val.get_influential_Y_from_parameter_choice(parameter_choice=parameter_choice, tag=tag)
val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=num_params_paper)