## 1. Create the Brightway (bw) project and set it as the current project

In [1]:
import bw2data as bd
# Name of the Brightway project used throughout this notebook
project = 'Geothermal'  
bd.projects.set_current(project)
# Show the databases registered in the current project
bd.databases

Databases dictionary with 3 object(s):
	biosphere3
	ecoinvent 3.6 cutoff
	geothermal energy

# Start Dask client

In [2]:
from dask.distributed import Client

In [3]:
# Where the computation runs: 'local' starts a LocalCluster, 'merlin' a SLURMCluster
option_comp = 'local'

In [None]:
# Create the Dask cluster that matches `option_comp`.
if option_comp == "merlin":
    # SLURM-backed cluster provided by dask_jobqueue.
    # NOTE(review): the directories below are user-specific — adapt them before reuse.
    from dask_jobqueue import SLURMCluster

    cluster = SLURMCluster(
        cores=4,
        memory='4GB',
        walltime='10:00:00',
        interface='ib0',
        local_directory='/data/user/kim_a',
        log_directory='/data/user/kim_a',
    )

elif option_comp == "local":
    # Cluster running on the local machine.
    from dask.distributed import LocalCluster

    cluster = LocalCluster()

else:
    # Fail here rather than with a NameError on `cluster` in a later cell.
    raise ValueError(
        "Unknown option_comp {!r}; expected 'merlin' or 'local'".format(option_comp)
    )

In [None]:
# Connect a Dask client to the cluster created in the previous cell
client = Client(cluster)

In [None]:
# Number of workers requested from the cluster; the same value is used further
# down as the number of chunks the sample matrix is split into (one task per chunk).
n_workers = 25
cluster.scale(n_workers)

In [None]:
client

In [None]:
# Shut down the Dask client and the cluster.
# NOTE(review): this cell precedes the computation cells; running the notebook
# top-to-bottom would close the cluster before `dask.compute` is called —
# presumably it is meant to be run manually once all results are saved.
client.close()
cluster.close()

# Project setup

In [4]:
import os
import pickle
from copy import copy
from pathlib import Path

import numpy as np

# from utils.gsa_lca_dask import *
# from setup_files_gsa import *

## --> TODO choose option: EGE or CGE

In [5]:
# Model variant (see heading: EGE or CGE); forwarded to setup_gt_project and used
# in the name of the results folder.
# NOTE(review): presumably 'ege' is the other accepted value — confirm.
option = 'cge'
diff_distr = True # set to True to check the robustness of the GSA results to the choice of distributions

## --> TODO choose number of Monte Carlo runs for one total index

In [6]:
# Passed to saltelli.sample as its sample-size argument; also part of the results folder name
N = 500

# Create long task for each worker

In [8]:
from SALib.sample import saltelli
from SALib.analyze import sobol

In [9]:
def model_per_X_chunk(X_chunk, gsa_in_lca, method_matrices):
    """Evaluate the model once per sample in ``X_chunk`` and collect the scores.

    Args:
        X_chunk: Iterable of samples; each item is passed unchanged to
            ``gsa_in_lca.model``.
        gsa_in_lca: Object exposing ``model(sample, method_matrices)``.
        method_matrices: Forwarded unchanged to every ``model`` call.

    Returns:
        ``np.ndarray`` built from the per-sample scores, in input order
        (an empty array when ``X_chunk`` is empty).
    """
    return np.array(
        [gsa_in_lca.model(sample, method_matrices) for sample in X_chunk]
    )

In [10]:
def task_per_worker(project, N, option, n_workers, i_chunk, path_files, diff_distr):
    """Run the model for one worker's share of the Saltelli sample and pickle the scores.

    Sets up the geothermal project, builds the characterization matrices and the
    GSA-in-LCA model, generates the full Saltelli sample, evaluates the model on
    the slice belonging to ``i_chunk`` and writes the scores to ``path_files``.

    Args:
        project: Project name, forwarded to ``setup_gt_project`` and ``GSAinLCA``.
        N: Sample-size argument passed to ``saltelli.sample``.
        option: Model variant, forwarded to ``setup_gt_project``.
        n_workers: Number of chunks the sample matrix is split into.
        i_chunk: Zero-based index of the chunk handled by this call.
        path_files: Existing directory in which the pickle file is written.
        diff_distr: Forwarded to ``setup_gt_project`` as ``diff_distr``.

    Returns:
        The array of scores returned by ``model_per_X_chunk`` for this chunk.

    Raises:
        ValueError: If ``i_chunk`` is outside ``range(n_workers)``.
    """
    # Fail fast on an invalid chunk index, before the expensive project setup below.
    if not 0 <= i_chunk < n_workers:
        raise ValueError(
            "i_chunk must satisfy 0 <= i_chunk < n_workers, got i_chunk={} and n_workers={}".format(
                i_chunk, n_workers
            )
        )

    # 1. setup geothermal project
    demand, gt_model, parameters = setup_gt_project(project, option, diff_distr=diff_distr)
    methods = get_ILCD_methods(CC_only=False, units=False)

    # 2. generate characterization matrices for all methods
    # NOTE(review): `bw` is not imported in the visible part of this notebook (only
    # `bw2data as bd` is) — confirm it is provided by the project's helper imports.
    lca = bw.LCA(demand, methods[0])
    lca.lci(factorize=True)
    lca.lcia()
    lca.build_demand_array()
    method_matrices = gen_cf_matrices(lca, methods)

    # 3. gsa in lca model
    gsa_in_lca = GSAinLCA(lca, parameters, gt_model, project=project)

    # 4. setup GSA project in the SALib format
    num_vars = len(gsa_in_lca.parameters_array) \
             + len(gsa_in_lca.uncertain_exchanges_dict['tech_params_where']) \
             + len(gsa_in_lca.uncertain_exchanges_dict['bio_params_where'])
    problem, calc_second_order = setup_gsa(num_vars)

    # 5. generate sobol samples; every worker builds the full matrix and keeps only its slice
    # (assumes saltelli.sample returns the same matrix on every worker — TODO confirm)
    X = saltelli.sample(problem, N, calc_second_order=calc_second_order)

    # 6. extract the part of the sample for the current worker, selected by i_chunk;
    #    the last chunk also takes the remainder of the integer division
    chunk_size = X.shape[0]//n_workers
    start = i_chunk*chunk_size
    if i_chunk != n_workers-1:
        end = (i_chunk+1)*chunk_size
    else:
        end = X.shape[0]
    X_chunk = X[start:end, :]
    del X  # the full sample matrix is no longer needed

    # 7. compute scores for all methods for X_chunk
    scores_for_methods = model_per_X_chunk(X_chunk, gsa_in_lca, method_matrices)

    # 8. save results; the file name carries the first and last sample index of the chunk
    filepath = os.path.join(path_files, 'scores_' + str(start) + '_' + str(end-1) + '.pkl')
    with open(filepath, "wb") as fp:   # pickling
        pickle.dump(scores_for_methods, fp)

    return scores_for_methods

In [None]:
# Path for saving results: write_files/<option>_N<N>[_robust]
write_dir = Path("write_files")
# Runs with alternative distributions (diff_distr) go to a separate "_robust" folder.
run_name = option + '_N' + str(N)
if diff_distr:
    run_name += '_robust'
# Build the path from `write_dir`; the previously used name `path` is not defined
# anywhere in the visible notebook.
path_files = os.path.join(write_dir, run_name)
os.makedirs(path_files, exist_ok=True)

In [None]:
path_files

In [None]:
# %%time
# i_chunk = 0
# N = 1
# n_workers = 1
# scores_for_methods = task_per_worker(project, N, option, n_workers, i_chunk, path_files, diff_distr=diff_distr)

# Compute with dask

In [None]:
import dask

In [None]:
# Wrap the function with dask.delayed; from here on the name `task_per_worker`
# refers to the delayed wrapper rather than to the plain function.
task_per_worker = dask.delayed(task_per_worker)

In [None]:
# One task per chunk: chunk index i runs over range(n_workers).
model_evals = [
    task_per_worker(project, N, option, n_workers, i, path_files, diff_distr=diff_distr)
    for i in range(n_workers)
]

In [None]:
len(model_evals)

In [None]:
%%time
# Execute all tasks collected in `model_evals`; the result is post-processed in the cells below.
Y_intermediate = dask.compute(model_evals)

# Postprocessing of model outputs Y and saving

In [None]:
# `dask.compute(model_evals)` wraps its result in a 1-tuple holding the list of
# per-chunk score arrays. Unwrap it explicitly instead of using
# np.array(...).squeeze(): the last chunk may be longer than the others, and
# NumPy cannot build a regular array from chunks of unequal length.
if isinstance(Y_intermediate, tuple) and len(Y_intermediate) == 1:
    Y_intermediate = Y_intermediate[0]
# Stack the chunks along the sample axis, in chunk order.
Y_all_methods = np.vstack(Y_intermediate)

In [None]:
Y_all_methods.shape

In [None]:
# Pickle the stacked scores of all chunks into the results folder, next to the per-chunk files
filepath = os.path.join(path_files, 'all_scores.pkl')
with open(filepath, 'wb') as f:
    pickle.dump(Y_all_methods, f)

In [None]:
Y_intermediate