# Prepare BW

In [1]:
import os
import pickle

import brightway2 as bw

## 1. Create bw project and set it to current 

In [2]:
# Name of the Brightway2 project used throughout this notebook; the same name is
# later handed to every worker task so each worker opens the same project.
project = 'Geothermal'  
bw.projects.set_current(project)

In [3]:
bw.databases

Databases dictionary with 3 object(s):
	biosphere3
	ecoinvent 3.6 cutoff
	geothermal energy

## 2. Import biosphere and ecoinvent

In [4]:
bw.bw2setup()

Biosphere database already present!!! No setup is needed


In [5]:
ei_name = "ecoinvent 3.6 cutoff"
# Location of the ecoinvent ecospold2 datasets. Set the ECOINVENT_PATH environment
# variable to point at a different location instead of editing this cell; when it
# is not set, the original local path is used.
# Path previously used on another machine:
# ei_path = "/psi/home/kim_a/LCA files/ecoinvent 3.5 cutoff/datasets"
ei_path = os.environ.get(
    "ECOINVENT_PATH",
    "/Users/akim/Documents/LCA files/ecoinvent 3.6 cutoff/datasets",
)
if ei_name in bw.databases:
    # The import is skipped when a database with this name is already registered.
    print(ei_name + " database already present!!! No import is needed")
else:
    ei = bw.SingleOutputEcospold2Importer(ei_path, ei_name)
    ei.apply_strategies()
    ei.statistics()
    ei.write_database()

ecoinvent 3.6 cutoff database already present!!! No import is needed


## 3. Import `geothermal energy` database

In [6]:
bw.databases

Databases dictionary with 3 object(s):
	biosphere3
	ecoinvent 3.6 cutoff
	geothermal energy

In [7]:
%run Import_and_Replace.py

Database already exists
Do you want to delete it and reimport? Y/N? N
Skipping import


# Start DASK Client

In [10]:
from dask.distributed import Client

In [11]:
# Where the Dask cluster runs: "merlin" selects a SLURM cluster, "local" a
# LocalCluster on this machine (see the next cell).
option_comp = 'local'

In [12]:
# Build the Dask cluster object selected by `option_comp`.
if option_comp == "merlin":

    from dask_jobqueue import SLURMCluster

    # SLURM-backed cluster; the resource requests and directories below are
    # specific to the original author's account on that system.
    cluster = SLURMCluster(cores     = 8, 
                           memory    ='4GB', 
                           walltime  = '10:00:00',
                           interface ='ib0',
                           local_directory = '/data/user/kim_a',
                           log_directory   = '/data/user/kim_a',
                          ) 

elif option_comp == "local":

    from dask.distributed import LocalCluster

    cluster = LocalCluster()

else:
    # Fail here with a clear message instead of leaving `cluster` undefined and
    # hitting a NameError in a later cell.
    raise ValueError(
        "Unknown option_comp " + repr(option_comp) + "; expected 'merlin' or 'local'"
    )

In [13]:
client = Client(cluster)

In [14]:
# Number of Dask workers to request. The same value is reused further down as the
# number of chunks the sample is split into (one task per chunk).
n_workers = 10
cluster.scale(n_workers)

In [42]:
client

0,1
Client  Scheduler: tcp://127.0.0.1:59421  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 10  Cores: 30  Memory: 42.95 GB


In [17]:
# client.close()
# cluster.close()

# Project setup

In [43]:
import numpy as np
import brightway2 as bw
from copy import copy

from utils.gsa_lca_dask import *
from setup_files_gsa import *

## --> TODO choose option: EGE or CGE

In [66]:
option = 'cge'

## --> TODO choose number of Monte Carlo runs for one total index

In [67]:
N = 500

# Create long task for each worker

In [68]:
from SALib.sample import saltelli
from SALib.analyze import sobol

In [69]:
def model_per_X_chunk(X_chunk, gsa_in_lca, method_matrices):
    """Evaluate the model once per sample in ``X_chunk`` and collect the scores.

    Parameters
    ----------
    X_chunk : iterable
        Samples to evaluate; each element is passed unchanged to
        ``gsa_in_lca.model``.
    gsa_in_lca : object
        Any object exposing ``model(sample, method_matrices)``.
    method_matrices : object
        Forwarded as-is to every ``gsa_in_lca.model`` call.

    Returns
    -------
    numpy.ndarray
        The per-sample results, in the iteration order of ``X_chunk``,
        converted with ``np.array``.
    """
    # Samples are evaluated sequentially and in order, so row k of the result
    # corresponds to the k-th sample of the chunk.
    return np.array([gsa_in_lca.model(sample, method_matrices) for sample in X_chunk])

In [70]:
def task_per_worker(project, N, option, n_workers, i_chunk, path_files, diff_distr=None):
    """Evaluate one chunk of the Saltelli sample and pickle the resulting scores.

    Every call rebuilds the project, the LCA object and the full sample on its
    own and then evaluates only the slice that belongs to ``i_chunk``.

    Parameters
    ----------
    project : str
        Brightway2 project name, forwarded to ``setup_gt_project`` and ``GSAinLCA``.
    N : int
        Base sample size passed to ``saltelli.sample``.
    option : str
        Model option forwarded to ``setup_gt_project``.
    n_workers : int
        Number of chunks the sample is split into.
    i_chunk : int
        Zero-based index of the chunk handled by this call.
    path_files : str
        Existing directory in which the chunk's scores are pickled.
    diff_distr : optional
        Forwarded to ``setup_gt_project`` when given. When left at ``None`` the
        argument is not forwarded at all.
        NOTE(review): this assumes ``setup_gt_project`` defines its own default
        for ``diff_distr`` -- confirm.

    Returns
    -------
    numpy.ndarray
        Scores for the samples of this chunk, as returned by ``model_per_X_chunk``.

    Raises
    ------
    ValueError
        If ``i_chunk`` is not in ``range(n_workers)``.
    """
    if not 0 <= i_chunk < n_workers:
        raise ValueError(
            "i_chunk must be in range(n_workers), got i_chunk=" + str(i_chunk)
            + " with n_workers=" + str(n_workers)
        )

    # 1. setup geothermal project
    # Forward `diff_distr` only when the caller supplied it, so that callers that
    # omit it get whatever setup_gt_project does by default.
    setup_kwargs = {} if diff_distr is None else {"diff_distr": diff_distr}
    demand, gt_model, parameters = setup_gt_project(project, option, **setup_kwargs)
    methods = get_ILCD_methods(CC_only=False, units=False)

    # 2. generate characterization matrices for all methods
    lca = bw.LCA(demand, methods[0])
    lca.lci(factorize=True)
    lca.lcia()
    lca.build_demand_array()
    method_matrices = gen_cf_matrices(lca, methods)

    # 3. gsa in lca model
    gsa_in_lca = GSAinLCA(lca, parameters, gt_model, project=project)

    # 4. setup GSA project in the SALib format
    # One GSA variable per model parameter plus one per uncertain technosphere
    # and biosphere exchange.
    num_vars = len(gsa_in_lca.parameters_array) \
             + len(gsa_in_lca.uncertain_exchanges_dict['tech_params_where']) \
             + len(gsa_in_lca.uncertain_exchanges_dict['bio_params_where'])
    problem, calc_second_order = setup_gsa(num_vars)

    # 5. generate sobol samples
    # The full sample is regenerated in every task and sliced below.
    # NOTE(review): this relies on saltelli.sample returning the same array in
    # every task for identical inputs -- confirm for the SALib version in use.
    X = saltelli.sample(problem, N, calc_second_order=calc_second_order)

    # 6. extract the part of the sample for the current worker based on index i_chunk
    n_samples = X.shape[0]
    chunk_size = n_samples // n_workers
    start = i_chunk * chunk_size
    if i_chunk != n_workers - 1:
        end = (i_chunk + 1) * chunk_size
    else:
        # The last chunk also takes the remainder rows when the sample size is
        # not divisible by n_workers.
        end = n_samples
    X_chunk = X[start:end, :]
    del X

    # 7. compute scores for all methods for X_chunk
    scores_for_methods = model_per_X_chunk(X_chunk, gsa_in_lca, method_matrices)

    # 8. save results; the file name carries the inclusive row range of the chunk
    filepath = os.path.join(path_files, 'scores_' + str(start) + '_' + str(end-1) + '.pkl')
    with open(filepath, "wb") as fp:   # Pickling
        pickle.dump(scores_for_methods, fp)

    return scores_for_methods

In [72]:
# Path for saving results
# Scores are written to a sub-directory named after the chosen option and the
# sample size N (option + '_N' + str(N)).
path = "generated_files/write_files"
path_files = os.path.join(path, option + '_N' + str(N))
# exist_ok=True creates the directory (and any missing parents) when needed and is
# a no-op when it already exists, without a separate check-then-create step.
os.makedirs(path_files, exist_ok=True)

In [71]:
# %%time
# i_chunk = 0
# scores_for_methods = task_per_worker(project, N, option, n_workers, i_chunk, path_files, diff_distr=diff_distr)

# Compute with dask

In [73]:
import dask

In [74]:
# Wrap the task with dask.delayed so that the calls in the next cell only build
# lazy tasks; the actual work happens when dask.compute is called.
# NOTE(review): this rebinds the name `task_per_worker`. Re-running this cell
# passes the already-wrapped object to dask.delayed again, and the plain function
# is no longer reachable under this name until its defining cell is re-run.
task_per_worker = dask.delayed(task_per_worker)

In [75]:
# Build one delayed task per chunk; chunk index i runs over range(n_workers).
# NOTE(review): `diff_distr` is not passed here -- confirm the intended value for
# this run.
model_evals = []
for i in range(n_workers):
    model_eval = task_per_worker(project, N, option, n_workers, i, path_files)
    model_evals.append(model_eval)

In [76]:
len(model_evals)

10

In [77]:
%%time
# Execute all delayed tasks. The list is passed as a single argument.
# NOTE(review): the result is presumably a 1-tuple holding the list of per-chunk
# score arrays (the next cell removes that extra level) -- confirm.
Y_intermediate = dask.compute(model_evals)

CPU times: user 13min 18s, sys: 2min 4s, total: 15min 23s
Wall time: 3h 4min 14s


# Postprocessing of model outputs Y and saving

In [78]:
# dask.compute was called with a single argument (the list of delayed tasks), so
# its result is a 1-tuple holding the list of per-chunk score arrays.
(chunk_scores,) = Y_intermediate
# Stack the chunks row-wise in chunk order. Stacking the list directly, rather
# than going through np.array(...).squeeze(), also works when the last chunk has
# more rows than the others and never drops a genuine length-1 axis.
Y_all_methods = np.vstack(chunk_scores)

In [79]:
Y_all_methods.shape

(9500, 16)

In [80]:
# Persist the combined score matrix in the same directory as the per-chunk files.
filepath = os.path.join(path_files, 'all_scores.pkl')
with open(filepath, 'wb') as f:
    pickle.dump(Y_all_methods, f)