In [1]:
import gsa_framework as gf
import brightway2 as bw
from pathlib import Path
import pickle

import dask

Using environment variable BRIGHTWAY2_DIR for data directory:
/data/user/kim_a/Brightway3


In [2]:
# Root directory under which every model's output directory is created.
# For a run on the local machine, point this at the local checkout instead, e.g.
#   Path('/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/')
path_base = Path('/data/user/kim_a/paper_gsa') / 'gsa_framework_files'

In [3]:
# Build every candidate model together with its output directory and its
# iteration budget (const * number of model parameters), then gather them
# in `models_dict`, keyed by a short model name.

# --- 1. LCA model ---
bw.projects.set_current('GSA for paper')
co = bw.Database('CH consumption 1.0')
act = co.search('average consumption')[0]  # first search hit is the demanded activity
demand = {act: 1}
method = ('IPCC 2013', 'climate change', 'GTP 100a')

write_dir_lca = path_base / 'lca_model'
model_lca = gf.LCAModel(demand, method, write_dir_lca)
const_lca = 2
iterations_lca = model_lca.num_params * const_lca

# --- 2. Moon model ---
write_dir_moon = path_base / 'moon_model'
model_moon = gf.Moon()
const_moon = 2
iterations_moon = model_moon.num_params * const_moon

# --- 3. Morris model ---
write_dir_morris = path_base / 'morris_model'
model_morris = gf.Morris(num_params=120000, num_influential=12000)
const_morris = 2
iterations_morris = model_morris.num_params * const_morris

# --- 4. Sobol-Levitan model ---
write_dir_SL = path_base / 'sobol_levitan_model'
model_SL = gf.SobolLevitan(num_params=5000)
const_SL = 2
iterations_SL = model_SL.num_params * const_SL

# --- 5. Registry of everything a Problem needs, per model ---
models_dict = {
    'lca': {
        'model': model_lca,
        'write_dir': write_dir_lca,
        'iterations': iterations_lca,
        'const': const_lca,
    },
    'moon': {
        'model': model_moon,
        'write_dir': write_dir_moon,
        'iterations': iterations_moon,
        'const': const_moon,
    },
    'morris': {
        'model': model_morris,
        'write_dir': write_dir_morris,
        'iterations': iterations_morris,
        'const': const_morris,
    },
    'sobol_levitan': {
        'model': model_SL,
        'write_dir': write_dir_SL,
        'iterations': iterations_SL,
        'const': const_SL,
    },
}

# DASK delayed

In [4]:
# GSA interpreters to run. Only the correlation coefficients are enabled;
# 'sobol_indices', 'eFAST_indices', 'xgboost' and 'dissimilarity_measure'
# are currently switched off.
gsa_methods = ['correlation_coefficients']

# Models to analyse (keys of `models_dict`); 'morris' and 'sobol_levitan'
# are currently switched off.
model_names = ['lca']

In [5]:
%%time
# Wrap the Problem constructor with dask.delayed so that each call below only
# queues a task; assign the plain `gf.Problem` instead to run eagerly.
problem_per_worker = dask.delayed(gf.Problem)

# Iteration counts that are fixed per GSA method instead of being taken
# from the model's own budget in `models_dict`.
fixed_iterations = {
    'eFAST_indices': 130,
    'correlation_coefficients': None,
}

model_evals = []
for model_name in model_names:
    print(f'\n>>> {model_name}')
    model_spec = models_dict[model_name]
    model = model_spec['model']
    write_dir = model_spec['write_dir']
    if not write_dir.exists():
        write_dir.mkdir(parents=True, exist_ok=True)
    for gsa_method in gsa_methods:
        if gsa_method in fixed_iterations:
            iterations = fixed_iterations[gsa_method]
        else:
            iterations = model_spec['iterations']
        print(f'--- {gsa_method} -> {model.num_params} parameters, {iterations} iterations')
        problem_kwargs = {
            'sampler': 'random',
            'model': model,
            'interpreter': gsa_method,
            'write_dir': write_dir,
            'iterations': iterations,
            'seed': 34534,
            'X': None,
        }
        problem = problem_per_worker(**problem_kwargs)
        model_evals.append(problem)
# Disabled plotting sketch, kept for reference. It reads attributes of an
# evaluated Problem, so it presumably only applies when `problem_per_worker`
# is the plain `gf.Problem` (or after the delayed objects were computed).
#         # Plotting
#         filename_sa_results = problem.gsa_dict['sa_results']
#         with open(filename_sa_results, 'rb') as f:
#             sa_results = pickle.load(f)
#         for sa_index_name, sa_index_values in sa_results.items():
#             sa_indices = {sa_index_name: sa_index_values}
#             try:
#                 problem.plot_sa_results(sa_indices, model.influential_params)
#             except:
#                 pass


>>> lca
--- correlation_coefficients -> 114688 parameters, None iterations
CPU times: user 2.6 ms, sys: 0 ns, total: 2.6 ms
Wall time: 1.52 ms


# DASK workers

In [6]:
from dask.distributed import Client, LocalCluster
from dask_jobqueue import SLURMCluster
import os

In [7]:
# Selects the kind of dask cluster to start: a value containing 'merlin'
# submits SLURM jobs, a value containing 'local' starts a LocalCluster.
which_pc = 'merlin'

if 'merlin' in which_pc:
    # One directory is used both as the workers' local directory and for job logs.
    path_dask_logs = Path('/data/user/kim_a/dask_logs')
    # mkdir with exist_ok=True is already a no-op for an existing directory.
    path_dask_logs.mkdir(parents=True, exist_ok=True)
    cluster = SLURMCluster(
        cores=2,
        memory='28GB',
        walltime='00:50:00',
        interface='ib0',
        local_directory=path_dask_logs,
        log_directory=path_dask_logs,
        queue='hourly',
    )
elif 'local' in which_pc:
    cluster = LocalCluster(memory_limit='7GB')
else:
    # Fail here with a clear message instead of leaving `cluster` undefined
    # and hitting a NameError when the client is created.
    raise ValueError(
        "Unknown which_pc {!r}: expected a value containing 'merlin' or 'local'".format(which_pc)
    )

In [8]:
# Connect a distributed client to the cluster created above.
client = Client(cluster)

In [9]:
# Request one worker per queued (model, GSA method) combination.
n_workers = len(model_names) * len(gsa_methods)
cluster.scale(n_workers)

In [20]:
# Display the client summary (scheduler address, dashboard link, workers/cores/memory).
client

0,1
Client  Scheduler: tcp://192.168.196.62:39569  Dashboard: http://192.168.196.62:8787/status,Cluster  Workers: 2  Cores: 2  Memory: 28.00 GB


In [None]:
# client.close()
# cluster.close()

# Dask compute

In [None]:
%%time
# Evaluate all queued Problem tasks. dask.compute returns a tuple with one
# entry per positional argument; a single list is passed here, so unpack the
# 1-tuple and keep the evaluated results instead of discarding them.
(problems_computed,) = dask.compute(model_evals)