In [None]:
# Stability for Saltelli
from gsa_framework.lca import LCAModel
from gsa_framework.methods.saltelli_sobol import SaltelliSobol
from gsa_framework.convergence import Convergence
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import write_pickle, read_hdf5_array
from SALib.analyze.sobol import analyze

if __name__ == "__main__":
    # Compute Sobol indices on growing prefixes of the Saltelli model outputs
    # and pickle the results keyed by prefix length.
    # NOTE: machine-specific absolute path; the commented alternative below is
    # the cluster location.
    path_base = Path(
        "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
    )
    # path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # LCA model
    bw.projects.set_current("GSA for paper")
    co = bw.Database("CH consumption 1.0")
    # First activity whose name contains the substring; IndexError if none match.
    act = [act for act in co if "ch hh average consumption" in act['name']][0]
    demand = {act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")

    # Define some variables
    num_params = 10000
    num_influential = num_params // 100
    iterations_validation = 2000
    write_dir = path_base / "lca_model_consumption_{}".format(num_params)
    model = LCAModel(demand, method, write_dir, num_params=num_params)
    gsa_seed = 3403
    validation_seed = 7043
    fig_format = ["html", "pickle"]

    iterations = 100 * num_params

    gsa = SaltelliSobol(iterations=iterations, model=model, write_dir=write_dir)

    conv = Convergence(
        gsa.filepath_Y,
        gsa.num_params,
        gsa.generate_gsa_indices,
        gsa.gsa_label,
        write_dir,
        num_steps=100,
    )
    # Minimal SALib problem definition: only the number of variables is given.
    problem = {
        "num_vars": num_params,
    }

    # The output file does not change inside the loop, so read it once instead
    # of re-loading the full array from disk for every step.
    Y_all = read_hdf5_array(gsa.filepath_Y).flatten()

    # NOTE(review): SALib's Sobol analyze expects len(Y) to be a multiple of
    # (num_vars + 2) when calc_second_order=False -- confirm that
    # conv.iterations_for_convergence yields such prefix lengths.
    S_dict = {}
    for step in conv.iterations_for_convergence:
        print(step)
        Y = Y_all[:step]
        S_dict[step] = analyze(problem, Y, calc_second_order=False, num_resamples=100)

    filepath = write_dir / "arrays" / "stability.S.saltelliGsa.{}step{}".format(
        gsa.iterations, conv.iterations_step
    )
    write_pickle(S_dict, filepath)

In [14]:
from dask.distributed import Client, LocalCluster
from dask_jobqueue import SLURMCluster
import os

In [31]:
# Machine profile: a name containing "merlin" selects a SLURM-backed cluster,
# a name containing "local" selects a LocalCluster on this machine.
which_pc = "merlin_paper_gsa"
if 'merlin' in which_pc:
    path_dask_logs = '/data/user/kim_a/dask_logs'
    # exist_ok=True replaces the racy "check, then create" pattern.
    os.makedirs(path_dask_logs, exist_ok=True)
    cluster = SLURMCluster(cores=8,
                           memory="80GB",
                           walltime='10:00:00',
                           interface='ib0',
                           local_directory=path_dask_logs,
                           log_directory=path_dask_logs,
                           queue="daily",
                           )
elif 'local' in which_pc:
    cluster = LocalCluster(memory_limit='7GB')
else:
    # Fail loudly instead of leaving `cluster` undefined (or stale from a
    # previous run of this cell) for the cells that follow.
    raise ValueError(
        "Unknown machine profile {!r}: expected a name containing "
        "'merlin' or 'local'".format(which_pc)
    )

In [32]:
# Connect a dask.distributed Client to the cluster object created above.
client = Client(cluster)

In [33]:
# Number of workers to request from the cluster.
n_workers = 80
cluster.scale(n_workers)

In [45]:
# Display the client summary (scheduler address, dashboard link, worker count).
client

0,1
Client  Scheduler: tcp://192.168.196.21:46363  Dashboard: http://192.168.196.21:8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [44]:
# client.close()
# cluster.close()

In [46]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.sensitivity_analysis.correlations import corrcoef_parallel
from gsa_framework.convergence import Convergence
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import write_pickle
import dask

In [47]:
def compute_per_worker(iterations_current, seed):
    """Compute and pickle correlation-based indices for one (step, seed) pair.

    Draws a seeded uniform sample of shape ``(iterations_current, num_params)``,
    rescales it with the global ``model``, evaluates the model and passes the
    outputs and rescaled inputs to ``corrcoef_parallel``. The result is written
    to ``step{iterations_current}.seed{seed}.pickle`` in the stability
    directory. If that file already exists nothing is recomputed.

    Parameters
    ----------
    iterations_current : int
        Number of sample rows to draw and evaluate.
    seed : int
        Seed for NumPy's global random generator; also part of the file name.

    Returns
    -------
    The object returned by ``corrcoef_parallel`` when it was computed in this
    call, or ``None`` when the pickle already existed.

    Notes
    -----
    Relies on a module-level ``model`` that must be defined before the call.
    """
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')
    num_params = 10000
    write_dir = path_base / "lca_model_{}".format(num_params)
    # NOTE(review): the directory name is hard-coded here, while the driver
    # cell derives it from gsa.gsa_label -- confirm that the two agree.
    stability_dir = write_dir / "stability_intermediate_correlationsGsa"

    filepath_S = stability_dir / "step{}.seed{}.pickle".format(iterations_current, seed)
    if not filepath_S.exists():
        # Seed the generator so the sample is reproducible for this seed.
        # The previous `np.random.rand(seed)` drew `seed` random numbers
        # (allocating a huge array) and left the generator unseeded.
        np.random.seed(seed)
        X = np.random.rand(iterations_current, num_params)
        Xr = model.rescale(X)
        del X  # free the unscaled sample before the model evaluation
        y = model(Xr)
        S_dict = corrcoef_parallel(y, Xr)
        write_pickle(S_dict, filepath_S)
        return S_dict
    else:
        print("{} already exists".format(filepath_S.name))

In [None]:
if __name__ == "__main__":
    # Driver for the correlation-coefficient stability runs: for every
    # `istep`-th convergence step, evaluate `compute_per_worker` once per
    # bootstrap seed.
#     path_base = Path(
#         "/Users/akim/PycharmProjects/gsa_framework/dev/write_files/paper_gsa/"
#     )
    path_base = Path('/data/user/kim_a/paper_gsa/gsa_framework_files')

    # LCA model
    bw.projects.set_current("GSA for paper")
    co = bw.Database("CH consumption 1.0")
    # First activity whose name contains "Food"; IndexError if none match.
    act = [act for act in co if "Food" in act["name"]][0]
    demand = {act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")

    # Define some variables
    num_params = 10000
    num_influential = num_params // 100
    iterations_validation = 500
    write_dir = path_base / "lca_model_{}".format(num_params)
    # `model` is also read as a global by compute_per_worker.
    model = LCAModel(demand, method, write_dir, num_params=num_params)
    gsa_seed = 3403
    validation_seed = 7043
    fig_format = ["html", "pickle"]

    iterations = 2 * num_params
    gsa = CorrelationCoefficients(
        iterations=iterations,
        model=model,
        write_dir=write_dir,
        seed=gsa_seed,
    )

    conv = Convergence(
        gsa.filepath_Y,
        gsa.num_params,
        gsa.generate_gsa_indices,
        gsa.gsa_label,
        write_dir,
        num_steps=100,
    )

    # One row of seeds per convergence step, one column per bootstrap repeat.
    # Seeding first makes the seed table itself reproducible.
    num_bootstrap = 10
    np.random.seed(gsa_seed)
    stability_seeds = np.random.randint(
        low=0,
        high=2147483647,
        size=(len(conv.iterations_for_convergence), num_bootstrap),
    )

    stability_dir = write_dir / "stability_intermediate_{}".format(gsa.gsa_label)
    stability_dir.mkdir(parents=True, exist_ok=True)

    # Visit every `istep`-th step. Slicing both sequences with the same stride
    # keeps each step paired with its own row of seeds.
    istep = 4
    steps_selected = conv.iterations_for_convergence[::istep]
    seeds_selected = stability_seeds[::istep]
    for iterations_current, seeds_current in zip(steps_selected, seeds_selected):
        print(iterations_current)
        for seed in seeds_current:
            compute_per_worker(iterations_current, seed)

    # Alternative dask-based scheduling, kept for reference (disabled).
#     model_evals = []
#     model_evals_all = []
#     task_per_worker = dask.delayed(compute_per_worker)
#     i=0
#     for iterations_current in conv.iterations_for_convergence[::2]:
#         if i%16==0 and len(model_evals)>0:
#             print(len(model_evals))
#             model_evals_all.append(model_evals)
#             model_evals = []
#         for seed in stability_seeds[i,:]:
#             filepath_S = stability_dir / "step{}.seed{}.pickle".format(iterations_current, seed)
#             if not filepath_S.exists():
#                 model_eval = task_per_worker(iterations_current, seed)
#                 model_evals.append(model_eval)
#             else:
#                 print("{} already exists".format(filepath_S.name))
#         i += 2
#
#     # Belongs to the disabled dask variant above; left active it raised
#     # NameError because model_evals / model_evals_all are never defined.
#     model_evals_all.append(model_evals)

10
814
1618
2422
3226
4030


In [None]:
# %%time
# for model_evals in model_evals_all:
#     print(len(model_evals))
#     dask.compute(model_evals)