In [1]:
def setup_lca_model(num_params):
    """Create the LCA model that the GSA runs are based on.

    Activates the "GSA for oases" Brightway project, picks the first activity
    in the "CH consumption 1.0" database whose name contains
    "ch hh average consumption" and uses one unit of it as the demand.

    Parameters
    ----------
    num_params
        Forwarded unchanged to ``LCAModel`` as ``num_params``.

    Returns
    -------
    tuple
        ``(model, write_dir, gsa_seed)``: the ``LCAModel`` instance, the
        directory handed to it, and the fixed seed for the GSA sampling.

    Raises
    ------
    IndexError
        If no activity name in the database contains the search string.
    """
    data_root = Path('/data/user/kim_a/')

    # Select the project and the consumption database.
    bw.projects.set_current("GSA for oases")
    consumption_db = bw.Database("CH consumption 1.0")

    # Demand: one unit of the first matching activity.
    matching_acts = [
        act for act in consumption_db if "ch hh average consumption" in act['name']
    ]
    functional_unit = {matching_acts[0]: 1}
    impact_method = ("IPCC 2013", "climate change", "GTP 100a")

    # Output location, model and sampling seed.
    write_dir = data_root / "oases_gsa"
    model = LCAModel(functional_unit, impact_method, write_dir, num_params=num_params)
    return model, write_dir, 92374523

def setup_corr(num_params, iterations):
    """Return a ``CorrelationCoefficients`` GSA object for the LCA model.

    The model, output directory and seed all come from
    ``setup_lca_model(num_params)``; ``iterations`` is passed straight through
    to the GSA object.
    """
    lca_model, out_dir, seed = setup_lca_model(num_params)
    return CorrelationCoefficients(
        iterations=iterations,
        model=lca_model,
        write_dir=out_dir,
        seed=seed,
    )

def compute_scores_per_worker(option, num_params, iterations, i_worker, n_workers):
    """Evaluate the model on one worker's chunk of unit-cube samples.

    The chunk is read from ``X.unitcube.<i_worker>.<n_workers>.pickle`` inside
    ``gsa.dirpath_Y``, rescaled with ``gsa.model.rescale``, passed to
    ``gsa.model`` and the resulting scores are written to
    ``<i_worker>.<n_workers>.pickle`` in the same directory.

    Parameters
    ----------
    option : str
        Sampling method whose GSA object is built: ``"random"``, ``"sobol"``
        or ``"latin"``.
    num_params, iterations
        Forwarded to the selected ``setup_*`` function.
    i_worker : int
        Index of this worker's chunk; used in the chunk file names.
    n_workers : int
        Total number of chunks; used in the chunk file names.

    Returns
    -------
    The scores returned by ``gsa.model`` for this chunk.

    Raises
    ------
    ValueError
        If ``option`` is not one of the supported values. Previously this
        fell through and failed later with an unbound ``gsa`` variable.
    """
    if option == "random":
        gsa = setup_corr(num_params, iterations)
    elif option == "sobol":
        # NOTE(review): setup_sobol is not defined in the visible part of this
        # notebook -- confirm it is provided elsewhere before using this option.
        gsa = setup_sobol(num_params, iterations)
    elif option == 'latin':
        # NOTE(review): same as above for setup_latin.
        gsa = setup_latin(num_params, iterations)
    else:
        raise ValueError(
            "Unknown option {!r}; expected 'random', 'sobol' or 'latin'".format(option)
        )
    gsa.dirpath_Y.mkdir(parents=True, exist_ok=True)
    filepath_X_chunk = gsa.dirpath_Y / "X.unitcube.{}.{}.pickle".format(i_worker, n_workers)
    X_chunk_unitcube = read_pickle(filepath_X_chunk)
    X_chunk_rescaled = gsa.model.rescale(X_chunk_unitcube)
    scores = gsa.model(X_chunk_rescaled)
    Y_filename = "{}.{}.pickle".format(i_worker, n_workers)
    filepath = gsa.dirpath_Y / Y_filename
    write_pickle(scores, filepath)
    return scores

def generate_model_output_from_chunks(gsa, n_workers):
    """Concatenate the per-worker score chunks and store them as one array.

    Reads ``<i>.<n_workers>.pickle`` from ``gsa.dirpath_Y`` for every
    ``i in range(n_workers)``, joins the chunks in worker order with
    ``np.hstack`` and writes the result to ``gsa.filepath_Y`` via
    ``write_hdf5_array``.

    Parameters
    ----------
    gsa
        Object providing ``dirpath_Y`` (chunk directory) and ``filepath_Y``
        (destination file).
    n_workers : int
        Number of chunk files to read.

    Returns
    -------
    numpy.ndarray
        The concatenated model outputs (empty when ``n_workers`` is 0).
    """
    # Seed with an empty 1-D array so zero chunks still gives an empty result,
    # then stack once at the end instead of re-copying the growing array on
    # every iteration.
    chunks = [np.zeros(shape=(0,))]
    for i in range(n_workers):
        filepath_Y_chunk = gsa.dirpath_Y / "{}.{}.pickle".format(i, n_workers)
        chunks.append(read_pickle(filepath_Y_chunk))
    Y = np.hstack(chunks)  # TODO change to vstack for multidimensional output
    write_hdf5_array(Y, gsa.filepath_Y)
    return Y

# 1. Remove inputs that result in low output variance, only use 35k inputs

In [23]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.validation import Validation
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import read_hdf5_array, read_pickle, write_hdf5_array, write_pickle
import h5py

In [26]:
if __name__ == "__main__":
    # Step 1 of the notebook: build the LCA model and a Validation object, pick
    # `num_params_gsa` parameters from the stored LSA scores, and plot the
    # histogram returned by `plot_histogram_Y_all_Y_inf` for that subset.

    path_base = Path('/data/user/kim_a/')

    # LCA model
    bw.projects.set_current("GSA for oases")
    co = bw.Database("CH consumption 1.0")
    # First activity whose name contains "average consumption"; raises
    # IndexError if nothing matches.
    # NOTE(review): setup_lca_model() filters on "ch hh average consumption"
    # instead -- confirm both select the same activity.
    demand_act = [act for act in co if "average consumption" in act['name']][0]
    print(demand_act)
    demand = {demand_act: 1}
    method = ("IPCC 2013", "climate change", "GTP 100a")

    # Define some variables
    # NOTE(review): num_params, gsa_seed and fig_format are assigned here but
    # not used anywhere in this cell.
    num_params = 172056
    iterations_validation = 2000
    write_dir = path_base / "oases_gsa"
    model = LCAModel(demand, method, write_dir) # TODO add num_params later
    gsa_seed = 42095720
    validation_seed = 66666
    fig_format = ["html", "pickle"]

    # Make sure that the chosen num_params in LCA are appropriate
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=validation_seed,
        default_x_rescaled=model.default_uncertain_amounts,
        write_dir=write_dir,
    )
    num_params_gsa = 35000
    tag = "LocalSA"
    # Load the LSA scores from <write_dir>/LSA_scores and select the parameter
    # subset for `num_params_gsa`.
    scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores")
    uncertain_tech_params_where_subset, _ = model.get_nonzero_params_from_num_params(scores_dict, num_params_gsa)
    # Translate every selected entry into its position inside
    # model.uncertain_tech_params_where; each entry must occur exactly once.
    parameter_choice = []
    for u in uncertain_tech_params_where_subset:
        where_temp = np.where(model.uncertain_tech_params_where == u)[0]
        assert len(where_temp) == 1
        parameter_choice.append(where_temp[0])
    parameter_choice.sort()
    Y_subset = val.get_influential_Y_from_parameter_choice(parameter_choice=parameter_choice, tag=tag)
    # `fig` is pickled by the next cell.
    fig=val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=num_params_gsa)

'ch hh average consumption' (1 month of consumption, CH, None)
1045.7396304829626
validation.Y.35000inf.2000.66666.LocalSA.hdf5 already exists


In [32]:
# Save the histogram figure from the previous cell to "file.pickle".
# `pickle` is imported here because the only other import of it sits in a much
# later cell, so this cell would raise NameError on a fresh Restart & Run All.
import pickle

with open("file.pickle", 'wb') as f:
    pickle.dump(fig,f)

# 2. Run GSA on the lca model with 35k inputs

## 2.1. DASK

In [None]:
from dask.distributed import Client, LocalCluster
from dask_jobqueue import SLURMCluster
import os

In [None]:
# Choose the dask cluster backend from the machine tag: a SLURM-backed cluster
# when the tag contains "merlin", a local cluster when it contains "local".
which_pc = "merlin_oases_gsa"
if 'merlin' in which_pc:
    path_dask_logs = '/data/user/kim_a/dask_logs'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path_dask_logs, exist_ok=True)
    cluster = SLURMCluster(cores     = 8,
                           memory    ='30GB',
                           walltime  = '20:00:00',
                           interface ='ib0',
                           local_directory = path_dask_logs,
                           log_directory   = path_dask_logs,
                           queue="daily",
                           )
elif 'local' in which_pc:
    cluster = LocalCluster(memory_limit='7GB')
else:
    # Fail here instead of leaving `cluster` undefined for the following cells.
    raise ValueError(
        "Unknown machine tag {!r}: expected it to contain 'merlin' or 'local'".format(which_pc)
    )

In [None]:
# Attach a dask.distributed client to the cluster created in the previous cell.
client = Client(cluster)

In [None]:
# Number of workers requested from the cluster. Later cells also use this value
# as the number of sample/score chunks (one chunk file per worker index).
n_workers = 80
cluster.scale(n_workers)

In [None]:
# Bare expression: display the client object as the cell output.
client

In [None]:
# Shut down the client and the cluster.
# NOTE(review): in top-to-bottom order this cell runs before the delayed tasks
# of section 2.2 are built -- presumably it is meant to be executed manually
# once the computation has finished; confirm.
client.close()
cluster.close() 

## 2.2. Model outputs

In [None]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.validation import Validation
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import read_hdf5_array, read_pickle, write_hdf5_array, write_pickle
import h5py
import dask

In [None]:
# Setup GSA
num_params = 35000
iterations = 2*num_params
gsa_corr = setup_corr(num_params, iterations)

In [None]:
# X = gsa_corr.generate_unitcube_samples_based_on_method(gsa_corr.iterations)
# gsa_corr.create_model_output_dir()
# print(X.shape, gsa_corr.dirpath_Y)
# iter_corr_chunk = gsa_corr.iterations//n_workers

# for i in range(n_workers):
#     start = iter_corr_chunk*i
#     end = iter_corr_chunk*(i+1)
#     print(i,start,end)
#     X_chunk = X[start:end,:]
#     filepath_X_chunk = gsa_corr.dirpath_Y / "X.unitcube.{}.{}.pickle".format(i, n_workers)
#     write_pickle(X_chunk, filepath_X_chunk)
    
# del X

In [None]:
# Build one lazy dask task per worker; nothing is evaluated until the task
# list is handed to dask.compute.
option = "random"
task_per_worker = dask.delayed(compute_scores_per_worker)
model_evals = [
    task_per_worker(option, num_params, iterations, i_worker, n_workers)
    for i_worker in range(n_workers)
]

In [None]:
# %%time
# dask.compute(model_evals)

In [None]:
# Y = generate_model_output_from_chunks(gsa_corr, n_workers)

## 2.3. Perform GSA

In [None]:
%%time
# Run the GSA; later cells read the "spearman" entry of the returned mapping.
S_dict = gsa_corr.perform_gsa()

# 3. Validate GSA results (TODO: validate with respect to all 180k inputs)

In [22]:
# Validate the GSA ranking: rebuild the LCA model, take the `num_influential`
# parameters with the largest absolute Spearman coefficient, and pass their
# positions in the model's parameter array to the Validation helpers.
path_base = Path('/data/user/kim_a/')
# LCA model
bw.projects.set_current("GSA for oases")
co = bw.Database("CH consumption 1.0")
# First activity whose name contains "average consumption"; raises IndexError
# if nothing matches.
demand_act = [act for act in co if "average consumption" in act['name']][0]
print(demand_act)
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GTP 100a")

# Define some variables
num_params = 172051  # NOTE(review): not used in this cell (see TODO below)
iterations_validation = 2000
write_dir = path_base / "oases_gsa"
model = LCAModel(demand, method, write_dir) # TODO add num_params later
validation_seed = 66666
fig_format = ["html", "pickle"]

t0 = time.time()
# Make sure that the chosen num_params in LCA are appropriate
val = Validation(
    model=model,
    iterations=iterations_validation,
    seed=validation_seed,
    default_x_rescaled=model.default_uncertain_amounts,
    write_dir=write_dir,
)

# `S_dict` and `gsa_corr` come from the GSA cells of section 2.
spearman = S_dict["spearman"]
num_influential = 60
tag = "SpearmanIndex"

# Indices (within the GSA model) of the parameters with the largest |spearman|,
# sorted ascending.
parameter_choice_inf = np.argsort(abs(spearman))[::-1][:num_influential]
parameter_choice_inf.sort()
params_where_inf = gsa_corr.model.uncertain_tech_params_where[parameter_choice_inf]
params_where_all = model.uncertain_tech_params_where

# Map each influential entry to its position in this model's parameter array.
# The array is not pre-filled with NaN: an integer array cannot hold NaN, and
# every slot is written by the loop anyway. Each entry must match exactly once,
# the same check the step-1 cell applies.
parameter_choice = np.zeros(len(params_where_inf), dtype=int)
for i, p in enumerate(params_where_inf):
    where_temp = np.where(params_where_all == p)[0]
    assert len(where_temp) == 1, (
        "expected exactly one match for parameter {}, found {}".format(p, len(where_temp))
    )
    parameter_choice[i] = where_temp[0]

influential_Y = val.get_influential_Y_from_parameter_choice(parameter_choice=parameter_choice, tag=tag)
t1 = time.time()
print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
val.plot_histogram_Y_all_Y_inf(
    influential_Y, num_influential, tag=tag, fig_format=fig_format
)
val.plot_correlation_Y_all_Y_inf(
    influential_Y, num_influential, tag=tag, fig_format=fig_format
)

'ch hh average consumption' (1 month of consumption, CH, None)
1045.7396304829626
validation.Y.60inf.2000.66666.SpearmanIndex.hdf5 already exists
Total validation time  ->    9.775 s 



In [53]:
# Constant offset added to both score arrays before plotting their correlation.
# NOTE(review): 1045.7396304829626 equals the value printed in the outputs of
# the cells above; the origin of 1709.0750836479817 is not visible in this
# notebook -- confirm and replace both with named variables.
diff = -1045.7396304829626+1709.0750836479817
# NOTE(review): `correlation_Y1_Y2` is not imported in any cell shown here --
# confirm where it is defined.
fig = correlation_Y1_Y2(val.Y_all+diff,influential_Y+diff)

In [54]:
# Persist the correlation figure from the previous cell; the relative path
# means the file lands in the current working directory.
write_pickle(fig, "validation_60inf_correlation_spearman.pickle")

# 4. Print exchanges

In [2]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.validation import Validation
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import read_hdf5_array, read_pickle, write_hdf5_array, write_pickle
import h5py
import pickle

import plotly.graph_objects as go

Using environment variable BRIGHTWAY2_DIR for data directory:
/data/user/kim_a/Brightway3


In [3]:
# Rebuild the correlation GSA object, obtain its indices and pick the
# `num_influential` parameters with the largest Spearman values.
num_params = 35000
iterations = 2*num_params
gsa_corr = setup_corr(num_params, iterations)
S_dict = gsa_corr.generate_gsa_indices()
spearman = S_dict["spearman"]

num_influential = 60
# NOTE(review): this ranks by the signed coefficient, whereas the validation
# cell in section 3 ranks by abs(spearman); strongly negative correlations
# would be excluded here -- confirm which ordering is intended.
imp_inds = np.argsort(spearman)[::-1][:num_influential]
imp_tech = gsa_corr.model.uncertain_tech_params[imp_inds]

1045.7396304829626


In [4]:
%%time
# Resolve the row/col matrix indices of every influential exchange to activity
# objects. The reverse mapping is built once up front: reverse_dict() was
# previously called twice per loop iteration although its result does not
# depend on the loop variable.
reverse_activity_dict = gsa_corr.model.lca.reverse_dict()[0]
row_acts, col_acts = [], []
for p in imp_tech[:num_influential]:
    row_acts.append(bw.get_activity(reverse_activity_dict[p['row']]))
    col_acts.append(bw.get_activity(reverse_activity_dict[p['col']]))

CPU times: user 3.29 s, sys: 165 ms, total: 3.45 s
Wall time: 384 ms


In [5]:
# filename = gsa_corr.write_dir / "arrays" / 'row_acts.pickle'
# with open(filename, 'wb') as f:
#      pickle.dump(row_acts, f)
    
# filename = gsa_corr.write_dir / "arrays" / 'col_acts.pickle'
# with open(filename, 'wb') as f:
#      pickle.dump(col_acts, f)
        
# filename = gsa_corr.write_dir / "arrays" /  'imp_tech.pickle'
# with open(filename, 'wb') as f:
#      pickle.dump(imp_tech, f)

In [6]:
# filename = gsa_corr.write_dir / "arrays" /  'row_acts.pickle'
# with open(filename, 'rb') as f:
#     row_acts = pickle.load(f)
    
# filename = gsa_corr.write_dir / "arrays" /  'col_acts.pickle'
# with open(filename, 'rb') as f:
#     col_acts = pickle.load(f)

# filename = gsa_corr.write_dir / "arrays" / 'imp_tech.pickle'
# with open(filename, 'rb') as f:
#     imp_tech = pickle.load(f)

In [7]:
# Axis configuration for the exchange plot. Both bar series share one x axis:
# a quantity q is drawn at x = -q * <xscale>, so the label of a tick at
# position x must be -x / <xscale>.
scale_plot = imp_tech['scale']
# amounts_plot = imp_tech['amount']

# xscale_inf = 100
# xscale_max = np.round((max(spearman)//0.2 + 1)*0.2, 1)*xscale_inf
# xtickvals_inf = np.arange(0,-xscale_max-1,-20)
# xticktext_inf = -xtickvals_inf/xscale_inf

# xscale_scale = int(np.ceil(max(scale_plot) * xscale_inf / 20) * 20)
# xtickvals_scale = np.arange(0,-xscale_scale-1,-20)
# xticktext_scale = -xtickvals_scale/100

# Feature-importance axis
xscale_inf = 200
xtickvals_inf = np.array([-0.0,-0.2,-0.4,-0.6,-0.8,-1.0])*100
xticktext_inf = -xtickvals_inf / xscale_inf

# Lognormal-scale axis. The labels are derived from the tick positions and the
# scaling factor so they always agree with the bars; the previously hard-coded
# labels (0, 0.25, ..., 1.25) corresponded to a factor of 80, not 120.
xscale_scale = 120
xtickvals_scale = np.array([0,  -20,  -40,  -60,  -80, -100,])#np.arange(0,-xscale_scale-1,-20)
xticktext_scale = np.round(-xtickvals_scale / xscale_scale, 2)

n_features_plot = num_influential
importance_plot = spearman[imp_inds]

In [9]:
# Colour palette of the exchange plot: one colour per bar series plus the
# highlight colour for activities outside ecoinvent.
colors = dict(
    scale='#fc6955',
    agribalyse='#00e779',
    inf='#2f91e5',
)

In [10]:
def _annotation(x, y, text, xanchor, **font_kwargs):
    """Return one plotly annotation dict positioned in data coordinates.

    Every annotation of this figure uses xref/yref "x"/"y", a vertically
    centred anchor and no arrow; only the position, text, horizontal anchor
    and optional font settings vary.
    """
    return dict(
        x=x,
        y=y,
        xref="x",
        yref="y",
        text=text,
        xanchor=xanchor,
        yanchor='middle',
        showarrow=False,
        **font_kwargs
    )


def _database_color(activity):
    """Black for 'ecoinvent 3.6 cutoff' activities, the agribalyse colour otherwise."""
    if activity['database'] == 'ecoinvent 3.6 cutoff':
        return 'black'
    return colors['agribalyse']


def _exchange_label(prefix, activity):
    """Label text: prefix, first 10 chars of the database, activity name and location."""
    return prefix + activity['database'][:10] + ' -> ' \
                  + activity['name'] + ', ' \
                  + activity['location']


annotations = []

# Per exchange: the column ("Output") activity slightly above the bar centre,
# the row ("Input") activity slightly below it, and the 1-based rank at x=0.
for i in range(n_features_plot):
    col_act = col_acts[i]
    row_act = row_acts[i]
    annotations.extend([
        _annotation(12, i-0.15, _exchange_label("Output ", col_act), 'left',
                    font_size=7, font_color=_database_color(col_act)),
        _annotation(12, i+0.15, _exchange_label("Input   ", row_act), 'left',
                    font_size=7, font_color=_database_color(row_act)),
        _annotation(0, i, i+1, 'left'),
    ])

# Tick labels and title of the feature-importance axis
for j in range(len(xtickvals_inf)):
    annotations.append(
        _annotation(xtickvals_inf[j], -1, xticktext_inf[j], 'center',
                    font_color=colors['inf'])
    )
annotations.append(
    _annotation(-xscale_scale/2, -1.5, 'Feature importance', 'center',
                font_color=colors['inf'])
)

# Tick labels and title of the scale axis
for j in range(len(xtickvals_scale)):
    annotations.append(
        _annotation(xtickvals_scale[j], -2.2, xticktext_scale[j], 'center',
                    font_color=colors['scale'])
    )
annotations.append(
    _annotation(-xscale_scale/2, -2.7, 'Scale (lognormal distr)', 'center',
                font_color=colors['scale'])
)

# Header above the exchange descriptions
annotations.append(
    _annotation(11, -1.5, 'Corresponding exchanges', 'left')
)


In [20]:
def _horizontal_bar(values, color):
    """Build one horizontal bar trace with the settings both bar series share."""
    return go.Bar(
        x=values,
        y=np.arange(n_features_plot),
        name='All features',
        opacity=opacity_,
        orientation='h',
        width=[0.3]*n_features_plot,
        showlegend=False,
        marker_color=color,
    )


opacity_ = 0.8
fig = go.Figure()

# Both series are drawn towards negative x: importance first, then the
# lognormal scales.
fig.add_trace(_horizontal_bar(importance_plot*(-xscale_inf), colors['inf']))
fig.add_trace(_horizontal_bar(scale_plot*(-xscale_scale), colors['scale']))

# White horizontal line at y = -0.6 spanning x from -130 to 170.
fig.add_trace(
    go.Scatter(
        x=[-130,170],
        y=[-0.6,-0.6],
        mode='lines',
        showlegend=False,
        line_color='white',
    )
)

# Built-in tick labels are hidden on both axes; the visible labels come from
# `annotations`.
xaxis_settings = dict(
    tickmode='array',
    tickvals=xtickvals_inf,
    ticktext=xticktext_inf,
    showticklabels=False,
)
yaxis_settings = dict(
    tickmode='array',
    tickvals=np.arange(n_features_plot),
    ticktext=[],
    autorange='reversed',
    showticklabels=False,
)

fig.update_layout(
    xaxis=xaxis_settings,
    yaxis=yaxis_settings,
    width=800,
    height=1200,
    margin=dict(l=0, r=0, t=0, b=0),
    annotations=annotations,
    barmode='group',
    bargap=0.4,
    yaxis_showgrid=False,
    yaxis_zeroline=False,
)

fig.show()

In [21]:
# Pickle the figure into <write_dir>/figures/fig_gsa_results.pickle.
# NOTE(review): the "figures" directory is not created here -- presumably it
# already exists; confirm, otherwise open() raises FileNotFoundError.
filename = gsa_corr.write_dir / "figures" /  'fig_gsa_results.pickle'
with open(filename, 'wb') as f:
     pickle.dump(fig, f)