# 1. LSA: remove inputs that result in low output variance, keep only 10k inputs

In [None]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.validation import Validation
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import read_hdf5_array, read_pickle, write_hdf5_array, write_pickle
import h5py
import pickle
from setups_paper_gwp import *

In [None]:
# TODO choose
# Select the case study. Each option fixes the model-setup function and the
# directory that results are written to. The setup_lca_model_* functions are
# not defined in this notebook; presumably they come from the star import of
# setups_paper_gwp -- TODO confirm.
option = 'paper'
if option == 'paper':
    setup_lca_model = setup_lca_model_paper
    path_base = Path('/data/user/kim_a/paper_gsa')
    write_dir = path_base / "lca_model_food_10000"
elif option == 'setac':
    setup_lca_model = setup_lca_model_oases
    path_base = Path('/data/user/kim_a/')
    write_dir = path_base / 'oases_gsa_gwp_10000'
else:
    # Fail right here instead of with a NameError on setup_lca_model or
    # write_dir in a later cell.
    raise ValueError(
        "Unknown option {!r}; expected 'paper' or 'setac'.".format(option)
    )

In [None]:
if __name__ == "__main__":

    # Number of inputs to keep for the subsequent GSA
    num_params_gsa = 10000

    # Settings of the validation run
    iterations_validation = 2000
    validation_seed = 66666
    fig_format = ["pickle"]
    tag = "LocalSA"

    # Build the LCA model without restricting the number of inputs
    # (num_params=None) and request generation of the LSA scores dictionary.
    model, write_dir, gsa_seed = setup_lca_model(
        num_params=None,
        write_dir=write_dir,
        flag_generate_scores_dict=True,
    )

    # Validation object used to compare the output when all inputs vary
    # against the output when only the selected subset varies.
    val = Validation(
        model=model,
        iterations=iterations_validation,
        seed=validation_seed,
        default_x_rescaled=model.default_uncertain_amounts,
        write_dir=write_dir,
    )

    # Pick num_params_gsa inputs based on the stored LSA scores
    scores_dict = model.get_lsa_scores_pickle(model.write_dir / "LSA_scores")
    uncertain_tech_params_where_subset, _ = model.get_nonzero_params_from_num_params(
        scores_dict, num_params_gsa
    )

    # Convert every selected value into its position within
    # model.uncertain_tech_params_where; each value must occur exactly once.
    all_params_where = model.uncertain_tech_params_where
    matched_positions = []
    for selected_where in uncertain_tech_params_where_subset:
        hits = np.where(all_params_where == selected_where)[0]
        assert len(hits) == 1
        matched_positions.append(hits[0])
    parameter_choice = sorted(matched_positions)

    # Model output when only the selected inputs vary, and its histogram
    Y_subset = val.get_influential_Y_from_parameter_choice(
        parameter_choice=parameter_choice, tag=tag
    )
    fig = val.plot_histogram_Y_all_Y_inf(Y_subset, num_influential=num_params_gsa)

# 2. Run all GSA for lca model with 10k inputs

## 2.1. DASK

In [None]:
from dask.distributed import Client, LocalCluster
from dask_jobqueue import SLURMCluster
from pathlib import Path

In [None]:
# Choose where the dask workers run: a SLURM cluster when the machine tag
# contains 'merlin', a local cluster when it contains 'local'.
which_pc = "merlin_paper_gsa"
if 'merlin' in which_pc:
    # The same directory is used as dask's local directory and for its logs
    path_dask_logs = Path('/data/user/kim_a/dask_logs')
    path_dask_logs.mkdir(parents=True, exist_ok=True)
    cluster = SLURMCluster(
        cores=8,
        memory='30GB',
        walltime='20:00:00',
        interface='ib0',
        local_directory=path_dask_logs.as_posix(),
        log_directory=path_dask_logs.as_posix(),
        queue="daily",
    )
elif 'local' in which_pc:
    cluster = LocalCluster(memory_limit='7GB')
else:
    # Without this branch `cluster` stays undefined and the next cell fails
    # with an unrelated-looking NameError.
    raise ValueError(
        "Unknown which_pc {!r}; expected a value containing 'merlin' or 'local'.".format(which_pc)
    )

In [None]:
# Connect a dask.distributed client to the cluster selected in the previous cell
client = Client(cluster)

In [None]:
# Ask the cluster to scale to n_workers workers
n_workers = 3
cluster.scale(n_workers)

In [None]:
# Bare expression as the last statement of the cell: the notebook displays it
client

In [None]:
# client.close()
# cluster.close() 

# 3. Validate GSA results (TODO: with respect to all 180k inputs)

In [None]:
# Set up the LCA model and the Validation object used to check the GSA results.
path_base = Path('/data/user/kim_a/')
# LCA model
bw.projects.set_current("GSA for oases")
co = bw.Database("CH consumption 1.0")
# First activity of the database whose name contains "average consumption";
# raises IndexError if there is none.
demand_act = [act for act in co if "average consumption" in act['name']][0]
print(demand_act)
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GWP 100a")

# Define some variables
# NOTE: num_params is not passed to LCAModel below (see the TODO on that line)
num_params = 172051
iterations_validation = 2000
write_dir = path_base / "oases_gsa"
model = LCAModel(demand, method, write_dir) # TODO add num_params later
validation_seed = 66666
fig_format = ["html", "pickle"]

# Start of the timed section; the elapsed time is printed after the
# influential-parameter run further down in this cell.
t0 = time.time()
# Validation object built from the model and its default uncertain amounts
val = Validation(
    model=model,
    iterations=iterations_validation,
    seed=validation_seed,
    default_x_rescaled=model.default_uncertain_amounts,
    write_dir=write_dir,
)

# NOTE(review): S_dict and gsa_corr are not created in this cell or in any
# cell above it; in this notebook they are only assigned in later cells
# (e.g. the setup_corr cell of section 4), which therefore has to run first.
spearman = S_dict["spearman"]
num_influential = 60
tag = "SpearmanIndex"

# Positions, within the inputs of the GSA model, of the num_influential inputs
# with the largest absolute Spearman coefficient, in ascending position order.
parameter_choice_inf = np.argsort(abs(spearman))[::-1][:num_influential]
parameter_choice_inf.sort()
params_where_inf = gsa_corr.model.uncertain_tech_params_where[parameter_choice_inf]
params_where_all = model.uncertain_tech_params_where

# Translate those inputs into positions within the validation model's inputs.
# The array is sized by the number of inputs actually selected, and no NaN
# placeholder is written: NaN cannot be stored in an integer array and the
# assignment raises ValueError.
parameter_choice = np.zeros(len(params_where_inf), dtype=int)
for i, p in enumerate(params_where_inf):
    where_temp = np.where(params_where_all == p)[0]
    if len(where_temp) != 1:
        raise ValueError(
            "Expected exactly one match for parameter {} in "
            "model.uncertain_tech_params_where, found {}.".format(p, len(where_temp))
        )
    # Store the scalar index, not the length-1 array returned by np.where
    parameter_choice[i] = where_temp[0]

influential_Y = val.get_influential_Y_from_parameter_choice(parameter_choice=parameter_choice, tag=tag)
t1 = time.time()
print("Total validation time  -> {:8.3f} s \n".format(t1 - t0))
# Figure formats for the (currently commented-out) validation plots below
fig_format = ['html', 'pickle']
# val.plot_histogram_Y_all_Y_inf(
#     influential_Y, num_influential, tag=tag, fig_format=fig_format, bin_min=None, bin_max=None, num_bins=60
# )
# val.plot_correlation_Y_all_Y_inf(
#     influential_Y, num_influential, tag=tag, fig_format=fig_format
# )

In [None]:
from gsa_framework.plotting import *
# Axis label shared by the validation figures
lcia_scores_text = "LCIA scores, [kg CO2-eq]"
# Constant offset added to both score samples before plotting
# NOTE(review): the origin of the two numbers is not documented here
diff = 1869.6814802713172 - 1150.4295714576062
# Histogram range and resolution
bin_min, bin_max, num_bins = 1800, 2150, 60

histogram_settings = dict(
    default_Y=None,
    bin_min=bin_min,
    bin_max=bin_max,
    num_bins=num_bins,
    trace_name1="All parameters vary",
    trace_name2="Only influential vary",
    color1="#636EFA",
    color2="#EF553B",
    color_default_Y="red",
    opacity=0.65,
    xaxes_title_text=lcia_scores_text,
)
# Overlay the shifted scores of the full run and of the influential-only run
fig = histogram_Y1_Y2(val.Y_all + diff, influential_Y + diff, **histogram_settings)
fig.update_yaxes(range=[-10, 140])
fig.show()
write_pickle(fig, "validation_60inf_histogram_spearman.pickle")

In [None]:
# Compare the shifted scores of the full run (Y1) with those of the
# influential-only run (Y2) over the index range start..end.
trace_names = dict(
    trace_name1="All parameters vary",
    trace_name2="Only influential vary",
)
axis_titles = dict(
    yaxes1_title_text=lcia_scores_text,
    xaxes2_title_text=lcia_scores_text,
    yaxes2_title_text=lcia_scores_text,
)
fig = correlation_Y1_Y2(
    Y1=val.Y_all + diff,
    Y2=influential_Y + diff,
    start=0,
    end=80,
    **trace_names,
    **axis_titles,
)
# Reuse the histogram limits: all y axes, and the x axis of the subplot in
# row 1 / column 2.
fig.update_yaxes(range=[bin_min, bin_max])
fig.update_xaxes(range=[bin_min, bin_max], row=1, col=2)
fig.show()
write_pickle(fig, "validation_60inf_correlation_spearman.pickle")

# 4. Print exchanges

In [None]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.validation import Validation
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import read_hdf5_array, read_pickle, write_hdf5_array, write_pickle
import h5py
import pickle

import plotly.graph_objects as go
import pandas as pd

In [None]:
# Correlation-based GSA for a model with num_params inputs, using twice as
# many iterations as inputs.
# setup_corr is not defined in the visible cells of this notebook; presumably
# it comes from the star import of setups_paper_gwp -- TODO confirm.
num_params = 35000
iterations = 2*num_params
gsa_corr = setup_corr(num_params, iterations)
S_dict = gsa_corr.generate_gsa_indices()
spearman = S_dict["spearman"]

# Keep the num_influential inputs with the largest Spearman coefficients,
# ordered from largest to smallest, together with their parameter records.
# NOTE(review): the ranking uses the signed coefficients, whereas the
# validation cell of section 3 ranks by abs(spearman); inputs with a strongly
# negative coefficient are therefore not selected here -- confirm this is
# intended.
num_influential = 20
imp_inds = np.argsort(spearman)[::-1][:num_influential]
imp_tech = gsa_corr.model.uncertain_tech_params[imp_inds]
spearman_inf = spearman[imp_inds]

In [None]:
%%time
# Resolve every influential exchange to its input (row) and output (col)
# activity. reverse_dict() does not depend on the loop variable, so it is
# evaluated once here instead of twice per exchange.
# NOTE(review): rows and columns are both looked up in element [0] of
# reverse_dict() -- confirm that this mapping is the right one for matrix rows.
rev_activity_dict = gsa_corr.model.lca.reverse_dict()[0]
row_acts, col_acts = [], []
for p in imp_tech[:num_influential]:
    row_acts.append(bw.get_activity(rev_activity_dict[p['row']]))
    col_acts.append(bw.get_activity(rev_activity_dict[p['col']]))

In [None]:
def _exchange_record(rank):
    """Collect the table row describing the exchange at position `rank`.

    Reads the input (row) and output (col) activities, the parameter record
    and the Spearman coefficient stored at the same position.
    """
    input_act = row_acts[rank].as_dict()
    output_act = col_acts[rank].as_dict()
    return {
        "input activity": input_act['name'],
        "input location": input_act['location'],
        "input reference product": input_act['reference product'],
        "input unit": input_act['unit'],
        "output activity": output_act['name'],
        "output location": output_act['location'],
        "output reference product": output_act['reference product'],
        "output unit": output_act['unit'],
        "exchange amount": imp_tech[rank]['amount'],
        "exchange scale": imp_tech[rank]['scale'],
        "exchange loc": imp_tech[rank]['loc'],
        "exchange Spearman rank coef.": spearman_inf[rank],
    }


# One row per influential exchange, written to an Excel sheet
list_ = [_exchange_record(i) for i in range(num_influential)]
df = pd.DataFrame(list_)
df.to_excel('influential_exchanges.xlsx')

In [None]:
# filename = gsa_corr.write_dir / "arrays" / 'row_acts.pickle'
# with open(filename, 'wb') as f:
#      pickle.dump(row_acts, f)
    
# filename = gsa_corr.write_dir / "arrays" / 'col_acts.pickle'
# with open(filename, 'wb') as f:
#      pickle.dump(col_acts, f)
        
# filename = gsa_corr.write_dir / "arrays" /  'imp_tech.pickle'
# with open(filename, 'wb') as f:
#      pickle.dump(imp_tech, f)

In [None]:
# filename = gsa_corr.write_dir / "arrays" /  'row_acts.pickle'
# with open(filename, 'rb') as f:
#     row_acts = pickle.load(f)
    
# filename = gsa_corr.write_dir / "arrays" /  'col_acts.pickle'
# with open(filename, 'rb') as f:
#     col_acts = pickle.load(f)

# filename = gsa_corr.write_dir / "arrays" / 'imp_tech.pickle'
# with open(filename, 'rb') as f:
#     imp_tech = pickle.load(f)

In [None]:
# Quantities for the mirrored bar chart: bars are drawn on the negative x side
# (values are multiplied by -xscale_inf / -xscale_scale in the figure cell).
# 'scale' field of every influential exchange
scale_plot = imp_tech['scale']
# amounts_plot = imp_tech['amount']

# Earlier, data-driven computation of the tick positions (kept for reference):
# xscale_inf = 100
# xscale_max = np.round((max(spearman)//0.2 + 1)*0.2, 1)*xscale_inf
# xtickvals_inf = np.arange(0,-xscale_max-1,-20)
# xticktext_inf = -xtickvals_inf/xscale_inf

# xscale_scale = int(np.ceil(max(scale_plot) * xscale_inf / 20) * 20)
# xtickvals_scale = np.arange(0,-xscale_scale-1,-20)
# xticktext_scale = -xtickvals_scale/100

# Importance axis: tick positions run from 0 to -100 in steps of 20; dividing
# by xscale_inf = 200 labels them with values from 0 to 0.5.
xscale_inf = 200
xtickvals_inf = np.array([-0.0,-0.2,-0.4,-0.6,-0.8,-1.0])*100
xticktext_inf = -xtickvals_inf / xscale_inf

# Scale axis: tick positions 0 to -125 are labelled 0 to 1.25 (factor 100)
xscale_scale = 100
xtickvals_scale = np.array([0, -25,  -50,  -75,  -100, -125,])#np.arange(0,-xscale_scale-1,-20)
xticktext_scale = np.array([0, 0.25,  0.5, 0.75,  1.0, 1.25,])

# Number of bars and their Spearman coefficients (same selection as imp_inds)
n_features_plot = num_influential
importance_plot = spearman[imp_inds]

In [None]:
# Colour palette of the results figure, keyed by bar type ('scale', 'inf')
# or by category of the annotated exchange.
colors = dict(
    scale='gray',          # alternative: '#fc6955'
    agribalyse='#00e779',
    inf='#2f91e5',
    mobility='#9467bd',
    electricity='#109618',
    agriculture='#ffa15a',
    computing='#4c78a8',   # alternative: '#0099c6'
    combustion='#b82e2e',
)

In [None]:
# Abbreviations used in the figure annotations, grouped by what they shorten.
_location_titles = {
    "Europe without Switzerland": "EU without CH",
}
_row_act_titles = {
    "petrol production, unleaded, petroleum refinery operation": "petrol production, unleaded, refinery operation",
    "market for printed wiring board, surface mounted, unspecified, Pb free": "market for printed wiring board, Pb free",
    "market for wafer, fabricated, for integrated circuit": "market for wafer, for integrated circuit",
}
_col_act_titles = {
    "electricity voltage transformation from high to medium voltage": "el. voltage transformation, high to medium",
    "electricity voltage transformation from medium to low voltage": "el. voltage transformation, medium to low",
    "printed wiring board production, surface mounted, unspecified, Pb free": "printed wiring board production, Pb free",
    "heat production, natural gas, at boiler condensing modulating <100kW": "heat production, natural gas, at boiler <100kW",
    "printed wiring board production, surface mounted, unspecified, Pb containing": "printed wiring board production, Pb containing",
    "heat production, light fuel oil, at boiler 10kW condensing, non-modulating": "heat production, light fuel oil, boiler 10kW condensing",
    "heat production, light fuel oil, at boiler 10kW, non-modulating": "heat production, light fuel oil, at boiler 10kW",
}

# Single lookup table: full name or location -> shortened text
titles_dict = {**_location_titles, **_row_act_titles, **_col_act_titles}

In [None]:
def _plot_label(activity):
    """Return the activity's name and location for plotting.

    Each of the two strings is replaced by its entry in titles_dict when one
    exists and kept unchanged otherwise.
    """
    name = activity['name']
    location = activity['location']
    return {
        'name': titles_dict.get(name, name),
        'location': titles_dict.get(location, location),
    }


# Shortened labels of the input (row) and output (col) activities
row_acts_plot, col_acts_plot = [], []
for i in range(num_influential):
    row_acts_plot.append(_plot_label(row_acts[i]))
    col_acts_plot.append(_plot_label(col_acts[i]))

In [None]:
# Substring rules for colouring an exchange, tested in this order against the
# name of its output activity; the first category with a hit wins.
_category_keywords = [
    ('electricity', ['voltage']),
    ('agriculture', ['soybean', 'cheese']),
    ('mobility', ['car', 'steel', 'diesel']),
    ('computing', ['circuit', 'wiring', 'computer', 'display']),
    ('combustion', ['heat', 'petrol', 'well']),
]


def _exchange_color(activity_name):
    """Return the colour of the first matching category, or 'black' if none matches."""
    for category, keywords in _category_keywords:
        if any(keyword in activity_name for keyword in keywords):
            return colors[category]
    return 'black'


def _exchange_label(prefix, activity, y, color):
    """Build the small-font text annotation '<prefix><name>, <location>' at height y."""
    return dict(
        x=12,
        y=y,
        xref="x",
        yref="y",
        text=prefix + activity['name'] + ', ' + activity['location'],
        xanchor='left',
        yanchor='middle',
        showarrow=False,
        font_size=7,
        font_color=color,
    )


annotations = []
for i in range(n_features_plot):
    # The output and the input line of an exchange share one colour, derived
    # from the output activity's name.
    exchange_color = _exchange_color(col_acts_plot[i]['name'])
    # Output activity slightly above the bar centre, input slightly below
    # (the y axis of the figure is reversed).
    annotations.append(_exchange_label("Output -> ", col_acts_plot[i], i - 0.15, exchange_color))
    annotations.append(_exchange_label("Input   -> ", row_acts_plot[i], i + 0.15, exchange_color))
    # Rank number of the exchange, starting at 1
    annotations.append(
        dict(
            x=0,
            y=i,
            xref="x",
            yref="y",
            text=i + 1,
            xanchor='left',
            yanchor='middle',
            showarrow=False,
        )
    )

def _axis_annotation(x, y, text, xanchor='center', color=None):
    """Build a text annotation without arrow in data coordinates.

    The font colour is only set when `color` is given.
    """
    annotation = dict(
        x=x,
        y=y,
        xref="x",
        yref="y",
        text=text,
        xanchor=xanchor,
        yanchor='middle',
        showarrow=False,
    )
    if color is not None:
        annotation['font_color'] = color
    return annotation


# Add ticks and annotations to feature importance
annotations.extend(
    _axis_annotation(xtickvals_inf[j], -1, xticktext_inf[j], color=colors['inf'])
    for j in range(xtickvals_inf.shape[0])
)
annotations.append(
    _axis_annotation(-xscale_scale/2, -1.5, 'Feature importance', color=colors['inf'])
)

# Add ticks and annotations to scale values
annotations.extend(
    _axis_annotation(xtickvals_scale[j], -2.2, xticktext_scale[j], color=colors['scale'])
    for j in range(xtickvals_scale.shape[0])
)
annotations.append(
    _axis_annotation(-xscale_scale/2, -2.7, 'Scale (lognormal distr)', color=colors['scale'])
)

# Heading of the exchange descriptions on the right-hand side (default colour)
annotations.append(
    _axis_annotation(11, -1.5, 'Ecoinvent technosphere exchanges', xanchor='left')
)


In [None]:
# annotations = []
# for i in range(n_features_plot):
#     # Set color depending in database
#     col_color = 'black'
#     if col_acts[i]['database'] != 'ecoinvent 3.6 cutoff':
#         col_color = colors['agribalyse']
#     row_color = 'black'
#     if row_acts[i]['database'] != 'ecoinvent 3.6 cutoff':
#         row_color = colors['agribalyse']
        
#     ann_input = dict(
#         x=12,
#         y=i-0.15,
#         xref="x",
#         yref="y",
#         text="Output " + col_acts[i]['database'][:10] + ' -> ' \
#                        + col_acts[i]['name'] + ', ' \
#                        + col_acts[i]['location'],
#         xanchor = 'left',
#         yanchor = 'middle',
#         showarrow = False,
#         font_size=7,
#         font_color=col_color
#         )
#     ann_output = dict(
#         x=12,
#         y=i+0.15,
#         xref="x",
#         yref="y",
#         text="Input   " + row_acts[i]['database'][:10] + ' -> ' \
#                         + row_acts[i]['name'] + ', ' \
#                         + row_acts[i]['location'],
#         xanchor = 'left',
#         yanchor = 'middle',
#         showarrow = False,
#         font_size=7,
#         font_color=row_color
#         )
#     ann_text = dict(
#         x=0,
#         y=i,
#         xref="x",
#         yref="y",
#         text=i+1,
#         xanchor = 'left',
#         yanchor = 'middle',
#         showarrow = False,
        
#         )
#     annotations.append(ann_input)
#     annotations.append(ann_output)
#     annotations.append(ann_text)

# # Add ticks and annotations to feature importance
# for j in range(xtickvals_inf.shape[0]):
#     annotations.append(
#         dict(
#             x=xtickvals_inf[j],
#             y=-1,
#             xref="x",
#             yref="y",
#             text=xticktext_inf[j],
#             xanchor = 'center',
#             yanchor = 'middle',
#             showarrow = False,
#             font_color=colors['inf']
#         )
#     )
    
# annotations.append(
#     dict(
#         x=-xscale_scale/2,
#         y=-1.5,
#         xref="x",
#         yref="y",
#         text='Feature importance',
#         xanchor = 'center',
#         yanchor = 'middle',
#         showarrow = False,
#         font_color=colors['inf']
#     )
# )
 
# # Add ticks and annotations to scale values
# for j in range(xtickvals_scale.shape[0]):
#     annotations.append(
#         dict(
#             x=xtickvals_scale[j],
#             y=-2.2,
#             xref="x",
#             yref="y",
#             text=xticktext_scale[j],
#             xanchor = 'center',
#             yanchor = 'middle',
#             showarrow = False,
#             font_color=colors['scale'],
#         )
#     )
    
# annotations.append(
#     dict(
#         x=-xscale_scale/2,
#         y=-2.7,
#         xref="x",
#         yref="y",
#         text='Scale (lognormal distr)',
#         xanchor = 'center',
#         yanchor = 'middle',
#         showarrow = False,
#         font_color=colors['scale']
#     )
# )
    
# annotations.append(
#     dict(
#         x=11,
#         y=-1.5,
#         xref="x",
#         yref="y",
#         text='Corresponding exchanges',
#         xanchor = 'left',
#         yanchor = 'middle',
#         showarrow = False,)
# )


In [None]:
def _horizontal_bars(values, color):
    """Return one horizontal bar trace with a bar per influential exchange."""
    return go.Bar(
        x=values,
        y=np.arange(n_features_plot),
        name='All features',
        opacity=opacity_,
        orientation='h',
        width=[0.3]*n_features_plot,
        showlegend=False,
        marker_color=color,
    )


opacity_ = 0.8

fig = go.Figure()
# Importance: Spearman coefficients, drawn towards negative x
fig.add_trace(_horizontal_bars(importance_plot*(-xscale_inf), colors['inf']))
# Lognormal scales, drawn towards negative x as well
fig.add_trace(_horizontal_bars(scale_plot*(-xscale_scale), colors['scale']))
# White horizontal line from x=-130 to x=170 at y=-0.6
# NOTE(review): presumably there to fix the visible x range -- confirm
fig.add_trace(
    go.Scatter(
        x=[-130, 170],
        y=[-0.6, -0.6],
        mode='lines',
        showlegend=False,
        line_color='white',
    )
)

# Built-in tick labels are hidden on both axes; the tick texts are drawn
# through `annotations` instead.
xaxis_settings = dict(
    tickmode='array',
    tickvals=xtickvals_inf,
    ticktext=xticktext_inf,
    showticklabels=False,
)
yaxis_settings = dict(
    tickmode='array',
    tickvals=np.arange(n_features_plot),
    ticktext=[],
    autorange='reversed',
    showticklabels=False,
)
fig.update_layout(
    xaxis=xaxis_settings,
    yaxis=yaxis_settings,
    width=500,
    height=num_influential*24,
    margin=dict(l=0, r=0, t=0, b=0),
    annotations=annotations,
    barmode='group',
    bargap=0.4,
    yaxis_showgrid=False,
    yaxis_zeroline=False,
)

fig.show()

In [None]:
# Store the figure as a pickle in the "figures" folder of the GSA write directory
figures_dir = gsa_corr.write_dir / "figures"
filename = figures_dir / 'fig_gsa_results.pickle'
with open(filename, 'wb') as f:
    pickle.dump(fig, f)

# Contribution analysis

In [None]:
import brightway2 as bw
import pandas as pd

In [None]:
%%time
# Project, database and the household consumption activity
project = 'GSA for oases'
bw.projects.set_current(project)
co = bw.Database("CH consumption 1.0")
demand_act = co.search('ch hh average consumption')
assert len(demand_act) == 1
demand = {demand_act[0]: 1}

# One functional unit (amount 1) per activity whose name contains 'sector'
list_methods = [('IPCC 2013', 'climate change', 'GWP 100a')]
sectors = [act for act in co if 'sector' in act["name"]]
list_fus = [{act: 1} for act in sectors]

# Register the calculation setup and compute all sector scores at once
setup_name = 'sector_contribution_analysis'
bw.calculation_setups[setup_name] = {'inv': list_fus, 'ia': list_methods}
myMultiLCA = bw.MultiLCA(setup_name)
lcia_unit = bw.Method(list_methods[0]).metadata['unit']

# Scores per sector (column named after the LCIA unit) plus each sector's unit
fu_acts = [bw.get_activity(next(iter(el))) for el in list_fus]
fus = [fu_act['name'] for fu_act in fu_acts]
df = pd.DataFrame(index=fus, columns=[lcia_unit], data=myMultiLCA.results)
df['units'] = [fu_act['unit'] for fu_act in fu_acts]
df = df.sort_values(lcia_unit, ascending=False)

# Print the complete table without pandas truncating rows or columns
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df)

In [None]:
# First seven rows of the table (sorted by score, largest first)
df_plot = df.head(7)

In [None]:
# Horizontal bar chart; the y axis is inverted so the first row ends up on top
ax = df_plot.plot(kind='barh')
ax.invert_yaxis()


# Convergence

In [None]:
from gsa_framework.lca import LCAModel
from gsa_framework.methods.correlations import CorrelationCoefficients
from gsa_framework.convergence import Convergence
from pathlib import Path
import brightway2 as bw
import time
import numpy as np
from gsa_framework.utils import read_hdf5_array, read_pickle, write_hdf5_array, write_pickle
import h5py
import pickle

In [None]:
if __name__ == "__main__":
    # Settings
    num_params = 10000
    iterations = 2 * num_params
    fig_format = ["html", "pickle"]
    num_convergence_steps = 100
    num_convergence_plot = 10

    # Model and correlation-based GSA
    model, write_dir, gsa_seed = setup_lca_model(num_params)
    gsa = CorrelationCoefficients(
        iterations=iterations,
        model=model,
        write_dir=write_dir,
        seed=gsa_seed,
    )
    S_dict = gsa.generate_gsa_indices()
    spearman = S_dict["spearman"]

    # Inputs shown in the convergence plot: the num_convergence_plot inputs
    # with the largest and the num_convergence_plot inputs with the smallest
    # Spearman coefficient.
    descending_order = np.argsort(spearman)[::-1]
    parameter_inds_convergence_plot = np.hstack(
        [
            descending_order[:num_convergence_plot],
            descending_order[-num_convergence_plot:],
        ]
    )

    # Convergence of the GSA indices over num_convergence_steps steps
    conv = Convergence(
        gsa.filepath_Y,
        gsa.num_params,
        gsa.generate_gsa_indices,
        gsa.gsa_label,
        write_dir,
        num_steps=num_convergence_steps,
    )
    conv.run_convergence(
        parameter_inds=parameter_inds_convergence_plot,
        fig_format=fig_format,
    )