In [None]:
import sys
from copy import deepcopy
from pathlib import Path

import bw2calc as bc
import bw2data as bd
import bw_processing as bwp
import numpy as np
from fs.zipfs import ZipFS
from gsa_framework.utils import read_pickle, write_pickle
from pypardiso import spsolve

# The local akula checkout must be on sys.path before importing from it
sys.path.append('/Users/akim/PycharmProjects/akula')

# Local files
from akula.sensitivity_analysis.local_sa import *

# Analysis settings shared by the cells below
const_factor = 10  # Perturbation factor handed to the local SA runs
cutoff = 1e-6  # For contribution analysis
max_calc = 1e16  # For supply chain traversal

# Activate the Brightway project used throughout the notebook
project = 'GSA for archetypes'
bd.projects.set_current(project)

# Setups

In [None]:
# Functional unit: the activity whose name matches `fu_name` exactly
co = bd.Database('swiss consumption 1.0')
fu_name = "ch hh average consumption aggregated, years 151617"
fu = [act for act in co if act['name'] == fu_name][0]

# Output directories are derived from the project and functional unit names
project_slug = project.lower().replace(" ", "_")
fu_slug = fu['name'].lower().replace(" ", "_").replace(",", "")
write_dir = Path("write_files") / project_slug / fu_slug
write_dir_sct = write_dir / "supply_chain_traversal"
write_dir_sct.mkdir(exist_ok=True, parents=True)

# Static LCA run; its score is kept as the reference value for later cells
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
demand = {fu: 1}
fu_mapped, packages, _ = bd.prepare_lca_inputs(
    demand=demand,
    method=method,
    remapping=False,
)
lca = bc.LCA(demand=fu_mapped, data_objs=packages)
lca.lci()
lca.lcia()
static_score = deepcopy(lca.score)
static_score

In [None]:
# TODO make packages static

In [None]:
# Datapackages of both databases and of the LCIA method
ei = bd.Database('ecoinvent 3.8 cutoff').datapackage()
co = bd.Database('swiss consumption 1.0').datapackage()
cf = bd.Method(method).datapackage()

# Technosphere: ecoinvent entries first, swiss consumption entries appended after
tei = ei.filter_by_attribute('matrix', 'technosphere_matrix')
tco = co.filter_by_attribute('matrix', 'technosphere_matrix')


def get_tresource_kind(kind):
    """Return the technosphere resource `kind` of ecoinvent and swiss consumption, concatenated."""
    ei_part = tei.get_resource(f'ecoinvent_3.8_cutoff_technosphere_matrix.{kind}')[0]
    co_part = tco.get_resource(f'swiss_consumption_1.0_technosphere_matrix.{kind}')[0]
    return np.hstack([ei_part, co_part])


tindices, tdata, tflip = (
    get_tresource_kind(kind) for kind in ('indices', 'data', 'flip')
)

# Biosphere: only the ecoinvent datapackage is read here
bei = ei.filter_by_attribute('matrix', 'biosphere_matrix')
bindices = bei.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.indices')[0]
bdata = bei.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.data')[0]
bdistributions = bei.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.distributions')[0]

# Characterization factors of the selected method
cindices = cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.indices')[0]
cdata = cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.data')[0]
cdistributions = cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.distributions')[0]

In [None]:
# Get technosphere uncertainty boolean array

# ecoinvent: an exchange counts as uncertain when its `uncertainty_type` is >= 2
# (presumably stats_arrays ids, where 0 and 1 mean undefined / no uncertainty -- TODO confirm)
distributions_ei = tei.get_resource('ecoinvent_3.8_cutoff_technosphere_matrix.distributions')[0]
has_uncertainty_ei = distributions_ei['uncertainty_type'] >= 2

# swiss consumption: uncertainty is flagged on the exchanges themselves.
# Keys are (input id, activity id); assumes this matches the (row, col) layout
# of the entries in `tindices_co` -- TODO confirm.
tindices_co = tco.get_resource('swiss_consumption_1.0_technosphere_matrix.indices')[0]
has_uncertainty_dict = {
    (exc.input.id, act.id): True
    for act in bd.Database('swiss consumption 1.0')
    for exc in act.exchanges()
    if exc.get('has_uncertainty', False)
}
# Explicit dtype keeps the array boolean even if `tindices_co` is empty
has_uncertainty_co = np.array(
    [has_uncertainty_dict.get(tuple(ids), False) for ids in tindices_co],
    dtype=bool,
)

# Same ordering as `tindices`: ecoinvent first, then swiss consumption
has_uncertainty_tech = np.hstack(
    [
        has_uncertainty_ei,
        has_uncertainty_co,
    ]
)

# Step 1. Remove non-influential inputs with contribution analysis

## Step 1.1 Technosphere & Supply chain traversal

In [None]:
%%time
fp_sct = write_dir_sct / f"sct.cutoff_{cutoff:.0e}.maxcalc_{max_calc:.0e}.pickle"
if fp_sct.exists():
    tindices_wo_noninf = read_pickle(fp_sct)
else:    
    tindices_wo_noninf = get_tindices_wo_noninf(lca, cutoff, max_calc)
    write_pickle(tindices_wo_noninf, fp_sct)

fp_tmask_wo_noninf = write_dir / f"mask.tech.without_noninf.sct.cutoff_{cutoff:.0e}.maxcalc_{max_calc:.0e}.pickle"
if fp_tmask_wo_noninf.exists():
    tmask_wo_noninf = read_pickle(fp_tmask_wo_noninf)
else:
    tmask_wo_noninf = get_mask(tindices, tindices_wo_noninf)
    write_pickle(tmask_wo_noninf, fp_tmask_wo_noninf)

## Step 1.2 Biosphere

In [None]:
# Biosphere indices kept after removing non-influential ones; always recomputed
bindices_wo_noninf = get_bindices_wo_noninf(lca)

# Mask over `bindices` selecting the kept indices (cached on disk)
fp_bmask_wo_noninf = write_dir / "mask.bio.without_noninf.pickle"
if not fp_bmask_wo_noninf.exists():
    bmask_wo_noninf = get_mask(bindices, bindices_wo_noninf)
    write_pickle(bmask_wo_noninf, fp_bmask_wo_noninf)
else:
    bmask_wo_noninf = read_pickle(fp_bmask_wo_noninf)

## Step 1.3 Characterization

In [None]:
# Characterization indices kept after removing non-influential ones; always recomputed
cindices_wo_noninf = get_cindices_wo_noninf(lca)

# Mask over `cindices` selecting the kept indices (cached on disk)
fp_cmask_wo_noninf = write_dir / "mask.cf.without_noninf.pickle"
if not fp_cmask_wo_noninf.exists():
    cmask_wo_noninf = get_mask(cindices, cindices_wo_noninf)
    write_pickle(cmask_wo_noninf, fp_cmask_wo_noninf)
else:
    cmask_wo_noninf = read_pickle(fp_cmask_wo_noninf)

# Step 2. Run local SA

## 2.1 Technosphere

In [None]:
def run_local_sa_tech(
    matrix_type,
    fu_mapped,
    packages,
    tindices,
    tdata,
    has_uncertainty_tech,
    mask_tech_without_noninf,
    flip_tech,
    factors,
    write_dir,
):
    """Run the local SA once per factor and stack the per-factor results.

    For every entry of `factors` the result of `run_local_sa` is cached in
    `write_dir` as a pickle; an existing pickle is read instead of recomputing.
    The file names also embed the module-level `cutoff` and `max_calc`.

    Parameters
    ----------
    matrix_type, fu_mapped, packages, tindices, tdata, has_uncertainty_tech,
    mask_tech_without_noninf, flip_tech
        Forwarded unchanged, in this order, to `run_local_sa`.
    factors
        Iterable of numeric factors; each is passed as the last positional
        argument of `run_local_sa`.
    write_dir
        Directory (path object supporting `/`) where per-factor pickles live.

    Returns
    -------
    dict
        Same keys as the first per-factor result; each value is the
        `np.hstack` of that key's values over all factors, in `factors` order.

    Raises
    ------
    ValueError
        If `factors` is empty.
    """
    local_sa = None
    for factor in factors:
        fp_factor = write_dir / f"local_sa.tech.factor_{factor:.0e}.cutoff_{cutoff:.0e}.maxcalc_{max_calc:.0e}.pickle"
        if fp_factor.exists():
            local_sa_current = read_pickle(fp_factor)
        else:
            # Use the arrays passed in as arguments, not same-purpose globals
            local_sa_current = run_local_sa(
                matrix_type,
                fu_mapped,
                packages,
                tindices,
                tdata,
                has_uncertainty_tech,
                mask_tech_without_noninf,
                flip_tech,
                factor,
            )
            write_pickle(local_sa_current, fp_factor)
        if local_sa is None:
            # Copy so the accumulated result never aliases a per-factor result
            local_sa = deepcopy(local_sa_current)
        else:
            local_sa = {k: np.hstack([local_sa[k], local_sa_current[k]]) for k in local_sa}
    if local_sa is None:
        raise ValueError("`factors` must contain at least one factor")
    return local_sa
        

In [None]:
%%time
fp_local_sa_tech = write_dir / f"local_sa.tech.cutoff_{cutoff:.0e}.maxcalc_{max_calc:.0e}.pickle"
if fp_local_sa_tech.exists():
    local_sa_tech = read_pickle(fp_local_sa_tech)
else:
    local_sa_tech = run_local_sa_tech(
        "technosphere",
        fu_mapped,
        packages,
        indices_tech,
        data_tech,
        has_uncertainty_tech,
        mask_tech_without_noninf,
        flip_tech,
        [1/const_factor, const_factor],
        write_dir
    )
    write_pickle(local_sa_tech, fp_local_sa_tech)

In [None]:
# if fp_mask_tech_without_lowinf.exists():
#     mask_tech_without_lowinf = read_pickle(fp_mask_tech_without_lowinf)
# else:
#     mask_tech_without_lowinf = get_mask(indices_tech, use_indices_tech_without_lowinf)
#     write_pickle(mask_tech_without_lowinf, fp_mask_tech_without_lowinf)

In [None]:
# TODO cutoffs

## 2.2 Biosphere

In [None]:
%%time
fp_local_sa_bio = write_dir / f"local_sa.bio.pickle"
if fp_local_sa_bio.exists():
    local_sa_bio = read_pickle(fp_local_sa_bio)
else:
    local_sa_bio = run_local_sa(
        "biosphere",
        fu_mapped,
        packages,
        indices_bio,
        data_bio,
        distributions_bio,
        mask_bio_without_noninf,
        None,
        const_factor,
    )
    write_pickle(local_sa_bio, fp_local_sa_bio)
    
#     mask_bio_without_lowinf = get_mask(indices_bio, use_indices_bio_without_lowinf)
#     assert sum(mask_bio_without_lowinf) == len(use_indices_bio_without_lowinf)
#     mask_bio_without_lowinf = get_mask(indices_bio, use_indices_bio_without_lowinf)
#     write_pickle(mask_bio_without_lowinf, fp_mask_bio_without_lowinf)

## 2.3 Characterization

In [None]:
%%time
fp_local_sa_cf = write_dir / f"local_sa.cf.pickle"
if fp_local_sa_cf.exists():
    local_sa_cf = read_pickle(fp_local_sa_cf)
else:
    local_sa_cf = run_local_sa(
        "characterization",
        fu_mapped,
        packages,
        indices_cf,
        data_cf,
        distributions_cf,
        mask_cf_without_noninf,
        None,
        const_factor,
    )
    write_pickle(local_sa_cf, fp_local_sa_cf)
#     mask_cf_without_lowinf = get_mask(indices_cf, use_indices_cf_without_lowinf)
#     assert sum(mask_cf_without_lowinf) == len(use_indices_cf_without_lowinf)
#     mask_cf_without_lowinf = get_mask(indices_cf, use_indices_cf_without_lowinf)
#     write_pickle(mask_cf_without_lowinf, fp_mask_cf_without_lowinf)

## 2.4 Virtual markets

In [None]:
# Load the virtual markets datapackage shipped with akula as a zip archive
from akula.virtual_markets import DATA_DIR

fp_virtual_markets = DATA_DIR / "virtual-markets.zip"
dp = bwp.load_datapackage(
    ZipFS(fp_virtual_markets)
)

In [None]:
# Select the technosphere entries that belong to the virtual markets.
# Indexing `tindices`, `tdata` and `tflip` with the same mask keeps the three
# arrays aligned with each other.
indices_vm_unordered = dp.get_resource('virtual markets.indices')[0]
mask_vm_tech = get_mask(tindices, indices_vm_unordered)
# flip_vm = dp.get_resource('virtual markets.flip')[0]
indices_vm = tindices[mask_vm_tech]
data_vm = tdata[mask_vm_tech]
flip_vm = tflip[mask_vm_tech]
# Every virtual market entry is included: both arrays are all-True
distributions_vm = np.ones(len(indices_vm), dtype=bool)
mask_vm = distributions_vm

In [None]:
# Virtual markets local SA, cached on disk like the other local SA results.
# The cache check uses this cell's own file, `fp_local_sa_vm`.
fp_local_sa_vm = write_dir / "local_sa.virtual_markets.pickle"
if fp_local_sa_vm.exists():
    local_sa_vm = read_pickle(fp_local_sa_vm)
else:
    local_sa_vm = run_local_sa(
        "technosphere",
        fu_mapped,
        packages,
        indices_vm,
        data_vm,
        distributions_vm,
        mask_vm,
        flip_vm,
        const_factor,
    )
    write_pickle(local_sa_vm, fp_local_sa_vm)

## 2.5 Remove lowly influential inputs based on variance

In [None]:
# Add static score
local_sa_list = [local_sa_tech, local_sa_bio, local_sa_cf]
for dict_ in local_sa_list:
    values = np.vstack(list(dict_.values()))
    values = np.hstack([values, np.ones((len(values), 1))*static_score])
    variances = np.var(values, axis=1)
    for i,k in enumerate(dict_.keys()):
#         dict_.update({k: values[i,:]})
        dict_[k] = {
            "arr": values[i,:],
            "var": variances[i],
        }

In [None]:
# Find threshold for variance
num_parameters_step2 = 100
# Collect all variances
variances = np.array([v['var'] for dict_ in local_sa_list for k,v in dict_.items() ])
variances = np.sort(variances)[-1::-1]
variances_threshold = variances[:num_parameters_step2][-1]

In [None]:
# Remove lowly influential
get_indices_high_variance = lambda dict_: \
    [k for k in dict_ if dict_[k]['var'] >= variances_threshold]

use_indices_tech_without_lowinf = get_indices_high_variance(local_sa_tech)
mask_tech_without_lowinf = get_mask(indices_tech, use_indices_tech_without_lowinf)

use_indices_bio_without_lowinf = get_indices_high_variance(local_sa_bio)
mask_bio_without_lowinf = get_mask(indices_bio, use_indices_bio_without_lowinf)

use_indices_cf_without_lowinf = get_indices_high_variance(local_sa_cf)
mask_cf_without_lowinf = get_mask(indices_cf, use_indices_cf_without_lowinf)

assert sum(mask_tech_without_lowinf) + sum(mask_bio_without_lowinf) + sum(mask_cf_without_lowinf) == num_parameters_step2