In [1]:
import bw2data as bd
import bw2calc as bc
import bw_processing as bwp
from pypardiso import spsolve
from pathlib import Path
from gsa_framework.utils import read_pickle, write_pickle

project = 'GSA for archetypes'
bd.projects.set_current(project)
const_factor = 10

cutoff = 1e-3

bd.databases

Databases dictionary with 3 object(s):
	biosphere3
	ecoinvent 3.8 cutoff
	swiss consumption 1.0

In [2]:
co = bd.Database('swiss consumption 1.0')
fu = [act for act in co if "average consumption" in act['name']][0]

write_dir = Path("write_files") / project.lower().replace(" ", "_") / fu['name'].lower().replace(" ", "_")

demand = {fu: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
fu_mapped, packages, _ = bd.prepare_lca_inputs(demand=demand, method=method, remapping=False)  # Could also add LCIA method
lca = bc.LCA(demand=fu_mapped, data_objs=packages)
lca.lci()
lca.lcia()
lca.score

1094.9290193609536

In [3]:
list(fu.exchanges())[1].as_dict()

{'output': ('swiss consumption 1.0', 'ch hh average consumption aggregated'),
 'input': ('swiss consumption 1.0', 'a42'),
 'amount': 196.99587275500957,
 'type': 'technosphere',
 'has_uncertainty': True}

# Utils

In [4]:
def get_mask(all_indices, use_indices):
    """Creates a `mask` such that `all_indices[mask]=use_indices`."""
    use_indices = np.array(use_indices, dtype=[('row', '<i4'), ('col', '<i4')])
    mask = np.zeros(len(all_indices), dtype=bool)
    for inds in use_indices:
        mask_current = all_indices==inds
        mask = mask | mask_current
    assert sum(mask)==len(use_indices)
    return mask

In [5]:
ei = bd.Database('ecoinvent 3.8 cutoff').datapackage()
co = bd.Database('swiss consumption 1.0').datapackage()
cf = bd.Method(method).datapackage()

# Technosphere
ei_tech = ei.filter_by_attribute('matrix', 'technosphere_matrix')
co_tech = co.filter_by_attribute('matrix', 'technosphere_matrix')
get_tech_resource_kind = lambda kind: np.hstack(
    [
        ei_tech.get_resource(f'ecoinvent_3.8_cutoff_technosphere_matrix.{kind}')[0], 
        co_tech.get_resource(f'swiss_consumption_1.0_technosphere_matrix.{kind}')[0]
    ]
)
indices_tech = get_tech_resource_kind('indices')
data_tech = get_tech_resource_kind('data')
flip_tech = get_tech_resource_kind('flip')

# Biosphere
ei_bio = ei.filter_by_attribute('matrix', 'biosphere_matrix')
indices_bio = ei_bio.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.indices')[0]
data_bio = ei_bio.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.data')[0]
distributions_bio = ei_bio.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.distributions')[0]

# Characterization
indices_cf = cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.indices')[0]
data_cf = cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.data')[0]
distributions_cf = cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.distributions')[0]

In [6]:
distributions_ei = ei_tech.get_resource(f'ecoinvent_3.8_cutoff_technosphere_matrix.distributions')[0] 
has_uncertainty_ei = distributions_ei['uncertainty_type'] >= 2

indices_tech_co = co_tech.get_resource(f'swiss_consumption_1.0_technosphere_matrix.indices')[0]
has_uncertainty_dict = {}
for act in bd.Database('swiss consumption 1.0'):
    exchanges = list(act.exchanges())
    col = lca.dicts.activity[act.id]
    for exc in exchanges:
        if exc.get('has_uncertainty', False):
            row = lca.dicts.activity[exc.input.id]
            has_uncertainty_dict[(exc.input.id, act.id)] = True
has_uncertainty_co = np.array([has_uncertainty_dict.get(tuple(ids), False) for ids in indices_tech_co])

has_uncertainty_tech = np.hstack(
    [
        has_uncertainty_ei,
        has_uncertainty_co,
    ]
)

# Step 1. Remove non influential with contribution analysis

## Step 1.1 Technosphere & Supply chain traversal

In [7]:
def get_inds_tech_without_noninf(lca, cutoff, max_calc=1e4):
    # Supply chain traversal
    res = bc.GraphTraversal().calculate(
        lca, cutoff=cutoff, max_calc=max_calc
    )
    edges = res['edges']
    use_indices = []
    use_indices_dict = {}
    for edge in res['edges']:
        if edge['to'] !=- 1:
            if abs(edge['impact']) > abs(lca.score * cutoff):
                row,col = edge['from'], edge['to']
                i,j = lca.dicts.activity.reversed[row], lca.dicts.activity.reversed[col]
                use_indices.append((i,j))
                use_indices_dict[(i,j)] = edge['impact']
    return use_indices

In [8]:
%%time
max_calc = 1e3
write_dir_sct = write_dir / "supply_chain_traversal" 
write_dir_sct.mkdir(exist_ok=True, parents=True)
fp_sct = write_dir_sct / f"sct.cutoff_{cutoff:.0e}.maxcalc_{max_calc:.0e}.pickle"
fp_mask_without_noninf = write_dir / f"mask.tech.without_noninf.cutoff_{cutoff:.0e}.maxcalc_{max_calc:.0e}.pickle"
if fp_sct.exists():
    use_indices_tech_without_noninf = read_pickle(fp_sct)
else:    
    use_indices_tech_without_noninf = get_inds_tech_without_noninf(lca, cutoff, max_calc)
    write_pickle(use_indices_tech_without_noninf, fp_sct)

if fp_mask_without_noninf.exists():
    mask_tech_without_noninf = read_pickle(fp_mask_without_noninf)
else:
    mask_tech_without_noninf = get_mask(indices_tech, use_indices_tech_without_noninf)
    write_pickle(mask_tech_without_noninf, fp_mask_without_noninf)

CPU times: user 1.04 ms, sys: 1.73 ms, total: 2.77 ms
Wall time: 3.6 ms


## Step 1.2 Biosphere

In [9]:
def get_inds_bio_without_noninf(lca, cutoff):
    """Find datapackage indices that correspond to B*Ainv*f, where contributions are higher than cutoff"""
    inv = lca.characterized_inventory
    finv = inv.multiply(abs(inv) > abs(lca.score * cutoff))
    # Find row and column in B*Ainv*f
    biosphere_row_col = list(zip(*finv.nonzero()))
    # Translate row and column to datapackage indices
    biosphere_reversed = lca.dicts.biosphere.reversed
    activity_reversed = lca.dicts.activity.reversed
    use_indices = []
    for row,col in biosphere_row_col:
        i, j = biosphere_reversed[row], activity_reversed[col]
        use_indices.append((i,j))
    return use_indices

use_indices_bio_without_noninf = get_inds_bio_without_noninf(lca, cutoff)
fp_mask_bio_without_noninf = write_dir / f"mask.bio.without_noninf.cutoff_{cutoff:.0e}.pickle"
if fp_mask_bio_without_noninf.exists():
    mask_bio_without_noninf = read_pickle(fp_mask_bio_without_noninf)
else:
    mask_bio_without_noninf = get_mask(indices_bio, use_indices_bio_without_noninf)
    write_pickle(mask_bio_without_noninf, fp_mask_bio_without_noninf)

## Step 1.3 Characterization

In [10]:
def get_inds_cf_without_noninf(lca, cutoff):
    """Find datapackage indices that correspond to C*B*Ainv*f, where contributions are higher than cutoff"""
    inv_sum = np.array(np.sum(lca.characterized_inventory, axis=1)).squeeze()
    # print('Characterized inventory:', inv.shape, inv.nnz)
    finv_sum = inv_sum * abs(inv_sum) > abs(lca.score * cutoff)
    characterization_row = list(finv_sum.nonzero()[0])
    # Translate row to datapackage indices
    biosphere_reversed = lca.dicts.biosphere.reversed
    use_indices = [(biosphere_reversed[row], 1) for row in characterization_row]
    return use_indices

use_indices_cf_without_noninf = get_inds_cf_without_noninf(lca, cutoff)
fp_mask_cf_without_noninf = write_dir / f"mask.cf.without_noninf.cutoff_{cutoff:.0e}.pickle"
if fp_mask_cf_without_noninf.exists():
    mask_cf_without_noninf = read_pickle(fp_mask_cf_without_noninf)
else:
    mask_cf_without_noninf = get_mask(indices_cf, use_indices_cf_without_noninf)
    write_pickle(mask_cf_without_noninf, fp_mask_cf_without_noninf)

# Step 2. Remove lowly influential with local SA

In [11]:
class LocalSAInterface:
    def __init__(self, indices, data, distributions, mask, factor=10, cutoff=1e-3):
        self.indices = indices
        self.data = data
        self.distributions = distributions
        self.has_uncertainty = self.get_uncertainty_bool(self.distributions)
        self.lca = lca
        self.factor = factor
        self.cutoff = cutoff
        self.mask = mask  # indices with high enough contributions

        assert self.indices.shape[0] == self.data.shape[0] == self.distributions.shape[0]

        self.masked_indices = self.indices[self.mask]
        self.masked_data = self.data[self.mask]
        self.masked_has_uncertainty = self.has_uncertainty[self.mask]

        self.size = len(self.masked_indices)
        self.index = None  # To indicate we haven't consumed first value yet
        self.mask_where = np.where(self.mask)[0]

    def __next__(self):
        if self.index is None:
            self.index = 0
        else:
            self.index += 1

        if self.index < self.size:
            # 0 and 1 are `no` and `unknown` uncertainty
            while self.masked_has_uncertainty[self.index]:
                self.index += 1
                if self.index >= self.size:
                    raise StopIteration
        else:
            raise StopIteration

        data = self.data.copy()
        data[self.mask_where[self.index]] *= self.factor
        return data

    @staticmethod
    def get_uncertainty_bool(distributions):
        try:
            arr = distributions['uncertainty_type'] >= 2
        except:
            arr = distributions > 0
        return arr

    @property
    def coordinates(self):
        return self.masked_indices[self.index]
    
    
def run_local_sa(
        matrix_type,
        fu_mapped,
        packages,
        indices_array,
        data_array,
        distributions_array,
        mask_without_noninf,
        flip_array=None,  # only needed for technosphere
        const_factor=10,
        cutoff=1e-3,
):

    interface = LocalSAInterface(
        indices_array,
        data_array,
        distributions_array,
        mask_without_noninf,
        const_factor,
        cutoff,
    )

    dp = bwp.create_datapackage()
    dp.add_dynamic_vector(
        matrix = f"{matrix_type}_matrix",
        interface = interface,
        indices_array = indices_array,
        flip_array = flip_array,
    )
    if matrix_type == "characterization":
        [d.update({"global_index": 1}) for d in dp.metadata['resources']]  # TODO Chris, is this correct?

    lca_local_sa = bc.LCA(demand=fu_mapped, data_objs=packages + [dp])
    lca_local_sa.lci()
    lca_local_sa.lcia()

    interface.index = None  # there should be a better way to discount the first __next__
    indices_local_sa_scores = {}

    count = 0
    try:
        while True:
            next(lca_local_sa)
            i, j = interface.coordinates
            count += 1
            indices_local_sa_scores[tuple(interface.coordinates)] = lca_local_sa.score
    except StopIteration:
        pass

    assert count <= sum(interface.mask)

    return indices_local_sa_scores

## 2.1 Technosphere

In [13]:
%%time
indices_tech_local_sa = run_local_sa(
    "technosphere",
    fu_mapped,
    packages,
    indices_tech,
    data_tech,
    has_uncertainty_tech,
    mask_tech_without_noninf,
    flip_tech,
    const_factor,
    cutoff,
)
# mask_tech_without_lowinf = get_mask(indices_tech, use_indices_tech_without_lowinf)
# assert sum(mask_tech_without_lowinf) == len(use_indices_tech_without_lowinf)

CPU times: user 5min 19s, sys: 8.6 s, total: 5min 28s
Wall time: 59.3 s


In [None]:
# fp_mask_tech_without_lowinf = write_dir / f"mask.tech.without_lowinf.cutoff_{cutoff:.0e}.pickle"
# if fp_mask_tech_without_lowinf.exists():
#     mask_tech_without_lowinf = read_pickle(fp_mask_tech_without_lowinf)
# else:
#     mask_tech_without_lowinf = get_mask(indices_tech, use_indices_tech_without_lowinf)
#     write_pickle(mask_tech_without_lowinf, fp_mask_tech_without_lowinf)

## 2.2 Biosphere

In [None]:
%%time
fp_mask_bio_without_lowinf = write_dir / f"mask.bio.without_lowinf.cutoff_{cutoff:.0e}.pickle"
if fp_mask_bio_without_lowinf.exists():
    mask_bio_without_lowinf = read_pickle(fp_mask_bio_without_lowinf)
else:
    indices_bio_local_sa = run_local_sa(
        "biosphere",
        fu_mapped,
        packages,
        indices_bio,
        data_bio,
        distributions_bio,
        mask_bio_without_noninf,
        const_factor,
        cutoff,
    )
#     mask_bio_without_lowinf = get_mask(indices_bio, use_indices_bio_without_lowinf)
#     assert sum(mask_bio_without_lowinf) == len(use_indices_bio_without_lowinf)
#     mask_bio_without_lowinf = get_mask(indices_bio, use_indices_bio_without_lowinf)
#     write_pickle(mask_bio_without_lowinf, fp_mask_bio_without_lowinf)

## 2.3 Characterization

In [None]:
%%time
fp_mask_cf_without_lowinf = write_dir / f"mask.cf.without_lowinf.cutoff_{cutoff:.0e}.pickle"
if fp_mask_cf_without_lowinf.exists():
    mask_cf_without_lowinf = read_pickle(fp_mask_cf_without_lowinf)
else:
    indices_cf_local_sa = run_local_sa(
        "characterization",
        fu_mapped,
        packages,
        indices_cf,
        data_cf,
        distributions_cf,
        mask_cf_without_noninf,
        const_factor,
        cutoff,
    )
#     mask_cf_without_lowinf = get_mask(indices_cf, use_indices_cf_without_lowinf)
#     assert sum(mask_cf_without_lowinf) == len(use_indices_cf_without_lowinf)
#     mask_cf_without_lowinf = get_mask(indices_cf, use_indices_cf_without_lowinf)
#     write_pickle(mask_cf_without_lowinf, fp_mask_cf_without_lowinf)

In [14]:
indices_tech_local_sa

{(4435, 7979): 1156.4119822218981,
 (4563, 5504): 1139.102293365673,
 (4935, 5504): 1473.9926943431763,
 (5504, 15411): 1560.145824653224,
 (5824, 20020): 1136.8258072088622,
 (5887, 7979): 1145.6603527178163,
 (6224, 4891): 1107.99334498506,
 (7051, 5504): 1107.7057857235382,
 (7302, 17206): 1479.244696309855,
 (7979, 7134): 1435.5526111428494,
 (8107, 20020): 1108.309360923258,
 (8307, 5504): 1123.1885961378125,
 (8595, 20020): 1108.1098261064194,
 (8613, 13455): 1443.0750448316296,
 (8635, 12329): 1167.470606120462,
 (8974, 20020): 1108.505535725945,
 (9164, 13648): 1106.8076721220839,
 (9649, 20020): 1106.438611957718,
 (9884, 6128): 1284.5869723835167,
 (10820, 20020): 1136.517438346823,
 (10934, 20020): 1121.7755961996038,
 (11461, 7979): 1200.3633125362712,
 (11516, 15411): 1119.0455276793457,
 (11932, 5504): 1141.0869159178342,
 (11962, 16176): 1139.2121715173084,
 (12231, 4891): 1218.4323701499593,
 (12245, 7979): 1109.221940416118,
 (12253, 4935): 1502.8791077289322,
 (12624,

In [None]:
indices_tech_local_sa

In [15]:
row,col = lca.dicts.activity[24105], lca.dicts.activity[24201]

In [16]:
from copy import deepcopy

A = deepcopy(lca.technosphere_matrix)
A[row,col] *= 10
a = lca.characterization_matrix*lca.biosphere_matrix*spsolve(A, lca.demand_array)

In [17]:
a.sum()

3867.973841453901

In [None]:
row,col