In [1]:
from pathlib import Path

import bw2calc as bc
import bw2data as bd
import bw_processing as bwp
import numpy as np
from gsa_framework.utils import read_pickle, write_pickle
from pypardiso import spsolve

# Select the Brightway project that every later cell works in.
project = 'GSA for archetypes'
bd.projects.set_current(project)
# Multiplier handed to LocalSAInterface as `factor`: one input at a time is scaled by it.
const_factor = 10
# Relative threshold: contributions are compared against abs(lca.score * cutoff).
cutoff = 1e-3

# Display the databases registered in the selected project.
bd.databases

Databases dictionary with 3 object(s):
	biosphere3
	ecoinvent 3.8 cutoff
	swiss consumption 1.0

In [2]:
# Functional unit: the first activity in the consumption database whose name
# contains "average consumption" (raises IndexError if there is none).
co = bd.Database('swiss consumption 1.0')
fu = [act for act in co if "average consumption" in act['name']][0]

# Every cached result of this notebook is stored below this directory, which is
# derived from the project name and the functional unit's name.
write_dir = Path("write_files") / project.lower().replace(" ", "_") / fu['name'].lower().replace(" ", "_")

demand = {fu: 1}
method = ("IPCC 2013", "climate change", "GWP 100a", "uncertain")
# NOTE(review): `method` is already passed here, so the trailing remark on the next line looks stale -- confirm.
fu_mapped, packages, _ = bd.prepare_lca_inputs(demand=demand, method=method, remapping=False)  # Could also add LCIA method
# Baseline (static) LCA; `fu_mapped`, `packages` and `lca` are reused by later cells.
lca = bc.LCA(demand=fu_mapped, data_objs=packages)
lca.lci()
lca.lcia()
lca.score

1094.9290193609536

# Utils

In [4]:
def get_mask(all_indices, use_indices):
    """Create a boolean `mask` such that `all_indices[mask]` holds exactly `use_indices`.

    Parameters
    ----------
    all_indices : structured numpy array with integer fields `row` and `col`.
        Assumed to hold 32-bit values, matching the `<i4` dtype that
        `use_indices` is cast to -- TODO confirm for non-bw_processing inputs.
    use_indices : sequence of `(row, col)` pairs (or records) to select.

    Returns
    -------
    numpy boolean array with one entry per element of `all_indices`.

    Raises
    ------
    AssertionError
        If the number of selected positions differs from `len(use_indices)`,
        e.g. a requested pair is missing, or pairs are duplicated on either side.
    """
    use_indices = np.array(use_indices, dtype=[('row', '<i4'), ('col', '<i4')])

    def _as_keys(indices):
        # Pack each (row, col) pair into one int64 so that membership can be
        # tested in a single vectorised pass instead of one array scan per pair.
        rows = np.asarray(indices['row']).astype(np.int64)
        cols = np.asarray(indices['col']).astype(np.int64)
        return (rows << 32) | (cols & 0xFFFFFFFF)

    mask = np.isin(_as_keys(all_indices), _as_keys(use_indices))
    n_selected = int(np.count_nonzero(mask))
    assert n_selected == len(use_indices), (
        f"mask selects {n_selected} entries but {len(use_indices)} indices were requested"
    )
    return mask

In [5]:
# Load the processed datapackage of each database and split it by target matrix.
ei = bd.Database('ecoinvent 3.8 cutoff').datapackage()
ei_tech, ei_bio = (
    ei.filter_by_attribute('matrix', matrix_name)
    for matrix_name in ('technosphere_matrix', 'biosphere_matrix')
)
co = bd.Database('swiss consumption 1.0').datapackage()
co_tech, co_bio = (
    co.filter_by_attribute('matrix', matrix_name)
    for matrix_name in ('technosphere_matrix', 'biosphere_matrix')
)
# Datapackage of the LCIA method chosen earlier in the notebook.
cf = bd.Method(method).datapackage()

# Step 1. Remove non-influential inputs with contribution analysis

## Step 1.1 Technosphere & Supply chain traversal

In [6]:
def get_inds_tech_without_noninf(lca, cutoff, max_calc=1e4):
    """Select technosphere exchanges whose supply-chain impact exceeds the cutoff.

    Runs `bc.GraphTraversal().calculate` on `lca` and keeps every returned edge
    with `abs(edge['impact']) > abs(lca.score * cutoff)`.

    Parameters
    ----------
    lca : LCA object accepted by `bc.GraphTraversal().calculate`; must expose
        `score` and `dicts.activity.reversed`.
    cutoff : relative threshold, forwarded to the traversal and also used for
        the final filter.
    max_calc : forwarded unchanged to the traversal.

    Returns
    -------
    list of `(from_id, to_id)` tuples, translated from the edge's `from`/`to`
    values through `lca.dicts.activity.reversed`.
    """
    traversal = bc.GraphTraversal().calculate(
        lca, cutoff=cutoff, max_calc=max_calc
    )
    # Computed once, after the traversal, so it reflects the same score the
    # per-edge check used before.
    threshold = abs(lca.score * cutoff)
    activity_reversed = lca.dicts.activity.reversed

    use_indices = []
    for edge in traversal['edges']:
        # Edges pointing to -1 have no entry to translate.
        # NOTE(review): presumably -1 is the artificial functional-unit node -- confirm.
        if edge['to'] == -1:
            continue
        if abs(edge['impact']) > threshold:
            use_indices.append(
                (activity_reversed[edge['from']], activity_reversed[edge['to']])
            )
    return use_indices

In [8]:
%%time
# Stack the technosphere indices of both databases into one array; the mask
# built below has one entry per element of this array.
all_indices_tech = np.hstack(
    [
        ei_tech.get_resource('ecoinvent_3.8_cutoff_technosphere_matrix.indices')[0], 
        co_tech.get_resource('swiss_consumption_1.0_technosphere_matrix.indices')[0]
    ]
)

# Forwarded to get_inds_tech_without_noninf as its `max_calc` argument.
max_calc = 1e3
write_dir_sct = write_dir / "supply_chain_traversal" 
write_dir_sct.mkdir(exist_ok=True, parents=True)
# NOTE(review): both cache file names encode only `cutoff` and `max_calc`;
# delete the files by hand after changing the underlying databases.
fp_sct = write_dir_sct / f"sct.cutoff_{cutoff:.0e}.maxcalc_{max_calc:.0e}.pickle"
fp_mask_without_noninf = write_dir / f"mask.tech.without_noninf.cutoff_{cutoff:.0e}.maxcalc_{max_calc:.0e}.pickle"
# Cache 1: index pairs kept by the supply chain traversal.
if fp_sct.exists():
    use_indices_tech_without_noninf = read_pickle(fp_sct)
else:    
    use_indices_tech_without_noninf = get_inds_tech_without_noninf(lca, cutoff, max_calc)
    write_pickle(use_indices_tech_without_noninf, fp_sct)

# Cache 2: boolean mask over `all_indices_tech` built from those pairs.
if fp_mask_without_noninf.exists():
    mask_tech_without_noninf = read_pickle(fp_mask_without_noninf)
else:
    mask_tech_without_noninf = get_mask(all_indices_tech, use_indices_tech_without_noninf)
    write_pickle(mask_tech_without_noninf, fp_mask_without_noninf)

CPU times: user 538 ms, sys: 4.6 ms, total: 543 ms
Wall time: 544 ms


## Step 1.2 Biosphere

In [9]:
def get_inds_bio_without_noninf(lca, cutoff):
    """Return datapackage index pairs of characterized-inventory entries above the cutoff.

    An entry of `lca.characterized_inventory` is kept when its absolute value
    exceeds `abs(lca.score * cutoff)`. Each kept (row, col) position is
    translated to a tuple through `lca.dicts.biosphere.reversed` (row) and
    `lca.dicts.activity.reversed` (col).
    """
    characterized = lca.characterized_inventory
    threshold = abs(lca.score * cutoff)
    # Zero out everything at or below the threshold, then read off what is left.
    significant = characterized.multiply(abs(characterized) > threshold)
    rows, cols = significant.nonzero()

    row_to_flow = lca.dicts.biosphere.reversed
    col_to_activity = lca.dicts.activity.reversed
    return [(row_to_flow[r], col_to_activity[c]) for r, c in zip(rows, cols)]

# Biosphere indices of both databases, stacked in the order ecoinvent -> consumption.
all_indices_bio = np.hstack(
    (
        ei_bio.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.indices')[0],
        co_bio.get_resource('swiss_consumption_1.0_biosphere_matrix.indices')[0],
    )
)
use_indices_bio_without_noninf = get_inds_bio_without_noninf(lca, cutoff)

# Build the mask only when no cached copy exists; otherwise load it from disk.
fp_mask_bio_without_noninf = write_dir / f"mask.bio.without_noninf.cutoff_{cutoff:.0e}.pickle"
if not fp_mask_bio_without_noninf.exists():
    mask_bio_without_noninf = get_mask(all_indices_bio, use_indices_bio_without_noninf)
    write_pickle(mask_bio_without_noninf, fp_mask_bio_without_noninf)
else:
    mask_bio_without_noninf = read_pickle(fp_mask_bio_without_noninf)

## Step 1.3 Characterization

In [10]:
def get_inds_cf_without_noninf(lca, cutoff):
    """Return characterization indices of flows whose total contribution exceeds the cutoff.

    The characterized inventory is summed over its columns, giving one total
    per row. A row is kept when the absolute value of its total exceeds
    `abs(lca.score * cutoff)`.

    Parameters
    ----------
    lca : object exposing `characterized_inventory`, `score` and
        `dicts.biosphere.reversed`.
    cutoff : relative threshold applied to `lca.score`.

    Returns
    -------
    list of `(flow_id, 1)` tuples, where `flow_id` comes from
    `lca.dicts.biosphere.reversed`.
    NOTE(review): the second element is hard-coded to 1; presumably it is the
    column used by the characterization datapackage -- confirm.
    """
    # ravel() keeps a 1-D array even when the inventory has a single row,
    # where squeeze() would collapse it to 0-d.
    inv_sum = np.asarray(np.sum(lca.characterized_inventory, axis=1)).ravel()
    threshold = abs(lca.score * cutoff)
    # Compare magnitudes directly. The previous `inv_sum * abs(inv_sum) > t`
    # parsed as `(inv_sum * abs(inv_sum)) > t`, i.e. a signed square, which
    # dropped large negative totals and compared the rest squared.
    characterization_row = np.flatnonzero(np.abs(inv_sum) > threshold)
    # Translate matrix rows to datapackage indices.
    biosphere_reversed = lca.dicts.biosphere.reversed
    return [(biosphere_reversed[row], 1) for row in characterization_row]

In [11]:
# Indices of the characterization datapackage and the subset above the cutoff.
all_indices_cf = cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.indices')[0]
use_indices_cf_without_noninf = get_inds_cf_without_noninf(lca, cutoff)

# Build the mask only when no cached copy exists; otherwise load it from disk.
fp_mask_cf_without_noninf = write_dir / f"mask.cf.without_noninf.cutoff_{cutoff:.0e}.pickle"
if not fp_mask_cf_without_noninf.exists():
    mask_cf_without_noninf = get_mask(all_indices_cf, use_indices_cf_without_noninf)
    write_pickle(mask_cf_without_noninf, fp_mask_cf_without_noninf)
else:
    mask_cf_without_noninf = read_pickle(fp_mask_cf_without_noninf)

# Step 2. Remove lowly influential with local SA

In [None]:
class LocalSAInterface:
    """Iterator yielding copies of `data` in which one masked entry is scaled by `factor`.

    Only positions selected by `mask` are visited, in order. Positions whose
    `distributions[...]['uncertainty_type']` is below 2 are skipped (0 and 1
    are no and unknown uncertainty). After each successful `next()`,
    `coordinates` gives the index record of the entry that was just scaled.

    Parameters
    ----------
    indices, data, distributions : numpy arrays with the same first dimension.
    mask : boolean array over `indices` marking the entries to perturb.
    factor : multiplier applied to the single perturbed entry.
    cutoff : stored on the instance; not used by the class itself.
    lca : optional object stored as `self.lca`. This attribute used to be read
        from a notebook-level global named `lca`, which made the class unusable
        outside that notebook; it is not used by the class itself.
    """

    def __init__(self, indices, data, distributions, mask, factor=10, cutoff=1e-3, lca=None):
        self.indices = indices
        self.data = data
        self.distributions = distributions
        self.lca = lca
        self.factor = factor
        self.cutoff = cutoff
        self.mask = mask  # indices with high enough contributions

        assert self.indices.shape[0] == self.data.shape[0] == self.distributions.shape[0]

        self.masked_indices = self.indices[self.mask]
        self.masked_data = self.data[self.mask]
        self.masked_distributions = self.distributions[self.mask]

        self.size = len(self.masked_indices)
        self.index = None  # To indicate we haven't consumed first value yet
        # Positions of the masked entries inside the full `data` array.
        self.mask_where = np.where(self.mask)[0]

    def __next__(self):
        """Advance to the next uncertain masked entry and return the perturbed data.

        Raises
        ------
        StopIteration
            When no further masked entry with `uncertainty_type >= 2` exists.
        """
        position = 0 if self.index is None else self.index + 1
        # Skip entries without a usable uncertainty distribution.
        while (
            position < self.size
            and self.masked_distributions[position]['uncertainty_type'] < 2
        ):
            position += 1
        self.index = position
        if position >= self.size:
            raise StopIteration

        # Perturb a fresh copy so the stored baseline data is never modified.
        data = self.data.copy()
        data[self.mask_where[self.index]] *= self.factor
        return data

    @property
    def coordinates(self):
        """Index record of the masked entry perturbed by the latest `next()` call."""
        return self.masked_indices[self.index]
    

## 2.1 Technosphere

In [None]:
# Add swiss consumption to biosphere connection below

In [12]:
# Inspect the ecoinvent biosphere data resource (element 0 of what get_resource returns).
ei_bio.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.data')[0]

array([5.0965748e-10, 8.4702134e-11, 4.9607101e-09, ..., 4.0835706e-14,
       1.2208397e-10, 4.1007302e-13], dtype=float32)

In [None]:
%%time

matrix_type = "biosphere"

interface = LocalSAInterface(
    all_indices,
    all_data,
    all_distributions,
    mask_without_noninf,
    const_factor,
    cutoff,
)

dp = bwp.create_datapackage()
dp.add_dynamic_vector(
    matrix = f"{matrix_type}_matrix",
    interface = interface,
    indices_array = all_indices,
)

lca_local_sa = bc.LCA(demand=fu_mapped, data_objs=packages + [dp])
lca_local_sa.lci()
lca_local_sa.lcia()

interface.index = None  # there should be a better way to discount the first __next__ 
use_indices_without_lowinf = []

count = 0
try:
    while True:
        next(lca_local_sa)
        i, j = interface.coordinates
#         print(
#             count, 
#             bio_interface.index, 
#             lca_local_sa.score,
#             bio_interface.coordinates, (row, col),
#     #         lca_local_sa.dicts.biosphere[row, col]
#         )
        count += 1
        use_indices_bio_without_lowinf.append(bio_interface.coordinates)
except StopIteration:
    pass

assert count <= sum(bio_interface.mask)

mask_bio_without_lowinf = get_mask(all_indices_bio, use_indices_bio_without_lowinf)

assert sum(mask_bio_without_lowinf) == count

## 2.2 Biosphere

In [None]:
%%time

bio_interface = LocalSAInterface(
    all_indices_bio,
    ei_bio.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.data')[0],
    ei_bio.get_resource('ecoinvent_3.8_cutoff_biosphere_matrix.distributions')[0],
    
    ,
    const_factor,
)

dp = bwp.create_datapackage()
dp.add_dynamic_vector(
    matrix = 'biosphere_matrix',
    interface = bio_interface,
    indices_array = all_indices_bio,
)

lca_local_sa = bc.LCA(demand=fu_mapped, data_objs=packages + [dp])
lca_local_sa.lci()
lca_local_sa.lcia()

bio_interface.index = None  # there should be a better way to discount the first __next__ 
use_indices_bio_without_lowinf = []

count = 0
try:
    while True:
        next(lca_local_sa)
        i, j = bio_interface.coordinates
        row, col = lca_local_sa.dicts.biosphere[i], lca_local_sa.dicts.activity[j]
        print(
            count, 
            bio_interface.index, 
            lca_local_sa.score,
            bio_interface.coordinates, (row, col),
    #         lca_local_sa.dicts.biosphere[row, col]
        )
        count += 1
        use_indices_bio_without_lowinf.append(bio_interface.coordinates)
except StopIteration:
    pass

assert count <= sum(bio_interface.mask)

mask_bio_without_lowinf = get_mask(all_indices_bio, use_indices_bio_without_lowinf)

assert sum(mask_bio_without_lowinf) == count

## 2.3 Characterization

In [None]:
%%time

cf_interface = LocalSAInterface(
    all_indices_cf,
    cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.data')[0],
    cf.get_resource('IPCC_2013_climate_change_GWP_100a_uncertain_matrix_data.distributions')[0],
    mask_cf_without_noninf,
    const_factor,
)

dp = bwp.create_datapackage()
dp.add_dynamic_vector(
    matrix = 'characterization_matrix',
    interface = cf_interface,
    indices_array = all_indices_cf,
)
[d.update({"global_index": 1}) for d in dp.metadata['resources']]  # TODO Chris, is this correct?

lca_local_sa = bc.LCA(demand=fu_mapped, data_objs=packages + [dp])
lca_local_sa.lci()
lca_local_sa.lcia()

cf_interface.index = None  # there should be a better way to discount the first __next__ 
use_indices_cf_without_lowinf=[]

count = 0
try:
    while True:
        next(lca_local_sa)
        i, j = cf_interface.coordinates
        row = lca_local_sa.dicts.biosphere[i]
        print(
            count, 
            cf_interface.index, 
            lca_local_sa.score,
            cf_interface.coordinates, (row, 1),
        )
        count += 1
        use_indices_cf_without_lowinf.append(cf_interface.coordinates)
except StopIteration:
    pass

assert count <= sum(cf_interface.mask)

mask_cf_without_lowinf = get_mask(all_indices_cf, use_indices_cf_without_lowinf)

assert sum(mask_cf_without_lowinf) == count

In [None]:
# Look up the Brightway object for the first element of each selected CF index pair.
[bd.get_activity(flow_id) for flow_id, _col in use_indices_cf_without_noninf]