# Contribution analysis and comparison

In [1]:
import bw2data as bd
import bw2calc as bc
import numpy as np
import pandas as pd
from os.path import commonprefix
import tabulate

In [2]:
# Select the Brightway project; the cells below read the "ecoinvent 3.7.1" database
# and therefore assume it is available in this project.
bd.projects.set_current("ecoinvent 3.7.1 bw2")

## Shallow supply chain graph traversal

This function prints a shallow, recursive traversal of an activity's supply chain; for a more powerful (though more complicated) algorithm, use the [graph traversal class](https://2.docs.brightway.dev/lca.html#graph-traversal).

In [3]:
def print_recursive_calculation(activity, lcia_method, lca_obj=None, total_score=None, amount=1, level=0, max_level=3, cutoff=1e-2):
    """Print a depth-limited, indented breakdown of the LCIA score of ``activity``.

    Each printed line has the form ``<fraction> (<score>): <activity>`` and is indented
    by two spaces per level. ``fraction`` is the score of the input relative to
    ``total_score``; the activity label is truncated to 70 characters.

    Inputs:

        ``activity``: ``Activity`` object whose technosphere inputs are traversed.
        ``lcia_method``: Tuple identifying a ``Method``.
        ``lca_obj``: ``LCA`` object to reuse. When ``None`` (the normal first call), a new
            one is built for ``activity`` and its score becomes ``total_score``.
        ``total_score``: Reference score for the fractions and the cutoff. Required
            whenever ``lca_obj`` is given.
        ``amount``: Amount of ``activity`` that is demanded.
        ``level``: Current depth; controls the indentation and the recursion limit.
        ``max_level``: Inputs are only expanded while ``level < max_level``.
        ``cutoff``: Inputs whose absolute score is at or below ``cutoff * total_score``
            are neither printed nor expanded.

    Returns:

        Nothing, but prints to stdout.

    Raises:

        ``ValueError`` if ``lca_obj`` is given without ``total_score``.

    """
    if lca_obj is None:
        lca_obj = bc.LCA({activity: amount}, lcia_method)
        lca_obj.lci()
        lca_obj.lcia()
        total_score = lca_obj.score
    elif total_score is None:
        raise ValueError("`total_score` is required when `lca_obj` is provided")
    else:
        lca_obj.redo_lcia({activity: amount})
        if abs(lca_obj.score) <= abs(total_score * cutoff):
            return

    # A zero total has no meaningful relative share; show NaN instead of dividing by zero.
    share = lca_obj.score / total_score if total_score else float("nan")
    # The score must be printed before recursing: the shared ``lca_obj`` is overwritten
    # by every ``redo_lcia`` call further down the tree.
    print("{}{:4.3f} ({:06.4f}): {:.70}".format("  " * level, share, lca_obj.score, str(activity)))

    if level >= max_level:
        return

    for exc in activity.technosphere():
        print_recursive_calculation(
            activity=exc.input,
            lcia_method=lcia_method,
            lca_obj=lca_obj,
            total_score=total_score,
            amount=amount * exc['amount'],
            level=level + 1,
            max_level=max_level,
            cutoff=cutoff,
        )

Example application

In [4]:
# Take the first dataset in the database whose name matches exactly; ``next`` raises
# ``StopIteration`` if no dataset has this name.
act = next(obj for obj in bd.Database("ecoinvent 3.7.1") if obj['name'] == 'polyethylene production, low density, granulate')
act

'polyethylene production, low density, granulate' (kilogram, RER, None)

In [5]:
# LCIA method identifier passed as ``lcia_method`` to the calculations below.
ipcc = ('IPCC 2013', 'climate change', 'GWP 100a')

In [6]:
# Default settings: expand at most 3 levels and skip inputs at or below 1% of the total score.
print_recursive_calculation(act, ipcc)

1.000 (0.0230): 'polyethylene production, low density, granulate' (kilogram, RER, None
  0.196 (0.0045): 'market group for electricity, medium voltage' (kilowatt hour, RER, No
    0.195 (0.0045): 'market group for electricity, medium voltage' (kilowatt hour, Europe 
      0.041 (0.0009): 'market for electricity, medium voltage' (kilowatt hour, DE, None)
      0.013 (0.0003): 'market for electricity, medium voltage' (kilowatt hour, ES, None)
      0.014 (0.0003): 'market for electricity, medium voltage' (kilowatt hour, GB, None)
      0.018 (0.0004): 'market for electricity, medium voltage' (kilowatt hour, IT, None)
      0.022 (0.0005): 'market for electricity, medium voltage' (kilowatt hour, PL, None)
      0.011 (0.0002): 'market for electricity, medium voltage' (kilowatt hour, UA, None)
  0.018 (0.0004): 'market for chemical, organic' (kilogram, GLO, None)
    0.018 (0.0004): 'chemical production, organic' (kilogram, GLO, None)
  0.731 (0.0168): 'market for ethylene, average' (kilog

In [7]:
# Expand up to 7 levels, but only follow inputs above 2.5% of the total score.
print_recursive_calculation(act, ipcc, max_level=7, cutoff=0.025)

1.000 (0.0230): 'polyethylene production, low density, granulate' (kilogram, RER, None
  0.196 (0.0045): 'market group for electricity, medium voltage' (kilowatt hour, RER, No
    0.195 (0.0045): 'market group for electricity, medium voltage' (kilowatt hour, Europe 
      0.041 (0.0009): 'market for electricity, medium voltage' (kilowatt hour, DE, None)
        0.041 (0.0009): 'electricity voltage transformation from high to medium voltage' (kilo
          0.041 (0.0009): 'market for electricity, high voltage' (kilowatt hour, DE, None)
  0.731 (0.0168): 'market for ethylene, average' (kilogram, RER, None)
    0.731 (0.0168): 'ethylene production, average' (kilogram, RER, None)
  0.027 (0.0006): 'chemical factory construction, organics' (unit, RER, None)


Set the cutoff to zero if you want all inputs, but lower `max_level` as well; otherwise there will be a lot of printing!

In [8]:
# cutoff=0 keeps every input with a non-zero score; max_level=1 limits the listing to direct inputs.
print_recursive_calculation(act, ipcc, max_level=1, cutoff=0)

1.000 (0.0230): 'polyethylene production, low density, granulate' (kilogram, RER, None
  0.000 (0.0000): 'market for chemicals, inorganic' (kilogram, GLO, None)
  0.196 (0.0045): 'market group for electricity, medium voltage' (kilowatt hour, RER, No
  0.002 (0.0000): 'market group for waste plastic, mixture' (kilogram, RER, None)
  0.018 (0.0004): 'market for chemical, organic' (kilogram, GLO, None)
  0.731 (0.0168): 'market for ethylene, average' (kilogram, RER, None)
  0.001 (0.0000): 'market for compressed air, 600 kPa gauge' (cubic meter, RER, None)
  0.027 (0.0006): 'chemical factory construction, organics' (unit, RER, None)
  0.002 (0.0000): 'market for nitrogen, liquid' (kilogram, RER, None)
  0.002 (0.0001): 'market for propylene' (kilogram, RER, None)
  0.000 (0.0000): 'market for hydrogen, liquid' (kilogram, RER, None)
  0.004 (0.0001): 'market for wastewater, unpolluted' (cubic meter, CH, None)
  0.001 (0.0000): 'market for solvent, organic' (kilogram, GLO, None)
  0.000 (0.

## Compare activities

Compare the LCA scores of a list of activities, either with the same name, or with similar attributes.

In [9]:
def compare_activities(activities, lcia_method, threshold=0.1):
    """Compare selected activities to see if they are substantially different.

    The activities count as similar when the spread between the highest and the lowest
    score is zero, or smaller than ``threshold`` times the largest absolute score.

    Inputs:

        ``activities``: Iterable of ``Activity`` objects.
        ``lcia_method``: Tuple identifying a ``Method``
        ``threshold``: Relative spread below which the scores count as similar
            (default ``0.1``, i.e. 10%).

    Returns:

        Nothing, but prints to stdout.

    Raises:

        ``ValueError`` if ``activities`` is empty.

    """
    # Materialise once: the collection is iterated several times below, which would
    # silently exhaust a generator after the first pass.
    activities = list(activities)
    if not activities:
        raise ValueError("`activities` must contain at least one activity")

    # One LCA object is set up for the whole list and reused through ``redo_lcia``.
    lca = bc.LCA({a: 1 for a in activities}, lcia_method)
    lca.lci()
    lca.lcia()

    # First pass: Are all scores close?
    scores = []
    for a in activities:
        lca.redo_lcia({a: 1})
        scores.append(lca.score)

    spread = max(scores) - min(scores)
    # Use the largest magnitude as reference so negative scores are judged symmetrically;
    # the explicit zero-spread test covers the case where every score is exactly zero.
    reference = max(abs(s) for s in scores)
    if spread == 0 or spread < threshold * reference:
        print("All activities similar")
        return

    print("Differences observed. LCA scores:")
    for x, y in zip(scores, activities):
        print("\t{:5.3f}{}".format(x, y))

In [10]:
# PET production datasets located in Switzerland (markets, waste treatment and
# "to generic market" datasets are excluded).
compare_activities(
    [
        ds
        for ds in bd.Database("ecoinvent 3.7.1")
        if 'polyethylene terephthalate' in ds['name'].lower()
        and not ds['name'].startswith("market")
        and 'waste' not in ds['name'].lower()
        and 'to generic market' not in ds['name'].lower()
        and ds['location'] == 'CH'
    ],
    ipcc,
)

All activities similar


In [11]:
# All PET production datasets, regardless of location (markets, waste treatment and
# "to generic market" datasets are excluded).
pet_production = [
    ds
    for ds in bd.Database("ecoinvent 3.7.1")
    if 'polyethylene terephthalate' in ds['name'].lower()
    and not ds['name'].startswith("market")
    and 'waste' not in ds['name'].lower()
    and 'to generic market' not in ds['name'].lower()
]

In [12]:
# Same comparison as before, now across every location in ``pet_production``.
compare_activities(pet_production, ipcc)

Differences observed. LCA scores:
	1.013'polyethylene terephthalate production, granulate, bottle grade, recycled' (kilogram, RoW, None)
	3.164'polyethylene terephthalate production, granulate, bottle grade' (kilogram, CA-QC, None)
	2.838'polyethylene terephthalate production, granulate, bottle grade' (kilogram, RoW, None)
	2.944'polyethylene terephthalate production, granulate, amorphous' (kilogram, RER, None)
	0.438'polyethylene terephthalate production, granulate, bottle grade, recycled' (kilogram, CH, None)
	0.397'polyethylene terephthalate production, granulate, amorphous, recycled' (kilogram, CH, None)
	2.748'polyethylene terephthalate production, granulate, bottle grade' (kilogram, RER, None)
	1.093'polyethylene terephthalate production, granulate, amorphous, recycled' (kilogram, Europe without Switzerland, None)
	1.588'polyethylene terephthalate production, granulate, amorphous, recycled' (kilogram, RoW, None)
	2.943'polyethylene terephthalate production, granulate, amorphous' 

## Construct aggregated arborescence

Group by CPC codes

In [17]:
def find_leaves(activity, lcia_method, results=None, lca_obj=None, amount=1, total_score=None, level=0, max_level=3, cutoff=2.5e-2):
    """Traverse the supply chain of an activity to find leaves - places where the impact of that
    component falls below a threshold value.

    Inputs:

        ``activity``: ``Activity`` object at which the traversal starts.
        ``lcia_method``: Tuple identifying a ``Method``.
        ``results``: Optional list that collected leaves are appended to (in place).
        ``lca_obj``: ``LCA`` object to reuse. When ``None``, a new one is built for
            ``activity`` and its score becomes ``total_score``.
        ``amount``: Amount of ``activity`` that is demanded.
        ``total_score``: Reference score for fractions and cutoffs. Required whenever
            ``lca_obj`` is given.
        ``level``: Depth of ``activity`` in the traversal.
        ``max_level``: Depth at which a branch is recorded as a leaf without expanding it.
        ``cutoff``: Fraction of ``total_score`` at or below which a branch becomes a leaf.

    Returns a list of ``(fraction of total impact, specific impact, amount, Activity instance)``
    tuples, sorted in descending order.

    Raises ``ValueError`` if ``lca_obj`` is given without ``total_score``."""
    if results is None:
        results = []

    if lca_obj is None:
        lca_obj = bc.LCA({activity: amount}, lcia_method)
        lca_obj.lci()
        lca_obj.lcia()
        total_score = lca_obj.score
        # The score of the starting activity is the total itself, so no cutoff test is needed.
        check_cutoff = False
    elif total_score is None:
        raise ValueError("`total_score` is required when `lca_obj` is provided")
    else:
        check_cutoff = True

    _collect_leaves(activity, results, lca_obj, amount, total_score, level, max_level, cutoff, check_cutoff)

    # Sort once, after the whole traversal, instead of at every recursion level.
    return sorted(results, reverse=True)


def _collect_leaves(activity, results, lca_obj, amount, total_score, level, max_level, cutoff, check_cutoff):
    """Recursive worker for ``find_leaves``: append the leaves found below ``activity`` to ``results``."""
    if check_cutoff:
        lca_obj.redo_lcia({activity: amount})
        if abs(lca_obj.score) <= abs(total_score * cutoff) or level >= max_level:
            # Leaves below one millionth of the total are dropped entirely.
            if abs(lca_obj.score) > abs(total_score * 1e-6):
                results.append((lca_obj.score / total_score, lca_obj.score, amount, activity))
            return

    # Add direct impacts from this activity, if relevant
    # NOTE(review): the biosphere matrix column is looked up through ``product_dict``;
    # this presumably relies on activity and product indices coinciding - confirm
    # whether ``activity_dict`` is the intended mapping.
    da = np.zeros_like(lca_obj.demand_array)
    da[lca_obj.product_dict[activity]] = amount
    direct = (lca_obj.characterization_matrix * lca_obj.biosphere_matrix * da).sum()
    if abs(direct) >= abs(total_score * cutoff):
        results.append((direct / total_score, direct, amount, activity))

    for exc in activity.technosphere():
        _collect_leaves(
            exc.input,
            results,
            lca_obj,
            amount * exc['amount'],
            total_score,
            level + 1,
            max_level,
            cutoff,
            True,
        )

In [18]:
def get_cpc(activity):
    """Return the value of the first ``'CPC'`` entry in the ``classifications`` of ``activity``.

    Returns ``None`` when there are no classifications or none of them is labelled ``'CPC'``."""
    for entry in activity.get('classifications', []):
        if entry[0] == 'CPC':
            return entry[1]
    return None

In [19]:
def group_leaves(leaves):
    """Group elements in ``leaves`` by their `CPC (Central Product Classification) <https://unstats.un.org/unsd/classifications/Econ/cpc>`__ code.

    ``leaves`` is a list of ``(fraction of total impact, specific impact, amount, Activity instance)``
    tuples. The three numeric fields are summed per CPC code.

    Returns a list of ``(fraction of total impact, specific impact, amount, CPC label)`` tuples,
    sorted in descending order. The label is ``None`` for leaves for which ``get_cpc`` finds no
    CPC classification."""
    totals = {}

    for leaf in leaves:
        cpc = get_cpc(leaf[3])
        if cpc not in totals:
            totals[cpc] = np.zeros((3,))
        totals[cpc] += np.array(leaf[:3])

    rows = [
        (float(sums[0]), float(sums[1]), float(sums[2]), label)
        for label, sums in totals.items()
    ]

    # Labels may mix ``None`` and strings, which cannot be compared with each other;
    # mapping ``None`` to "" keeps the sort from raising TypeError on numeric ties.
    return sorted(rows, key=lambda row: (row[0], row[1], row[2], row[3] or ""), reverse=True)

In [20]:
# The result is sorted in descending order, so this shows the 20 largest entries.
find_leaves(act, ipcc)[:20]

[(0.7274518715189927,
  0.016756729814329387,
  0.011586675833,
  'ethylene production, average' (kilogram, RER, None)),
 (0.04118571372722042,
  0.0009487058926610116,
  0.0016417552467926355,
  'market for electricity, medium voltage' (kilowatt hour, DE, None)),
 (0.02166420948066825,
  0.000499031371175891,
  0.0005067167625697237,
  'market for electricity, medium voltage' (kilowatt hour, PL, None)),
 (0.018477977506640535,
  0.0004256370609748953,
  0.0009616517455802065,
  'market for electricity, medium voltage' (kilowatt hour, IT, None)),
 (0.01841228848656637,
  0.0004241239255988705,
  0.000211930908987205,
  'market for chemical, organic' (kilogram, GLO, None)),
 (0.016306900720072584,
  0.0003756266774113529,
  5.8495193426313e-05,
  'chemical factory construction' (kilogram, RER, None)),
 (0.014158557355125704,
  0.00032613995433831695,
  0.0009730749074477607,
  'market for electricity, medium voltage' (kilowatt hour, GB, None)),
 (0.012682968774208953,
  0.00029215002299

In [21]:
# Five largest CPC groups for the same activity.
group_leaves(find_leaves(act, ipcc))[:5]

[(0.7298572894865746,
  0.0168121382070374,
  0.011623711179268872,
  '33421: Ethylene, propylene, butylene, butadiene'),
 (0.19597103085875947,
  0.00451415927857724,
  0.01106526337462979,
  '17100: Electrical energy'),
 (0.019533883648797632,
  0.0004499596789048918,
  0.0002450884690768921,
  '341: Basic organic chemicals'),
 (0.016306900720072584,
  0.0003756266774113529,
  5.8495193426313e-05,
  '53269: Other constructions for manufacturing'),
 (0.010311476068147734,
  0.00023752309290246137,
  7.822571501852176e-07,
  '531: Buildings')]

## Comparing activities by arborescences

In [22]:
def get_value_for_cpc(lst, label, index):
    """Return field ``index`` of the first entry in ``lst`` whose label (position 3) equals ``label``.

    Returns ``0`` when no entry carries that label."""
    matching_values = (entry[index] for entry in lst if entry[3] == label)
    return next(matching_values, 0)

In [23]:
def compare_activities_by_grouped_leaves(activities, lcia_method, mode='relative'):
    """Build a comparison table of activities, broken down by the CPC groups of their leaves.

    Inputs:

        ``activities``: Iterable of ``Activity`` objects.
        ``lcia_method``: Tuple identifying a ``Method``.
        ``mode``: ``'relative'`` compares fractions of the total impact; any other value
            compares the specific impacts.

    Returns:

        ``(labels, data)``. ``labels`` is the list of column names: ``activity``,
        ``product``, ``location``, ``unit``, ``total``, then one column per CPC group,
        ordered by the largest value the group reaches in any activity (descending).
        ``data`` holds one row per activity in the same column order. Activity and
        product names have their common prefix removed, and locations are cut to
        25 characters.

    """
    index = 0 if mode == 'relative' else 1

    # Materialise once: the collection is iterated several times below.
    activities = list(activities)

    objs = [group_leaves(find_leaves(act, lcia_method)) for act in activities]

    # Largest value reached by each CPC group across all activities, found in one pass.
    peak = {}
    for grouped in objs:
        for el in grouped:
            key = el[3]
            if key not in peak or el[index] > peak[key]:
                peak[key] = el[index]

    # Keys may mix ``None`` and strings; mapping ``None`` to "" keeps ties sortable.
    sorted_keys = sorted(
        ((value, key) for key, value in peak.items()),
        key=lambda pair: (pair[0], pair[1] or ""),
        reverse=True,
    )

    name_common = commonprefix([act['name'] for act in activities])
    product_common = commonprefix([act['reference product'] for act in activities])

    lca = bc.LCA({act: 1 for act in activities}, lcia_method)
    lca.lci()
    lca.lcia()

    labels = ['activity', 'product', 'location', 'unit', 'total'] + [key for _, key in sorted_keys]
    data = []
    for act, lst in zip(activities, objs):
        lca.redo_lcia({act: 1})
        data.append([
            _without_common_prefix(act['name'], name_common),
            _without_common_prefix(act['reference product'], product_common),
            act['location'][:25],
            act['unit'],
            lca.score,
        ] + [get_value_for_cpc(lst, key, index) for _, key in sorted_keys])

    return labels, data


def _without_common_prefix(text, prefix):
    """Drop ``prefix`` from the start of ``text`` only; return ``text`` itself if nothing would remain."""
    remainder = text[len(prefix):] if text.startswith(prefix) else text
    return remainder or text

In [24]:
def table_for_compared_activities(labels, data, str_cutoff=50):
    """Render ``labels`` and ``data`` as an HTML table, with rows sorted by total score (ascending).

    Inputs:

        ``labels``: Column names; each is cut to ``str_cutoff`` characters.
        ``data``: List of rows; element 4 of each row is the total score used for sorting.
        ``str_cutoff``: Maximum length of a column name.

    Returns:

        The table produced by ``tabulate.tabulate`` with ``tablefmt="html"``.

    """
    # A label can be ``None`` (leaves without a CPC classification), so convert to
    # text before slicing instead of failing with a TypeError.
    headers = [str(label)[:str_cutoff] for label in labels]
    rows = sorted(data, key=lambda row: row[4])
    return tabulate.tabulate(rows, headers, tablefmt="html", floatfmt=".3f")

In [25]:
# Relative contributions (the default ``mode``) of each CPC group for every dataset in ``pet_production``.
table_for_compared_activities(*compare_activities_by_grouped_leaves(pet_production, ipcc))

activity,product,location,unit,total,17100: Electrical energy,"39270: Waste, parings and scrap of plastics",347: Plastics in primary forms,"34110: Hydrocarbons and their halogenated, sulphon",39910: Municipal waste,17300: Steam and hot water,"34170: Ethers, alcohol peroxides, ether peroxides,",53269: Other constructions for manufacturing,"34710: Polymers of ethylene, in primary forms","12020: Natural gas, liquefied or in the gaseous st",34231: Chemical elements n.e.c.; inorganic acids e,543: Site preparation services,34120: Industrial monocarboxylic fatty acids; acid,11010: Hard coal,41122: Alloy steel in ingots or other primary form,53290: Other civil engineering works,6511: Road transport services of freight,34250: Salts of oxometallic or peroxometallic acid,341: Basic organic chemicals,16200: Salt and pure sodium chloride; sea water,"34210: Hydrogen, nitrogen, oxygen, carbon dioxide","89330: Metal forging, pressing, stamping, roll for",6512: Railway transport services of freight,39920: Sewage sludge,33370: Fuel oils n.e.c.,35321: Soap; organic surface-active products and p,"41601: Tungsten, molybdenum, tantalum, magnesium,",65213: Coastal and transoceanic water transport se,532: Civil engineering works,"41603: Bismuth, antimony, manganese, chromium and","374: Plaster, lime and cement","37420: Quicklime, slaked lime and hydraulic lime",39950: Wastes from chemical or allied industries,34: Basic chemicals,39: Wastes or scraps,"34651: Ammonia, anhydrous",34240: Phosphates of triammonium; salts and peroxy,342: Basic inorganic chemicals n.e.c.,43420: Industrial or laboratory furnaces and ovens,39990: Other wastes n.e.c.,34280: Hydrogen peroxide; phosphides; carbides; hy,53262: Power plants,65229: Other inland water transport services of fr,"89200: Moulding, pressing, stamping, extruding and",14290: Other non-ferrous metal ores and concentrat,53253: Sewage and water treatment plants,"42210: Reservoirs, tanks, vats and similar contain",34232: Phosphoric acid,18000: 
Natural water,4160: Other non-ferrous metals and articles thereo,54330: Excavating and earthmoving services,34220: Zinc oxide; zinc peroxide; chromium oxides,34790: Other plastics in primary forms; ion exchan,"94231: General waste collection services, resident"
"amorphous, recycled","amorphous, recycled",CH,kilogram,0.397,0.123,0.465,0.0,0.0,0.221,0.068,0.0,0.0,0.0,0.0,0.009,0.033,0.0,0.0,0.025,0.023,-0.0,0.0,0.006,0.0,0.0,0.006,0.0,0.004,0.005,0.004,0.0,0.0,0.002,0.0,0.001,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.001
"bottle grade, recycled","bottle grade, recycled",CH,kilogram,0.438,0.141,0.421,0.0,0.0,0.242,0.05,0.0,0.001,0.0,0.0,0.033,0.03,0.0,0.0,0.023,0.021,0.002,0.0,0.002,0.01,0.0,0.006,0.001,0.005,0.0,0.001,0.0,0.0,0.002,0.0,0.001,0.001,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.001
"bottle grade, recycled","bottle grade, recycled",RoW,kilogram,1.013,0.284,0.24,0.0,0.0,0.047,0.218,0.0,0.001,0.0,0.0,0.015,0.007,0.0,0.027,0.013,0.009,0.001,0.0,0.001,0.004,0.0,0.003,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"amorphous, recycled","amorphous, recycled",Europe without Switzerlan,kilogram,1.093,0.368,0.225,0.0,0.0,-0.062,0.112,0.0,0.0,0.0,0.013,0.027,0.008,0.0,0.0,0.013,0.008,0.001,0.0,0.012,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"amorphous, recycled","amorphous, recycled",US,kilogram,1.271,0.417,0.182,0.0,0.0,0.08,0.073,0.0,0.0,0.0,0.007,0.024,0.007,0.0,0.0,0.011,0.007,-0.0,0.0,0.011,0.0,0.0,0.003,-0.0,0.0,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"amorphous, recycled","amorphous, recycled",RoW,kilogram,1.588,0.541,0.141,0.0,0.0,0.073,0.079,0.0,0.0,0.0,0.037,0.019,0.005,0.0,0.0,0.009,0.006,-0.001,0.0,0.008,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.002
bottle grade,bottle grade,RER,kilogram,2.748,0.119,0.0,0.03,0.362,0.0,0.083,0.207,0.044,0.041,0.011,0.006,0.0,0.028,0.0,0.0,0.0,0.022,0.013,0.005,0.0,0.006,0.0,0.006,0.001,0.0,0.0,0.003,0.002,0.0,0.001,0.0,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
bottle grade,bottle grade,RoW,kilogram,2.838,0.141,0.0,0.029,0.35,0.0,0.083,0.201,0.045,0.04,0.01,0.006,0.0,0.027,0.0,0.0,0.0,0.022,0.014,0.005,0.0,0.008,0.0,0.006,0.001,0.0,0.0,0.003,0.002,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
amorphous,amorphous,CA-QC,kilogram,2.943,0.086,0.0,0.028,0.343,0.0,0.211,0.19,0.043,0.0,0.002,0.005,0.0,0.027,0.0,0.0,0.0,0.021,0.011,0.004,0.0,0.008,0.0,0.006,0.001,0.0,0.0,0.003,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
amorphous,amorphous,RER,kilogram,2.944,0.111,0.001,0.028,0.342,0.0,0.189,0.19,0.041,0.0,0.002,0.005,0.0,0.027,0.0,0.0,0.0,0.021,0.011,0.004,0.0,0.006,0.0,0.006,0.001,0.0,0.0,0.003,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
