In [109]:
from bw2io.extractors import ExcelExtractor
import bw2data as bd
import re

In [110]:
assert "biosphere3" in bd.databases, "Must install base ecoinvent data"
bio = list(bd.Database("biosphere3"))
bio_names = {o['name'].lower() for o in bio}
len(bio), len(bio_names)

(4321, 1415)

In [111]:
bio[0]._data

{'categories': ('soil', 'forestry'),
 'code': '48dbf4b1-0b2a-4bf5-a190-2d7fc465bda4',
 'CAS number': '007440-22-4',
 'name': 'Silver',
 'database': 'biosphere3',
 'unit': 'kilogram',
 'type': 'emission'}

In [3]:
[x for x in bio_names if "fossil" in x]

['methane, fossil',
 'carbon monoxide, non-fossil',
 'oils, non-fossil',
 'carbon monoxide, fossil',
 'carbon dioxide, fossil',
 'carbon dioxide, non-fossil',
 'carbon dioxide, non-fossil, from calcination',
 'methane, non-fossil']

In [4]:
data = ExcelExtractor.extract("ReCiPe2016_CFs_v1.1_20180117.xlsx")

In [5]:
multiple = re.compile("^(.*)\((.*)\)$")

def get_names(dct):
    """Get all possible names for this elementary flow"""
    def _(s):
        return s.strip().lower()
    
    if 'name' in dct:
        yield _(dct['name'])
    if 'Name' in dct:
        yield _(dct['Name'])
    if 'Alternative name (=name in emission database if different)' in dct:
        match = multiple.match(dct['Alternative name (=name in emission database if different)'])
        if match:
            for name in match.groups():
                yield _(name)
                yield _(name).replace(" ", "-")
        else:
            yield _(dct['Alternative name (=name in emission database if different)'])
            yield _(dct['Alternative name (=name in emission database if different)']).replace(" ", "-")
    if 'Alternative name' in dct:
        match = multiple.match(dct['Alternative name'])
        if match:
            for name in match.groups():
                yield _(name)
                yield _(name).replace(" ", "-")
        else:
            yield _(dct['Alternative name'])
            yield _(dct['Alternative name']).replace(" ", "-")
            
get_names_test = {
    'Name': 'Nitrous oxide',
   'Alternative name (=name in emission database if different)': 'dinitrogen oxide (nitrous oxide)',
}

assert list(get_names(get_names_test)) == ['nitrous oxide', 'dinitrogen oxide', 'dinitrogen-oxide', 'nitrous oxide', 'nitrous-oxide']

In [101]:
def three_row_header(data):
    def _(s):
        """Clean up some minor typos.
        
        Turns `(kg CO2eq/ kg GHG)` into `kg CO2eq/kg GHG`."""
        if s.startswith("("):
            s = s[1:]
        if s.endswith(")"):
            s = s[:-1]
        return s.replace("/ ", "/")
    
    try:
        index_of_starting_cfs = data[2].index("Individualist")
    except ValueError:
        index_of_starting_cfs = data[2].index("I")
    labels = data[2][:index_of_starting_cfs]
    
    results = []
    for column_index in range(index_of_starting_cfs, len(data[0])):
        method = {
            'name': data[0][column_index],
            'unit': _(data[1][column_index]),
            'perspective': data[2][column_index],
            'cfs': []
        }
        for row in data[3:]:
            if row[column_index] == '':
                continue
            method['cfs'].append(1)
        results.append(method)
    return method

In [102]:
category_formatter = {
    'Global Warming': three_row_header,
    'Stratospheric ozone depletion': three_row_header,
    'Ionizing radiation': three_row_header,
    'Human damage ozone formation': three_row_header,
}

In [103]:
def as_dict(data):
    return {
        label: category_formatter[label](rows)
        for label, rows in data
        if label in category_formatter
    }

In [104]:
dd = as_dict(data)

In [105]:
def find_name_in_biosphere(cf, category_mapping):
    _ = lambda s: category_mapping.get(s, s)
    for name in get_names(cf):
        name = _(name)
        if name in bio_names:
            return name
    # Try a partial match
    name = _(cf['Name'].lower())
    for o in bio_names:
        if name and name in o:
            return o

# Global warming

In [11]:
category_mapping = {
    'carbon dioxide': 'carbon dioxide, fossil',
    'carbon monoxide': 'carbon monoxide, fossil',
    'methane': 'methane, non-fossil',
    'fossil methane': 'methane, fossil',
    'nitrous oxide': 'dinitrogen monoxide',
    'sulphur hexafluoride': 'sulfur hexafluoride',
    'halon-1211': 'methane, bromochlorodifluoro-, halon 1211',
    'pfc-41-12': 'perfluoropentane',
    'cfc-11': 'methane, trichlorofluoro-, cfc-11',
    'pfc-116': 'ethane, hexafluoro-, hfc-116',
    'halon-1301': 'methane, bromotrifluoro-, halon 1301',
    'methylene chloride': 'methane, dichloro-, hcc-30',
    'methyl chloride': 'methane, monochloro-, r-40',
    'carbon tetrachloride': 'methane, tetrachloro-, r-10',
    'pfc-14': 'methane, tetrafluoro-, r-14',
    'cfc-12': 'methane, dichlorodifluoro-, cfc-12',
    'methyl bromide': 'methane, bromo-, halon 1001',
}

In [12]:
found = {find_name_in_biosphere(cf, category_mapping) 
         for cf in dd['Global Warming']['cfs'] 
         if find_name_in_biosphere(cf, category_mapping)}
found

{'carbon dioxide, fossil',
 'chloroform',
 'dinitrogen monoxide',
 'ethane, 1,1,1,2-tetrafluoro-, hfc-134a',
 'ethane, 1,1,1-trifluoro-, hfc-143a',
 'ethane, 1,1,2-trichloro-1,2,2-trifluoro-, cfc-113',
 'ethane, 1,1-dichloro-1-fluoro-, hcfc-141b',
 'ethane, 1,1-difluoro-, hfc-152a',
 'ethane, 1,2-dichloro-1,1,2,2-tetrafluoro-, cfc-114',
 'ethane, 1-chloro-1,1-difluoro-, hcfc-142b',
 'ethane, 2,2-dichloro-1,1,1-trifluoro-, hcfc-123',
 'ethane, 2-chloro-1,1,1,2-tetrafluoro-, hcfc-124',
 'ethane, chloropentafluoro-, cfc-115',
 'ethane, hexafluoro-, hfc-116',
 'ethane, pentafluoro-, hfc-125',
 'hydrocarbons, chlorinated',
 'methane, bromo-, halon 1001',
 'methane, bromochlorodifluoro-, halon 1211',
 'methane, bromotrifluoro-, halon 1301',
 'methane, chlorodifluoro-, hcfc-22',
 'methane, chlorotrifluoro-, cfc-13',
 'methane, dichloro-, hcc-30',
 'methane, dichlorodifluoro-, cfc-12',
 'methane, dichlorofluoro-, hcfc-21',
 'methane, difluoro-, hfc-32',
 'methane, fossil',
 'methane, monochlor

In [13]:
in_ecoinvent_ipcc = {bd.get_activity(o[0])['name'].lower() for o in bd.Method(('IPCC 2013', 'climate change', 'GWP 100a')).load()}

In [14]:
in_ecoinvent_recipe = {bd.get_activity(o[0])['name'].lower() for o in bd.Method(('ReCiPe Midpoint (E) V1.13', 'climate change', 'GWP500')).load()}

In [15]:
known_missing = {
    'ethane, 1,1,1-trichloro-, hcfc-140',
    'carbon monoxide, fossil',
    'carbon monoxide, from soil or biomass stock',
    'carbon monoxide, non-fossil',  
    'methane, bromo-, halon 1001',
    'nitric oxide',
    'nitrogen fluoride',
    'voc, volatile organic compounds, unspecified origin', # What could the CF even be?
    'ethane, 1,2-dichloro-',
    'dimethyl ether',
}

In [16]:
in_ecoinvent_ipcc.difference(found).difference(known_missing)

{'carbon dioxide, from soil or biomass stock',
 'carbon dioxide, to soil or biomass stock',
 'methane',
 'methane, from soil or biomass stock'}

In [17]:
in_ecoinvent_recipe.difference(found).difference(known_missing)

{'carbon dioxide, from soil or biomass stock',
 'carbon dioxide, to soil or biomass stock',
 'methane',
 'methane, from soil or biomass stock'}

In [19]:
found.difference(in_ecoinvent_recipe)

{'perfluoropentane'}

In [20]:
found.difference(in_ecoinvent_ipcc)

{'hydrocarbons, chlorinated'}

# Ozone depletion

In [87]:
category_mapping = {
    'carbon tetrachloride': 'methane, tetrachloro-, r-10',
    'cfc-12': 'methane, dichlorodifluoro-, cfc-12',
    'cfc-11': 'methane, trichlorofluoro-, cfc-11',
    'halon-1301': 'methane, bromotrifluoro-, halon 1301',
    'halon-1211': 'methane, bromochlorodifluoro-, halon 1211',
    'ch3cl': 'methane, monochloro-, r-40',
    'ch3br': 'methane, bromo-, halon 1001',
    'n2o': 'dinitrogen oxide',
}

In [88]:
found = {find_name_in_biosphere(cf, category_mapping) 
         for cf in dd['Stratospheric ozone depletion']['cfs'] 
         if find_name_in_biosphere(cf, category_mapping)}
found

{'ethane, 1,1,2-trichloro-1,2,2-trifluoro-, cfc-113',
 'ethane, 1,1-dichloro-1-fluoro-, hcfc-141b',
 'ethane, 1,2-dichloro-1,1,2,2-tetrafluoro-, cfc-114',
 'ethane, 1-chloro-1,1-difluoro-, hcfc-142b',
 'ethane, 2,2-dichloro-1,1,1-trifluoro-, hcfc-123',
 'ethane, 2-chloro-1,1,1,2-tetrafluoro-, hcfc-124',
 'ethane, chloropentafluoro-, cfc-115',
 'hydrocarbons, chlorinated',
 'methane, bromo-, halon 1001',
 'methane, bromochlorodifluoro-, halon 1211',
 'methane, bromotrifluoro-, halon 1301',
 'methane, chlorodifluoro-, hcfc-22',
 'methane, dichlorodifluoro-, cfc-12',
 'methane, monochloro-, r-40',
 'methane, tetrachloro-, r-10',
 'methane, trichlorofluoro-, cfc-11'}

In [89]:
missing = {cf['Name'] 
         for cf in dd['Stratospheric ozone depletion']['cfs'] 
         if not find_name_in_biosphere(cf, category_mapping)}
missing

{'CH3CCl3', 'HCFC-225ca', 'HCFC-225cb', 'Halon-1202', 'Halon-2402', 'N2O'}

In [23]:
in_ecoinvent = {bd.get_activity(o[0])['name'].lower() for o in bd.Method(('ReCiPe Midpoint (E) V1.13', 'ozone depletion', 'ODPinf')).load()}

In [24]:
known_missing = {
    'ethane, 1,1,1-trichloro-, hcfc-140',
    'ethane, 1,1,2-trichloro-',
}

In [25]:
in_ecoinvent.difference(found).difference(known_missing)

set()

In [26]:
found.difference(in_ecoinvent)

set()

# Ionizing radiation

In [90]:
category_mapping = {
    'ch3cl': 'methane, monochloro-, r-40',
    'ch3br': 'methane, bromo-, halon 1001',
    'actinides, unspecified': 'actinides, radioactive, unspecified',
    'cs-134': 'cesium-134',
    'cs-137': 'cesium-137',
    'co-58': 'cobalt-58',
    'h-3': 'hydrogen-3, tritium',
    'cm alphaa': 'curium alpha',
    'pb-210': 'lead-210',
    'pu-238': 'plutonium-238',
    'pu-239': 'plutonium-239',
    'mn-54': 'manganese-54',
    'ra-226a': 'radium-226',
    'ra-226': 'radium-226',
    'sb-124': 'antimony-124',
    'i-133': 'iodine-133',
    'po-210': 'polonium-210',
    'ag-110m': 'silver-110',
    'pu alpha': 'plutonium-alpha',
    'pu alphaa': 'plutonium-alpha',
    'u-238a': 'uranium-238',
    'xe-133': 'xenon-133',
    # What ecoinvent calls 'plutonium-alpha' could be Pu 239?
}

In [91]:
found = {find_name_in_biosphere(cf, category_mapping) 
         for cf in dd['Ionizing radiation']['cfs'] 
         if find_name_in_biosphere(cf, category_mapping)}
found

{'actinides, radioactive, unspecified',
 'americium-241',
 'antimony-124',
 'antimony-125',
 'carbon-14',
 'cesium-134',
 'cesium-137',
 'cobalt-58',
 'cobalt-60',
 'curium alpha',
 'hydrogen-3, tritium',
 'iodine-129',
 'iodine-131',
 'iodine-133',
 'krypton-85',
 'lead-210',
 'manganese-54',
 'noble gases, radioactive, unspecified',
 'plutonium-238',
 'plutonium-alpha',
 'polonium-210',
 'radium-226',
 'radon-222',
 'ruthenium-106',
 'silver-110',
 'strontium-90',
 'technetium-99',
 'thorium-230',
 'uranium-234',
 'uranium-235',
 'uranium-238',
 'xenon-133'}

In [92]:
in_ecoinvent = {bd.get_activity(o[0])['name'].lower() 
                for o in bd.Method(('ReCiPe Midpoint (E) V1.13', 'ionising radiation', 'IRP_HE')).load()}

In [93]:
known_missing = {
    'uranium alpha',
}

In [94]:
in_ecoinvent.difference(found).difference(known_missing)

set()

In [95]:
found.difference(in_ecoinvent)

{'technetium-99'}

In [96]:
missing = {cf['Name'] 
         for cf in dd['Ionizing radiation']['cfs'] 
         if not find_name_in_biosphere(cf, category_mapping)}
missing

{'Pu-239'}

# Human damage ozone formation

In [106]:
category_mapping = {
    'ch3cl': 'methane, monochloro-, r-40',
#     'ch3br': 'methane, bromo-, halon 1001',
#     'actinides, unspecified': 'actinides, radioactive, unspecified',
#     'cs-134': 'cesium-134',
#     'cs-137': 'cesium-137',
#     'co-58': 'cobalt-58',
#     'h-3': 'hydrogen-3, tritium',
#     'cm alphaa': 'curium alpha',
#     'pb-210': 'lead-210',
#     'pu-238': 'plutonium-238',
#     'pu-239': 'plutonium-239',
#     'mn-54': 'manganese-54',
#     'ra-226a': 'radium-226',
#     'ra-226': 'radium-226',
#     'sb-124': 'antimony-124',
#     'i-133': 'iodine-133',
#     'po-210': 'polonium-210',
#     'ag-110m': 'silver-110',
#     'pu alpha': 'plutonium-alpha',
#     'pu alphaa': 'plutonium-alpha',
#     'u-238a': 'uranium-238',
#     'xe-133': 'xenon-133',
}

In [107]:
found = {find_name_in_biosphere(cf, category_mapping) 
         for cf in dd['Human damage ozone formation']['cfs'] 
         if find_name_in_biosphere(cf, category_mapping)}
found

KeyError: 'Name'

In [92]:
in_ecoinvent = {bd.get_activity(o[0])['name'].lower() 
                for o in bd.Method(('ReCiPe Midpoint (E) V1.13', 'photochemical oxidant formation', 'POFP')).load()}

In [93]:
known_missing = {
    'uranium alpha',
}

In [94]:
in_ecoinvent.difference(found).difference(known_missing)

set()

In [95]:
found.difference(in_ecoinvent)

{'technetium-99'}

In [96]:
missing = {cf['Name'] 
         for cf in dd['Human damage ozone formation']['cfs'] 
         if not find_name_in_biosphere(cf, category_mapping)}
missing

{'Pu-239'}

In [79]:
s = "pluton"
{x for x in bio_names if s in x}

{'plutonium-238', 'plutonium-241', 'plutonium-alpha'}

In [108]:
sorted([m for m in bd.methods if m[0] == 'ReCiPe Midpoint (E) V1.13'])

[('ReCiPe Midpoint (E) V1.13', 'agricultural land occupation', 'ALOP'),
 ('ReCiPe Midpoint (E) V1.13', 'climate change', 'GWP500'),
 ('ReCiPe Midpoint (E) V1.13', 'fossil depletion', 'FDP'),
 ('ReCiPe Midpoint (E) V1.13', 'freshwater ecotoxicity', 'FETPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'freshwater eutrophication', 'FEP'),
 ('ReCiPe Midpoint (E) V1.13', 'human toxicity', 'HTPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'ionising radiation', 'IRP_HE'),
 ('ReCiPe Midpoint (E) V1.13', 'marine ecotoxicity', 'METPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'marine eutrophication', 'MEP'),
 ('ReCiPe Midpoint (E) V1.13', 'metal depletion', 'MDP'),
 ('ReCiPe Midpoint (E) V1.13', 'natural land transformation', 'NLTP'),
 ('ReCiPe Midpoint (E) V1.13', 'ozone depletion', 'ODPinf'),
 ('ReCiPe Midpoint (E) V1.13', 'particulate matter formation', 'PMFP'),
 ('ReCiPe Midpoint (E) V1.13', 'photochemical oxidant formation', 'POFP'),
 ('ReCiPe Midpoint (E) V1.13', 'terrestrial acidification', 'TAP500'),
 ('ReCiPe M