In [1]:
import sys
sys.path.append('..')
import os
from os.path import expanduser

import lcatools

In [2]:
from lcatools.providers.ecoinvent_spreadsheet import EcoinventSpreadsheet

In [3]:
# Ecoinvent release whose files are processed in this notebook.
ECOINVENT_VERSION = '3.2'

# System models handled below: 'undefined' is loaded on its own first,
# the remaining entries are loaded in a loop.
SYSTEM_MODELS = ['undefined', 'apos', 'consequential', 'cut-off']

# Directory holding the Ecoinvent files for this version.
datapath = os.path.join(
    expanduser('~'),
    'Dropbox', 'data', 'Ecoinvent', ECOINVENT_VERSION,
)
# Directory the JSON catalogs are read from and written to.
catalog_dir = os.path.join(
    expanduser('~'),
    'GitHub', 'lca-tools-datafiles', 'catalogs',
)

In [4]:
# Loaded archives, keyed by system model name.
ei = {}

In [5]:
from lcatools.tools import archive_from_json

In [6]:
# Load the LCIA archive from its gzipped JSON catalog file; %time reports how long the call takes.
%time ei_lcia = archive_from_json(os.path.join(catalog_dir, 'ei_lcia.json.gz'))

Loading JSON data from /home/b/GitHub/lca-tools-datafiles/catalogs/ei_lcia.json.gz:
**Upstream reference encountered: /data/LCI/Ecoinvent/3.2/undefined

710 new quantity entities added (710 total)
3255 new flow entities added (3255 total)
0 new process entities added (0 total)
CPU times: user 3.32 s, sys: 80 ms, total: 3.4 s
Wall time: 3.39 s


In [7]:
# Build the archive for the 'undefined' system model from its activity-overview
# spreadsheet, with the LCIA archive (ei_lcia) as upstream, then load everything.
model = 'undefined'
pub_file = os.path.join(datapath, 'activity_overview_for_users_' + ECOINVENT_VERSION + '_' + model + '.xlsx')
# NOTE(review): the same ns_uuid literal is repeated in the loop that loads the
# remaining system models — keep the two in sync.
ei[model] = EcoinventSpreadsheet(pub_file, internal=False, version=ECOINVENT_VERSION, 
                                 ns_uuid='544efee9-6651-4c77-813f-b0134f561eb9',
                                 upstream=ei_lcia,
                                 quiet=True)
%time ei[model].load_all()

Loading /home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_undefined.xlsx
Reading elementary exchanges ...
Reading intermediate exchanges ...
20 new quantity entities added (20 total)
Handling elementary exchanges [public spreadsheet]
4013 new flow entities added (4013 total)
Handling intermediate exchanges [public spreadsheet]
2754 new flow entities added (6767 total)
Handling activities...
Reading activity overview ...
13307 new process entities added (13307 total)
CPU times: user 57.3 s, sys: 120 ms, total: 57.4 s
Wall time: 57.7 s


In [8]:
# Sanity check: collect the flows of the LCIA archive that have no reference
# entity and display how many there are.
F = [f for f in ei_lcia.flows() if f.reference_entity is None]
len(F)

0

In [9]:
ei[model].validate_entity_list()

20792 entities validated out of 20792


20792

In [10]:
import itertools

In [11]:
def group_by(result, group_key):
    """Partition ``result`` into groups that share the same ``group_key`` value.

    The items are sorted by ``group_key`` first, so every distinct key value
    appears exactly once and the keys come back in sorted order.

    :param result: iterable of items to group
    :param group_key: callable mapping an item to its (sortable) group key
    :return: ``(keys, groups)`` -- two parallel lists, where ``groups[n]`` is
        the list of items whose key is ``keys[n]``
    """
    ordered = sorted(result, key=group_key)
    paired = [(label, list(members))
              for label, members in itertools.groupby(ordered, group_key)]
    keys = [label for label, _ in paired]
    groups = [members for _, members in paired]
    return keys, groups



In [12]:
# Group the flows of the current model's archive by their origin.
# NOTE(review): _entities_by_type is underscore-prefixed (private by convention);
# flows() is called on archives elsewhere in this notebook and is presumably
# equivalent for this purpose — verify before switching.
origin, flows_by_origin = group_by(ei[model]._entities_by_type('flow'), lambda x: x.origin)

In [13]:
origin

['/data/LCI/Ecoinvent/3.2/undefined',
 '/data/LCI/LCIA implementation v3.1 2014_08_13.xlsx',
 '/home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_undefined.xlsx']

In [14]:
[len(g) for g in flows_by_origin]

[1555, 1700, 3512]

Flows whose origin is the spold archive (`origin[0]`): elementary flows for which characterizations were found.

In [15]:
[str(f) for f in flows_by_origin[0][:10]]

['Sethoxydim [air, non-urban air or from high stacks]',
 'Occupation, lake, artificial [natural resource, land]',
 'Strontium (CAS 007440-24-6) [soil, agricultural]',
 'Pumice, in ground [natural resource, in ground]',
 'Basalt, in ground [natural resource, in ground]',
 'Cesium-134 [water, surface water]',
 'Aerosols, radioactive, unspecified [air, non-urban air or from high stacks]',
 'Zinc, ion (CAS 023713-49-7) [water, unspecified]',
 'Transformation, to river, artificial [natural resource, land]',
 'Antimony (CAS 007440-36-0) [water, ground-]']

Flows whose origin is the LCIA spreadsheet (`origin[1]`): characterized elementary flows that were not found in the database.

In [16]:
[str(f) for f in flows_by_origin[1][:10]]

['Hydrocarbons, aliphatic, alkanes, cyclic [air, lower stratosphere + upper troposphere]',
 'Ethene, tetrachloro- (CAS 000127-18-4) [water, ground-]',
 'Iodine-135 [air, non-urban air or from high stacks]',
 'Neptunium-237 [water, surface water]',
 'Methane, bromotrifluoro-, Halon 1301 (CAS 000075-63-8) [air, unspecified]',
 'Butyrolactone (CAS 000096-48-0) [air, unspecified]',
 'Protactinium-234 (CAS 015100-28-4) [water, ground-]',
 'Chromium-51 (CAS 014392-02-0) [air, unspecified]',
 'Heat, waste [water, ocean]',
 'Antimony-125 (CAS 014234-35-6) [water, ground-, long-term]']

Flows whose origin is the activity-overview xlsx spreadsheet (`origin[2]`): probably mostly intermediate flows ...

In [17]:
[str(f) for f in flows_by_origin[2][:10]]

['sodium silicate, solid (CAS 1344-09-8) [Intermediate flow]',
 'photovoltaic panel, single-Si wafer [Intermediate flow]',
 'AOX, Adsorbable Organic Halogen as Cl [water, ground-]',
 'cement, blast furnace slag 81-95%, non-US [Intermediate flow]',
 'Lanthanum (CAS 007439-91-0) [air, non-urban air or from high stacks]',
 'outside air intake, stainless steel, DN 370 [Intermediate flow]',
 'glued laminated timber, for outdoor use [Intermediate flow]',
 'Acidity, unspecified (CAS 012408-02-5) [water, ground-, long-term]',
 'heptane (CAS 000142-82-5) [Intermediate flow]',
 'waste packaging glass, unsorted [Intermediate flow]']

... but not exclusively: the sample above also contains elementary flows (water and air compartments).

Let's see those flows grouped by compartment.

In [18]:
# Take the flows whose origin is the activity-overview spreadsheet (index 2 in
# `origin`) and regroup them by the first element of their 'Compartment' list.
my_origin = 2
comp, flows_by_comp = group_by(flows_by_origin[my_origin], lambda x: x['Compartment'][0])

In [19]:
def table(rows, entries):
    """Print one line per row label, preceded by the size of its entry group.

    :param rows: iterable of row labels
    :param entries: sequence indexed in parallel with ``rows``; the length of
        ``entries[n]`` is printed right-aligned in a 12-character column,
        followed by two spaces and the label ``rows[n]``
    """
    line_format = '%12d  %s'
    for position, label in enumerate(rows):
        count = len(entries[position])
        print(line_format % (count, label))

In [24]:
table(comp, flows_by_comp)

        2754  Intermediate flow
         226  air
           5  economic
          41  natural resource
          66  soil
         420  water


In [26]:
# Break down the second compartment group ('air', index 1 in `comp`) by the
# string form of each flow's reference entity.
table(*group_by(flows_by_comp[1], lambda x: str(x.reference_entity)))

           5  Ecoinvent Spreadsheet Quantity MJ
         216  Ecoinvent Spreadsheet Quantity kg
           5  Ecoinvent Spreadsheet Quantity m3


In [27]:
ei[model]['2a75d3cb-eb9c-3b44-aad4-0bbaa51005d3'].reference_entity.__dict__

{'_external_ref': None, '_unitstring': 'kg', '_uuid': None}

In [28]:
[str(x) for x in ei[model].search(entity_type='quantity', Name='mass')]

['cumulative exergy demand, biomass, renewable energy resources, biomass [LCIA]',
 'cumulative energy demand, biomass, renewable energy resources, biomass [LCIA]']

In [29]:
flows_by_comp[0][3].__dict__

{'_characterizations': {'5de72bbd-5a4c-3ab8-b63e-f707f95ceeeb': <lcatools.characterizations.Characterization at 0x7f735c6e75f8>},
 '_d': {'CasNumber': '',
  'Comment': '',
  'Compartment': ['Intermediate flow'],
  'Name': 'outside air intake, stainless steel, DN 370',
  'Synonyms': ''},
 '_external_ref': 'outside air intake, stainless steel, DN 370',
 '_origin': '/home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_undefined.xlsx',
 '_ref_quantity_factor': 1.0,
 '_scenarios': {},
 '_uuid': UUID('79e8c1a4-fec9-359c-9dd0-91cf86d5201d'),
 'entity_type': 'flow',
 'reference_entity': <lcatools.entities.LcQuantity at 0x7f735d3a4ef0>}

In [30]:
ei[model].flows()[47].serialize(characterizations=True, values=True)

{'CasNumber': '010028-17-8',
 'Comment': '',
 'Compartment': ['water', 'unspecified'],
 'Formula': '',
 'Name': 'Hydrogen-3, Tritium',
 'Synonyms': '',
 'characterizations': [{'entityType': 'characterization',
   'quantity': '0148760a-5482-344b-95fc-e2e7327228ba',
   'value': 6.7024e-09},
  {'entityType': 'characterization',
   'quantity': '01ec7c12-cefc-3c28-abbe-904c026752e9',
   'value': 1.1688e-08},
  {'entityType': 'characterization',
   'quantity': '0755d155-caab-3ecf-9eaf-46d1d595cf8d',
   'value': 1.1688e-08},
  {'entityType': 'characterization',
   'quantity': '0b07f0a8-f0bf-3780-a222-fd50e7ae61a6',
   'value': 8.7097e-09},
  {'entityType': 'characterization',
   'quantity': '0c924ffe-72dc-3e70-a6e5-edcb64d5327a',
   'value': 6.338e-11},
  {'entityType': 'characterization',
   'quantity': '0e8c57b2-c05b-3c01-843c-742398e18016',
   'value': 3.4229e-09},
  {'entityType': 'characterization',
   'quantity': '209c3fc2-99ff-3015-b420-f9ba59015ea3',
   'value': 1.0},
  {'entityType':

In [31]:
# Load every system model after the first, each using the already-loaded
# 'undefined' archive as its upstream.
# After the loop `model` stays bound to the last entry of SYSTEM_MODELS, and
# the cells that follow index ei[model] with that value.
for model in SYSTEM_MODELS[1:]:
    pub_file = os.path.join(
        datapath,
        'activity_overview_for_users_' + ECOINVENT_VERSION + '_' + model + '.xlsx')
    ei[model] = EcoinventSpreadsheet(pub_file, internal=False,
                                     upstream=ei['undefined'],
                                     version=ECOINVENT_VERSION,
                                     ns_uuid='544efee9-6651-4c77-813f-b0134f561eb9',
                                     quiet=True)
    ei[model].load_all()
    

Loading /home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_apos.xlsx
Reading elementary exchanges ...
Reading intermediate exchanges ...
20 new quantity entities added (20 total)
Handling elementary exchanges [public spreadsheet]
4013 new flow entities added (4013 total)
Handling intermediate exchanges [public spreadsheet]
2754 new flow entities added (6767 total)
Handling activities...
Reading activity overview ...
11420 new process entities added (11420 total)
Loading /home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_consequential.xlsx
Reading elementary exchanges ...
Reading intermediate exchanges ...
20 new quantity entities added (20 total)
Handling elementary exchanges [public spreadsheet]
4013 new flow entities added (4013 total)
Handling intermediate exchanges [public spreadsheet]
2754 new flow entities added (6767 total)
Handling activities...
Reading activity overview ...
11468 new process entities added (11468 total)
Loading /home/b/Dropbox/d

In [32]:
# `model` is still bound to the last value of the loading loop (the final entry
# of SYSTEM_MODELS), so this tabulates that archive's flows by origin.
table(*group_by(ei[model]._entities_by_type('flow'), lambda x: x.origin))

        1555  /data/LCI/Ecoinvent/3.2/undefined
        1700  /data/LCI/LCIA implementation v3.1 2014_08_13.xlsx
        3512  /home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_undefined.xlsx


In [33]:
origin, flows_by_origin = group_by(ei[model]._entities_by_type('flow'), lambda x: x.origin)
origin

['/data/LCI/Ecoinvent/3.2/undefined',
 '/data/LCI/LCIA implementation v3.1 2014_08_13.xlsx',
 '/home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_undefined.xlsx']

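Note: none of the flows originate from the later (system-model-specific) spreadsheets; every flow comes from one of the three origins already seen for the 'undefined' archive.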

In [34]:
table(*group_by(ei['apos']._entities_by_type('process'), lambda x: x.origin))


        9491  /home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_apos.xlsx
        1929  /home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_undefined.xlsx


In [35]:
# Write every loaded archive to a gzipped JSON catalog file named after the
# Ecoinvent version and the system model.
for k, v in ei.items():
    fname = 'ecoinvent_%s_%s_xlsx.json.gz' % (ECOINVENT_VERSION, k)
    # NOTE(review): the disabled branch below would have written the 'undefined'
    # archive with characterizations and values; as it stands every archive is
    # written without them — confirm this is intended.
    #if k == 'undefined':
    #    v.write_to_file(os.path.join(catalog_dir, fname), exchanges=True, gzip=True, characterizations=True, values=True)
    #else:
    v.write_to_file(os.path.join(catalog_dir, fname), exchanges=True, gzip=True, characterizations=False, values=False)

In [36]:
ei

{'apos': <lcatools.providers.ecoinvent_spreadsheet.EcoinventSpreadsheet at 0x7f735b4e9128>,
 'consequential': <lcatools.providers.ecoinvent_spreadsheet.EcoinventSpreadsheet at 0x7f735ca21b70>,
 'cut-off': <lcatools.providers.ecoinvent_spreadsheet.EcoinventSpreadsheet at 0x7f735a0239e8>,
 'undefined': <lcatools.providers.ecoinvent_spreadsheet.EcoinventSpreadsheet at 0x7f735b4e9240>}

In [37]:
def process_set(arch):
    """Return the set of external references of all processes in ``arch``.

    :param arch: object whose ``processes()`` yields items that provide
        ``get_external_ref()``
    :return: set of the values returned by ``get_external_ref()``
    """
    return {proc.get_external_ref() for proc in arch.processes()}
        
def compare(a1, a2):
    """Return the external references of processes found in both archives.

    :param a1: first archive (anything accepted by ``process_set``)
    :param a2: second archive
    :return: set of external references common to ``a1`` and ``a2``
    """
    return process_set(a1) & process_set(a2)


In [38]:
len(compare(ei['apos'], ei['undefined']))

1929

In [39]:
len(set([p.get_uuid() for p in ei['apos'].processes()]))

11420

In [21]:
p = ei['apos'].search('00420798')[0]

In [22]:
p.serialize(exchanges=True)

{'Comment': 'industrial gas power//fossil fuels',
 'IsicClass': 'Electric power generation, transmission and distribution',
 'IsicNumber': '3510',
 'Name': 'treatment of blast furnace gas, in power plant',
 'SpatialScope': 'BR',
 'TechnologyLevel': 'Current',
 'TemporalScope': {'begin': '1980-01-01', 'end': '2015-12-31'},
 'entityId': '00420798-e9d1-4de9-8745-09bd85f31db8',
 'entityType': 'process',
 'exchanges': [{'direction': 'Output',
   'flow': '58271de9-51c4-3b86-ae82-98c2b46280ec',
   'isReference': True},
  {'direction': 'Output',
   'flow': 'c9655b4d-25ab-3e72-93ab-beb16f577e47',
   'isReference': True},
  {'direction': 'Output',
   'flow': 'd804f912-5b47-31b5-a0bb-aaa4c5e378c4',
   'isReference': True}],
 'externalId': '00420798-e9d1-4de9-8745-09bd85f31db8',
 'origin': '/home/b/Dropbox/data/Ecoinvent/3.2/activity_overview_for_users_3.2_undefined.xlsx'}

In [26]:
# Configuration for reading the ecospold archive of a single system model.
model = 'apos'
# NOTE(review): zip_dir is built from the same path components as `datapath`
# in the configuration cell near the top of the notebook.
zip_dir = os.path.join(os.path.expanduser('~'), 'Dropbox', 'data', 'Ecoinvent', ECOINVENT_VERSION)
# NOTE(review): repeats the catalog_dir definition from the configuration cell
# near the top of the notebook.
catalog_dir = os.path.join(os.path.expanduser('~'), 'GitHub', 'lca-tools-datafiles', 'catalogs')

# File name of the 7z archive for this version and system model.
spold = 'current_Version_' + ECOINVENT_VERSION + '_' + model + '_ecoSpold02.7z'

In [27]:
# Ecospold archives, keyed by system model name.
ei_spold = {}

In [28]:
from lcatools.providers.ecospold2 import EcospoldV2Archive

In [29]:
# Open the ecospold archive for this model, passing the spreadsheet-derived
# archive of the same model as upstream.
ei_spold[model] = EcospoldV2Archive(os.path.join(zip_dir,spold), prefix='datasets', upstream=ei[model])

Found Extension: 7z


In [31]:
for k in ei_spold[model].list_datasets('00420798'):
    ei_spold[model].retrieve_or_fetch_entity(k)