In [33]:
import os
import sys
import json
import uuid 

from itertools import groupby

In [48]:
def group_by(iterable, keyfunc):
    """
    Partition the items of an iterable by a key function.

    The items are sorted by keyfunc first, so each key appears exactly once
    and the groups come back in ascending key order.
    :param iterable: items to partition
    :param keyfunc: callable returning the (sortable) grouping key of an item
    :return: (groups, uniquekeys) -- two parallel lists; groups[i] is the list of
      items whose key is uniquekeys[i]
    """
    ordered = sorted(iterable, key=keyfunc)
    # groupby hands out one-shot iterators, so materialise each group right away
    keyed = [(key, list(members)) for key, members in groupby(ordered, keyfunc)]
    groups = [members for _, members in keyed]
    uniquekeys = [key for key, _ in keyed]
    return groups, uniquekeys

In [22]:
# Directory that holds the gzipped JSON catalogs.  Set the LCA_CATALOG_DIR
# environment variable to point the notebook at a different location; the
# default is the path this notebook was originally run against.
CATALOG = os.environ.get('LCA_CATALOG_DIR', '/data/GitHub/lca-catalog/catalogs/')

# Catalog archive file names; they are joined onto CATALOG when loaded.
EI32_A = 'ecoinvent_3.2_apos_spold.json.gz'
USLCI = 'uslci_ecospold.json.gz'

In [3]:
import re
import gzip

from eight import USING_PYTHON2


def from_json(fname):
    """
    Load a catalog from a json archive.

    Prints a one-line progress message, then parses the whole file.
    :param fname: path of the json file (str); a name ending in '.gz' is read through gzip
    :return: the deserialized JSON content, exactly as produced by json.load
    """
    print('Loading JSON data from %s:' % fname)
    if fname.endswith('.gz'):
        opener = gzip.open
        # gzip.open treats plain 'r' as binary on Python 3, so text mode has to be
        # requested explicitly there; Python 2 keeps the plain 'r' mode.
        mode = 'r' if USING_PYTHON2 else 'rt'
    else:
        opener = open
        mode = 'r'
    with opener(fname, mode) as fp:
        return json.load(fp)


In [17]:
us = from_json(os.path.join(CATALOG, USLCI))

Loading JSON data from /data/GitHub/lca-catalog/catalogs/uslci_ecospold.json.gz:


In [23]:
ei = from_json(os.path.join(CATALOG, EI32_A))

Loading JSON data from /data/GitHub/lca-catalog/catalogs/ecoinvent_3.2_apos_spold.json.gz:


In [18]:
us.keys()

dict_keys(['quantities', '@context', 'dataSourceType', 'processes', 'catalogNames', 'flows', 'dataSourceReference', 'nsUuid'])

## utility functions

These would be methods of an lca-catalog class, should there ever be a need for one that is independent of the `lca-tools` infrastructure.

In [28]:
def make_flows(catalog):
    """
    Index a catalog's flows by their entity ID.

    :param catalog: catalog dict with a 'flows' list of flow dicts
    :return: dict mapping each flow's 'entityId' to the flow dict itself
      (if an ID occurs more than once, the last flow with that ID is kept)
    """
    return {flow['entityId']: flow for flow in catalog['flows']}


## detect interior flows

The strategy here is to build two sets of flow UUIDs, one for inputs and one for outputs, and then take their intersection.

In [20]:
def interior_flows(catalog):
    """
    Find the flows that occur both as an input and as an output in a catalog.

    Prints the number of distinct input and output flows and the size of their overlap.
    :param catalog: catalog dict whose 'processes' each carry a list of 'exchanges';
      every exchange has a 'direction' ('Input' or 'Output') and a 'flow'
    :return: set of flow references present in both the input and the output set
    :raises ValueError: on an exchange whose direction is neither 'Input' nor 'Output'
    """
    consumed = set()
    produced = set()
    for process in catalog['processes']:
        for exchange in process['exchanges']:
            direction = exchange['direction']
            if direction == 'Input':
                bucket = consumed
            elif direction == 'Output':
                bucket = produced
            else:
                raise ValueError('mysterious direction designation "%s"' % direction)
            bucket.add(exchange['flow'])
    print('Found %d inputs, %d outputs' % (len(consumed), len(produced)))
    shared = consumed & produced
    print('Found %d interior flows' % len(shared))
    return shared
                    

In [13]:
ei['processes'][0]['exchanges'][0]['direction']

'Output'

In [24]:
%time in_ei = interior_flows(ei)

Found 2971 inputs, 4355 outputs
Found 2688 interior flows
CPU times: user 210 ms, sys: 0 ns, total: 210 ms
Wall time: 204 ms


In [27]:
us['flows'][430]

{'CasNumber': '',
 'Comment': '',
 'Compartment': ['air', 'unspecified'],
 'Name': 'Chromium III',
 'characterizations': [{'entityType': 'characterization',
   'isReference': True,
   'quantity': 'bbe8f43d-c141-3e21-b8c2-3ba1ce556ad0',
   'value': 1.0}],
 'entityId': '1c4485a2-5ac9-3c06-aca9-25aabf72e45c',
 'entityType': 'flow',
 'externalId': 9483,
 'origin': '/home/b/Dropbox/data/USLCI/USLCI_Processes_ecospold1.zip'}

In [26]:
us.keys()

dict_keys(['quantities', '@context', 'dataSourceType', 'processes', 'catalogNames', 'flows', 'dataSourceReference', 'nsUuid'])

In [29]:
us_flows = make_flows(us)

In [30]:
len(us_flows)

4176

In [49]:
gs, ks = group_by(us_flows.values(), lambda x: x['Compartment'][0])

In [52]:
ks

['Air Transportation',
 'Biofuels Manufacturing',
 'CUTOFF Flows',
 'Chemical Manufacturing',
 'Crop Production',
 'Elec. Equip., Appliance, and Comp. Manufacturing',
 'Elementary Flows',
 'Fabricated Metal Product Manufacturing',
 'Forestry and Logging',
 'Mining (except Oil and Gas)',
 'Nonmetallic Mineral Product Manufacturing',
 'Oil and Gas Extraction',
 'Paper Manufacturing',
 'Petroleum and Coal Products Manufacturing',
 'Plastics and Rubber Products Manufacturing',
 'Primary Metal Manufacturing',
 'Rail Transportation',
 'Transit and Ground Passenger Transportation',
 'Transportation Equipment Manufacturing',
 'Truck Transportation',
 'Utilities',
 'Waste Management and Remediation Services',
 'Water Transportation',
 'Wood Product Manufacturing',
 'air',
 'final-waste-flow',
 'non-material',
 'others',
 'resource',
 'soil',
 'water']

In [60]:
for i, g in enumerate(gs):
    print(' %2d %30.30s: %6d' % (i, ks[i], len(g)))

  0             Air Transportation:      1
  1         Biofuels Manufacturing:      7
  2                   CUTOFF Flows:    523
  3         Chemical Manufacturing:    128
  4                Crop Production:     55
  5 Elec. Equip., Appliance, and C:     10
  6               Elementary Flows:     19
  7 Fabricated Metal Product Manuf:     12
  8           Forestry and Logging:    111
  9    Mining (except Oil and Gas):      7
 10 Nonmetallic Mineral Product Ma:      5
 11         Oil and Gas Extraction:      3
 12            Paper Manufacturing:     49
 13 Petroleum and Coal Products Ma:     11
 14 Plastics and Rubber Products M:     22
 15    Primary Metal Manufacturing:     50
 16            Rail Transportation:      1
 17 Transit and Ground Passenger T:     46
 18 Transportation Equipment Manuf:      5
 19           Truck Transportation:    102
 20                      Utilities:    127
 21 Waste Management and Remediati:     22
 22           Water Transportation:      6
 23     Woo

In [62]:
for f in gs[13]:
    print(f['Name'])

Petroleum refining, at refinery
Diesel, at refinery
Residual fuel oil, at refinery
Petroleum refining coproduct, unspecified, at refinery
Kerosene, at refinery
Liquefied petroleum gas, at refinery
Gasoline, at refinery
Petroleum refining coproduct, at refinery
Refinery gas, at refinery
Bitumen, at refinery
Petroleum coke, at refinery


In [167]:
sorted([us_flows[f]['Name'].lower() for f in in_us if us_flows[f]['Name'].lower().find('combusted') >= 0])

['anthracite coal, combusted in industrial boiler',
 'bituminous coal, combusted in industrial boiler',
 'bituminous coal, combusted in industrial boiler, at pulp and paper mill (excl.)',
 'diesel, combusted in industrial boiler',
 'diesel, combusted in industrial boiler, at pulp and paper mill (excl.)',
 'diesel, combusted in industrial equipment',
 'gasoline, combusted in equipment',
 'gasoline, combusted in equipment, at pulp and paper mill (excl.)',
 'hog fuel, pur., combusted in industrial boiler, at pulp and paper mill (excl.)',
 'hog fuel, self-gen., combusted in ind. boiler, at pulp and paper mill (excl.)',
 'lignite coal, combusted in industrial boiler',
 'liquefied petroleum gas, combusted in industrial boiler',
 'lpg, combusted in industrial boiler, at pulp and paper mill (excl.)',
 'natural gas, combusted in industrial boiler',
 'natural gas, combusted in industrial boiler, at hydrocracker, for butadiene',
 'natural gas, combusted in industrial boiler, at hydrocracker, for 

In [171]:
us['nsUuid']

'a9d158e0-d48c-4427-8c55-42719e9e11cc'

In [176]:
[p for p in us['processes'] if p['entityId'] == str(uuid.uuid3(uuid.UUID(us['nsUuid']), 'Petroleum refining, at refinery'))]



[{'Classifications': ['Petroleum and Coal Products Manufacturing',
   'Petroleum Refineries'],
  'Comment': 'This module expresses data on the basis 1 kilogram of general refinery product as well as data allocated to specific refinery products. The data are allocated to specific refinery products based on the percent by mass of each product in the refinery output. Refinery products include gasoline, distillate/diesel oil, LPG, residual oil, kerosene/jet fuel, and other (still gas, petroleum, coke, asphalt, and petrochemical feedstocks).  Complete inventory data and metadata are available in full in the final report and appendices, Cradle-to-Gate Life Cycle Inventory of Nine Plastic Resins and Four Polyurethane Precursors. This report has been extensively reviewed within Franklin Associates and has undergone partial critical review by ACC Plastics Division members and is available at: www.americanchemistry.com. Quantities may vary slightly between the reference to main source and and th

In [85]:
next(f for f in us_flows.values())['Name'].lower().find('diesel')

-1

In [68]:
g.find('ds')

2

In [89]:
from collections import Counter

In [103]:
# Top-level compartment names of elementary flows.  Within this cell the set is
# only referenced by the commented-out alternative filter below.
elem = {'air', 'water', 'soil', 'resource'}

# Tally, per flow, how many 'Output' exchanges across all USLCI processes carry it.
output_counter = Counter()

# NOTE(review): `in_us` is not defined in any cell visible here -- presumably
# `in_us = interior_flows(us)`; confirm before running on a fresh kernel.
for p in us['processes']:
    for x in p['exchanges']:
        if x['direction'] == 'Output':
            # Only flows in `in_us` are counted.  The commented-out line is an
            # alternative filter: flows whose first compartment is not in `elem`.
            if x['flow'] in in_us:
            #if us_flows[x['flow']]['Compartment'][0] not in elem:
                output_counter[x['flow']] += 1
            

In [105]:
for k in output_counter.most_common(30):
    print('%2d: %s' % (k[1], us_flows[k[0]]['Name']))

86: Chromium
61: Chloride
54: Sodium, ion
50: Carbon dioxide, biogenic
19: Chromium, ion
16: Thallium
 8: Hydrogen cyanide (prussic acid)
 8: Chloride (unspecified)
 8: Inorganic salts and acids, unspecified
 8: Water, cooling
 7: Methane, bromotrifluoro-, Halon 1301
 7: Cooling water, non-contact
 3: Water
 2: Diesel, at refinery
 2: Oil, crude, 42 MJ per kg, in ground
 2: Kerosene, at refinery
 2: Quartz sand (silica sand; silicon dioxide)
 2: Sodium chloride, in ground
 2: Residual fuel oil, at refinery
 2: Lignite, 11 MJ per kg, in ground
 2: Petroleum coke, at refinery
 2: Liquefied petroleum gas, at refinery
 2: Oxygen
 2: Gas, natural, 46.8 MJ per kg, in ground
 2: Coal, hard, 30.7 MJ per kg, in ground
 2: Harvesting, fresh fruit bunch, at farm
 2: Gasoline, at refinery
 2: Wood fuel, hardwood, green, at veneer mill, E
 1: Fuels, burned at coated freesheet, average production, at mill
 1: Natural gas, combusted in industrial boiler, at pulp and paper mill (EXCL.)


In [111]:
us_flows[output_counter.most_common(4)[3][0]]

{'CasNumber': '',
 'Comment': '',
 'Compartment': ['air', 'unspecified'],
 'Name': 'Carbon dioxide, biogenic',
 'characterizations': [{'entityType': 'characterization',
   'isReference': True,
   'quantity': 'bbe8f43d-c141-3e21-b8c2-3ba1ce556ad0',
   'value': 1.0}],
 'entityId': '80d22a4f-d79f-3597-9e4e-23c25505d5ad',
 'entityType': 'flow',
 'externalId': 5875,
 'origin': '/home/b/Dropbox/data/USLCI/USLCI_Processes_ecospold1.zip'}

In [115]:
co2_biogenic =output_counter.most_common(4)[3][0]
chromium = output_counter.most_common(1)[0][0]

In [118]:
us_flows['11183286-23c2-3521-8b55-4907c5754aaf']

{'CasNumber': '',
 'Comment': '',
 'Compartment': ['resource', 'in ground'],
 'Name': 'Chromium, 25.5 in chromite, 11.6% in crude ore, in ground',
 'characterizations': [{'entityType': 'characterization',
   'isReference': True,
   'quantity': 'bbe8f43d-c141-3e21-b8c2-3ba1ce556ad0',
   'value': 1.0}],
 'entityId': '11183286-23c2-3521-8b55-4907c5754aaf',
 'entityType': 'flow',
 'externalId': 6570,
 'origin': '/home/b/Dropbox/data/USLCI/USLCI_Processes_ecospold1.zip'}

In [119]:
[p['Classifications'] for p in us['processes'] if chromium in [x['flow'] for x in p['exchanges'] if x['direction'] =='Input']]

[['Primary Metal Manufacturing', 'Primary Metal Manufacturing'],
 ['Primary Metal Manufacturing', 'Primary Metal Manufacturing']]

In [125]:
def set_to_index(flow_set):
    """
    Creates a dict of flow UUID to enumeration.

    Numbers are handed out 0, 1, 2, ... in the iteration order of flow_set, so for
    a plain set the particular numbering is arbitrary.
    :param flow_set: iterable of hashable flow identifiers
    :return: dict mapping each member of flow_set to its integer position
      (a repeated member keeps its last position)
    """
    return {flow: position for position, flow in enumerate(flow_set)}

In [122]:
import scipy as sp

In [124]:
from scipy.sparse import coo_matrix

In [143]:
_process_iter = (p for p in us['processes'])

In [126]:
p = next(p for p in us['processes'])

In [130]:
def reference_exchanges(p):
    """
    Generate the reference exchanges of a process.

    :param p: process dict with an 'exchanges' list of exchange dicts
    :return: generator over the exchange dicts whose 'isReference' entry is exactly
      True; exchanges without that key, or with any other value, are skipped
    """
    for exchange in p['exchanges']:
        # identity test on purpose: only the literal True qualifies, not merely truthy values
        if exchange.get('isReference') is True:
            yield exchange

In [146]:
[p for p in us['processes'] if len([x for x in reference_exchanges(p)]) < 1]

[{'Classifications': ['Paper Manufacturing',
   'Pulp, Paper, and Paperboard Mills'],
  'Comment': 'Uncoated Freesheet, average North American production, at mill.  By-products were allocated 0% of the impacts to enable the user to choose and apply their own allocation method more easily. All flows are production-weighted means. Data were developed for a full LCA that was critically reviewed.  Reviewers were: • Martha Stevenson, Chairperson – Private Consultant to the public interest with ten years of experience in sustainability, environmental management, packaging and materials recovery; led the development of the Design Guidelines for Sustainable Packaging and the COMPASS software used to compare the environmental impacts of package designs; • Pascal Lesage – Life cycle assessment consultant, Researcher at École Polytechnique de Montréal [at the Inter-University Research Centre for the Life Cycle of Products, Processes and Services (CIRAIG) in Montreal]; and • Dale Phenicie, Environ

In [123]:
def create_technology_matrix(catalog):
    """
    Work in progress: first steps toward a technology matrix for a catalog.

    As shown here, the function only determines the catalog's interior flows and
    numbers them; no matrix is assembled and nothing is returned.
    NOTE(review): the output recorded for this cell is a scipy AttributeError, so
    the saved cell may have contained more code than is visible -- confirm.
    :param catalog: catalog dict, passed straight to interior_flows()
    """
    interior = interior_flows(catalog)
    index = set_to_index(interior)  # this is a dict that maps interior flow to index
    

AttributeError: module 'scipy' has no attribute 'sparse'

In [149]:
len(in_ei)

2688

In [150]:
ei_flows = make_flows(ei)

In [151]:
[ei_flows[x]['Name'] for x in in_ei if ei_flows[x]['Name'].lower().find('electricity') >= 0 ]

['operation, computer, laptop, video mode, label-certified electricity',
 'electricity, for reuse in municipal waste incineration only',
 'energy use and operation emissions, electric bicycle, label-certified electricity',
 'operation, internet access equipment, label-certified electricity',
 'mini CHP plant, common components for heat+electricity',
 'electricity, low voltage',
 'heat and power co-generation unit, 6400kW thermal, components for electricity only',
 'transmission network, electricity, high voltage',
 'heat and power co-generation unit, 1MW electrical, components for electricity only',
 'operation, computer, laptop, 68% active work, label-certified electricity',
 'electricity, high voltage, for internal use in coal mining',
 'heat and power co-generation unit, 200kW electrical, diesel SCR, common components for heat+electricity',
 'transport, passenger, electric bicycle, label-certified electricity',
 'heat and power co-generation unit, 6400kW thermal, common components f

In [164]:
['%-8.8s %s' % (p['SpatialScope'], p['Name']) for p in ei['processes'] if re.search('^market group', p['Name'], flags=re.I)]

['RME      market group for electricity, high voltage',
 'RER      market group for heavy fuel oil',
 'CA       market group for electricity, high voltage',
 'GLO      market group for natural gas, high pressure',
 'RAS      market group for electricity, high voltage',
 'RAS      market group for electricity, medium voltage',
 'GLO      market group for diesel',
 'GLO      market group for transport, freight train',
 'Canada w market group for electricity, high voltage',
 'US       market group for electricity, medium voltage',
 'RER      market group for heat, district or industrial, other than natural gas',
 'RER      market group for tap water',
 'Europe w market group for electricity, high voltage',
 'UCTE     market group for electricity, low voltage',
 'Canada w market group for electricity, low voltage',
 'ENTSO-E  market group for electricity, high voltage',
 'GLO      market group for tap water',
 'RNA      market group for electricity, medium voltage',
 'US       market group