In [27]:
import ecoinvent_interface as ei
import ecoinvent_migrate as em
import bw2data as bd
from pathlib import Path
import xmltodict
import pandas as pd
import itertools
from pycasreg.validation import validate_cas

In [3]:
# Handle to the local ecoinvent_interface cache. Its `catalogue` mapping is used
# further down to find the directory of each extracted ecoSpold02 release.
cs = ei.CachedStorage()

In [13]:
# Versions being compared, plus the names derived from them.
source_version = "3.8"
target_version = "3.9"
project_name: str = "ecoinvent-migration"

source_db_name = f"ecoinvent-{source_version}-biosphere"
target_db_name = f"ecoinvent-{target_version}-biosphere"

# Path of the change report annex workbook for this version pair.
excel_filepath = em.main.get_change_report_context(
    source_version=source_version,
    target_version=target_version,
    project_name=project_name,
)

# Open the workbook once and close it deterministically instead of leaving an
# unclosed `pd.ExcelFile` behind and re-opening the file for `read_excel`.
with pd.ExcelFile(excel_filepath) as workbook:
    sheet_names = workbook.sheet_names
    # Sheet capitalisation is matched case-insensitively.
    candidates = [name for name in sheet_names if name.lower() == "ee deletions"]
    if not candidates:
        # Fail with an actionable message rather than a bare IndexError on `candidates[0]`.
        raise ValueError(
            f"No 'EE Deletions' sheet found in {excel_filepath}; available sheets: {sheet_names}"
        )
    deletion_records = pd.read_excel(io=workbook, sheet_name=candidates[0]).to_dict(orient="records")

# `data` is only ever the processed mapping (it is indexed with 'replace' and
# 'delete' in later cells); the raw spreadsheet rows keep their own name.
data = em.main.source_target_biosphere_pair(
    data=deletion_records,
    source_version=source_version,
    target_version=target_version,
    keep_deletions=True,
)

[32m2024-08-30 08:05:38.465[0m | [1mINFO    [0m | [36mecoinvent_migrate.main[0m:[36mget_change_report_context[0m:[36m59[0m - [1mVersions available for this license: ['3.10', '3.9.1', '3.9', '3.8', '3.7.1', '3.7', '3.6', '3.5', '3.4', '3.3', '3.2', '3.1', '3.01', '2'][0m
[32m2024-08-30 08:05:39.043[0m | [1mINFO    [0m | [36mecoinvent_migrate.main[0m:[36mget_change_report_context[0m:[36m72[0m - [1mUsing change report annex file Change Report Annex v3.8 - v3.9.xlsx[0m


In [19]:
# Preview the first five deletion records (each has a 'source' and a 'comment').
data['delete'][:5]

[{'source': {'uuid': '5188df39-2bb8-4826-b469-fa9f86bacc09',
   'name': 'Carbon dioxide, from soil or biomass stock'},
  'comment': 'indoor compartment deleted as not used'},
 {'source': {'uuid': '05f6e226-0593-46f5-a788-01fa306e66ad',
   'name': 'Carbon monoxide, from soil or biomass stock'},
  'comment': 'indoor compartment deleted as not used'},
 {'source': {'uuid': '6abf9e1a-c33e-49ae-aeae-ab84cee7d2ab',
   'name': 'Dimethyl ether'},
  'comment': 'indoor compartment deleted as not used'},
 {'source': {'uuid': '288c4e8a-d371-4bc1-b45b-c4b0cefd400e',
   'name': 'Methane, from soil or biomass stock'},
  'comment': 'indoor compartment deleted as not used'},
 {'source': {'uuid': 'd07867e3-66a8-4454-babd-78dc7f9a21f8',
   'name': '[Deleted]Carfentrazone ethyl ester'},
  'comment': 'should already have been deleted last time'}]

In [18]:
# UUIDs of every source flow the change report already accounts for,
# whether it was replaced or deleted.
affected_uuids = set(
    entry['source']['uuid']
    for section in ('replace', 'delete')
    for entry in data[section]
)
affected_uuids

{'05f6e226-0593-46f5-a788-01fa306e66ad',
 '09cf7c11-0269-4fc1-a5f3-47121a7882d3',
 '288c4e8a-d371-4bc1-b45b-c4b0cefd400e',
 '3fa03c96-b976-4f0f-8089-220968515ee1',
 '43b2649e-26f8-400d-bc0a-a0667e850915',
 '4412a76b-eb71-4fd3-afc9-6871a58cae5f',
 '5188df39-2bb8-4826-b469-fa9f86bacc09',
 '66a6dad0-e450-4206-88e1-f823a04f8b1d',
 '6abf9e1a-c33e-49ae-aeae-ab84cee7d2ab',
 '7346ba9f-476c-4343-9f37-d1bb174eed6d',
 '831f48fc-ca00-4534-9ede-730190b3bee0',
 '9547aff9-e1fc-5fad-a674-9b9a9fdb1c9c',
 '9c2a7dc9-8b1f-46ba-bc16-0d761a4f6016',
 'a07b8a8c-8cab-4656-a82f-310e8069e323',
 'b53d3744-3629-4219-be20-980865e54031',
 'b8c794de-ac20-47f6-ae87-84d91e95da93',
 'c4e01cfb-2f50-52d5-8177-1518ad8b7bea',
 'c5c25aa6-d630-40bd-bed7-4e718c877ef4',
 'c941d6d0-a56c-4e6c-95de-ac685635218d',
 'd07867e3-66a8-4454-babd-78dc7f9a21f8',
 'e030108f-2125-4bcb-a73b-ad72130fcca3',
 'e3043a7f-5347-4c7b-89ee-93f11b2f6d9b',
 'ec420d84-577b-402e-bd90-f8a4b2310135',
 'f3e5bff4-5bdf-55d7-8dd9-3cac7b09e57f',
 'f9c73aca-3d5c-

In [28]:
def format(ecospold: dict) -> dict:
    """Index the elementary exchanges of a parsed master-data file by their id.

    NOTE(review): the name shadows the builtin ``format``; it is kept because
    other cells call it by this name.

    Args:
        ecospold: Mapping produced by ``xmltodict.parse`` that contains
            ``['validElementaryExchanges']['elementaryExchange']``.

    Returns:
        ``{exchange '@id': {'name', 'CAS number', 'formula', 'unit'}}``.
        ``'CAS number'`` is ``None`` when ``@casNumber`` is missing or empty,
        otherwise whatever ``validate_cas`` returns. ``'formula'`` is ``None``
        when ``@formula`` is missing.

    Raises:
        ValueError: propagated from ``validate_cas`` when a CAS number fails
            validation (this happens with the ecoinvent master data, e.g. a
            check-digit error).
    """
    exchanges = ecospold['validElementaryExchanges']['elementaryExchange']
    # xmltodict yields a bare mapping instead of a one-item list when the parent
    # has a single child; normalise so the loop always sees exchange mappings.
    if isinstance(exchanges, dict):
        exchanges = [exchanges]

    formatted = {}
    for obj in exchanges:
        cas_number = obj.get('@casNumber')
        formatted[obj['@id']] = {
            'name': obj['name']['#text'],
            # Only validate when a non-empty CAS number is present.
            'CAS number': validate_cas(cas_number) if cas_number else None,
            'formula': obj.get('@formula'),
            'unit': obj['unitName']['#text'],
        }
    return formatted

In [29]:
def _load_elementary_exchanges(version: str) -> dict:
    """Parse and format ``MasterData/ElementaryExchanges.xml`` of a cached cutoff release.

    Args:
        version: ecoinvent version string, e.g. ``"3.8"``.

    Returns:
        The result of ``format`` applied to the parsed XML.

    Raises:
        KeyError: if the release archive is not listed in ``cs.catalogue``.
    """
    archive_name = ei.ReleaseType.ecospold.filename(version=version, system_model_abbr="cutoff")
    xml_path = Path(cs.catalogue[archive_name]['path']) / "MasterData" / "ElementaryExchanges.xml"
    # Context manager so the file handle is closed as soon as parsing is done.
    with open(xml_path, "rb") as xml_file:
        return format(xmltodict.parse(xml_file))


# Reuse the versions configured at the top of the notebook instead of repeating
# the literals, so changing the version pair only needs one edit.
source_ee = _load_elementary_exchanges(source_version)
target_ee = _load_elementary_exchanges(target_version)

ValueError: CAS not valid: 007727-34-7 (CAS Check Digit error: CAS '007727-34-7' has check digit of 7, but it should be 6)

In [30]:
# Reconcile the master-data files with the change report:
#   * flows missing from the target and not mentioned in the report become
#     extra 'delete' entries;
#   * flows present in both whose attributes differ become 'update' entries.
# NOTE: this mutates `data` in place and `affected_uuids` is not refreshed here,
# so re-running the cell without rebuilding `data` appends duplicate entries.

# Make sure the 'update' list exists before appending; whether the upstream
# helper always provides this key is not visible from this notebook.
data.setdefault('update', [])

for key_source, value_source in source_ee.items():
    if key_source not in target_ee:
        if key_source not in affected_uuids:
            data['delete'].append({
                'source': {
                    'uuid': key_source,
                    'name': value_source['name']
                },
                'comment': 'Deleted flow not listed in change report'
            })
        # Otherwise the removal is already included in the change report.
        continue

    # Only truthy target values are compared, so an attribute that became empty
    # in the target version is not reported as a change.
    diff = {key: value for key, value in target_ee[key_source].items() if value and value != value_source[key]}
    if diff:
        print(value_source, diff)
        data['update'].append({
            # Dict comprehension (`k: v`); the previous `k, v` form was a SyntaxError.
            'source': {k: v for k, v in value_source.items() if v} | {"uuid": key_source},
            'target': diff | {"uuid": key_source},
            'comment': 'Change in attributes not listed in change report'
        })

{'name': '2-Methyl pentane', 'CAS number': '000107-83-5', 'formula': 'C6H14', 'unit': 'kg'} {'name': '2-Methylpentane'}
{'name': 'Acrylate, ion', 'CAS number': '000079-10-7', 'formula': 'C3H4O2', 'unit': 'kg'} {'name': 'Acrylate', 'CAS number': '010344-93-1', 'formula': 'C3H3O2-'}
{'name': 'Aluminium', 'CAS number': '007429-90-5', 'formula': 'Al', 'unit': 'kg'} {'name': 'Aluminium III', 'CAS number': '022537-23-1', 'formula': 'Al+3'}
{'name': 'Aluminium, 24% in bauxite, 11% in crude ore, in ground', 'CAS number': '001318-16-7', 'formula': None, 'unit': 'kg'} {'name': 'Aluminium, 24% in bauxite, 11% in crude ore', 'formula': 'Al'}
{'name': 'Ammonium, ion', 'CAS number': '014798-03-9', 'formula': 'H4N+', 'unit': 'kg'} {'name': 'Ammonium'}
{'name': 'Anhydrite, in ground', 'CAS number': '014798-04-0', 'formula': 'CaH2O4S', 'unit': 'kg'} {'name': 'Anhydrite'}
{'name': 'Antimony', 'CAS number': '007440-36-0', 'formula': 'Sb', 'unit': 'kg'} {'name': 'Antimony ion'}
{'name': 'AOX, Adsorbable O

885

In [16]:
# Catalogue entries whose 'archive' field is the 3.8 cutoff ecoSpold02 archive.
release_archive = ei.ReleaseType.ecospold.filename(version="3.8", system_model_abbr="cutoff")
[
    (catalogue_key, entry)
    for catalogue_key, entry in cs.catalogue.items()
    if entry.get('archive') == release_archive
]

[('ecoinvent 3.8_cutoff_ecoSpold02',
  {'path': '/Users/cmutel/Library/Application Support/EcoinventInterface/cache/ecoinvent 3.8_cutoff_ecoSpold02',
   'archive': 'ecoinvent 3.8_cutoff_ecoSpold02.7z',
   'extracted': True,
   'created': '2023-11-06T16:14:12.572963',
   'system_model': 'cutoff',
   'version': '3.8',
   'kind': 'release'}),
 ('ecoinvent 3.8_cutoff_ecoSpold02.7z',
  {'path': '/Users/cmutel/Library/Application Support/EcoinventInterface/cache/ecoinvent 3.8_cutoff_ecoSpold02',
   'archive': 'ecoinvent 3.8_cutoff_ecoSpold02.7z',
   'extracted': True,
   'created': '2023-11-12T09:42:39.527453',
   'system_model': 'cutoff',
   'version': '3.8',
   'kind': 'release'})]

In [8]:
# Settings object that is handed to `ei.EcoinventRelease` in a later cell.
# NOTE(review): presumably picks up stored ecoinvent credentials — confirm.
settings = ei.Settings()

In [9]:
# Archive filename for the 3.8 cutoff ecoSpold02 release; the same string is used
# as a key into `cs.catalogue` elsewhere in this notebook.
ei.ReleaseType.ecospold.filename(version="3.8", system_model_abbr="cutoff")

'ecoinvent 3.8_cutoff_ecoSpold02.7z'

In [10]:
# EcoinventRelease object built from `settings`; used to list release files.
er = ei.EcoinventRelease(settings)

In [11]:
# List the release files offered for version 3.8 (filename -> uuid/size/modified).
# NOTE(review): `_filename_dict` is underscore-prefixed, i.e. not public API, and
# may change between ecoinvent_interface versions.
er._filename_dict(version="3.8")

{'ecoinvent 3.8_cutoff_cumulative_lcia_xlsx.7z': {'uuid': '36a30e54-b405-4e0b-b22f-f4ee4267704d',
  'size': 184779194,
  'modified': datetime.datetime(2023, 4, 25, 0, 0)},
 'ecoinvent 3.8_cutoff_cumulative_lci_xlsx.7z': {'uuid': '1520f048-2c3f-4413-bf91-dd698549db2a',
  'size': 432278088,
  'modified': datetime.datetime(2023, 4, 25, 0, 0)},
 'ecoinvent 3.8_cutoff_ecoSpold02.7z': {'uuid': '945e4e60-1e45-4ede-b432-d4db4b1b333a',
  'size': 63981644,
  'modified': datetime.datetime(2023, 4, 25, 0, 0)},
 'ecoinvent 3.8_cutoff_lcia_ecoSpold02.7z': {'uuid': '193c0d61-0d58-4911-97f8-ed8edd9ccfcb',
  'size': 225879056,
  'modified': datetime.datetime(2023, 4, 25, 0, 0)},
 'ecoinvent 3.8_cutoff_lci_ecoSpold02.7z': {'uuid': 'e483b611-857b-419b-9187-2dd00e89b451',
  'size': 1105433422,
  'modified': datetime.datetime(2023, 4, 25, 0, 0)},
 'universal_matrix_export_3.8_cut-off.7z': {'uuid': 'c66536fb-287e-40fc-8579-cb5438129307',
  'size': 4720543,
  'modified': datetime.datetime(2023, 4, 25, 0, 0)}