# 0.1. imports

In [1]:
# i/o
import sys
import os
from pathlib import Path
import gzip
import pickle
# git
import git
# configuration
import yaml
# data science
import pandas as pd
import copy
# lca
import pymrio
import ecospold2matrix as e2m
# type hints
from pymrio import IOSystem
from pathlib import PosixPath

# 0.2. file paths
## 0.2.1. directories

In [2]:
%%capture
print(path_dir_data := Path(Path.home() / 'data'))
print(path_dir_data_raw := Path(path_dir_data / 'data_raw'))
print(path_dir_data_processed := Path(path_dir_data / 'data_processed'))

In [3]:
%%capture
print(path_dir_repo_pylcaio_parent := Path(Path.home() / 'github'))
print(path_dir_repo_pylcaio_src := Path(path_dir_repo_pylcaio_parent / 'pylcaio/src'))

In [4]:
%%capture
print(path_dir_exiobase := path_dir_data_raw / 'exiobase_3_8')
print(path_dir_ecoinvent := path_dir_data_raw / 'ecoinvent_3.8_cutoff_ecoSpold02')

## 0.2.2. files

In [5]:
%%capture
print(path_file_ecoinvent_LCIA_implementation := str(path_dir_ecoinvent / 'LCIA Implementation v3.8.xlsx'))

### ℹ️ prerequisites

1. `Ecoinvent` data present in directory `path_dir_data_raw`
2. `Exiobase` data present in directory `path_dir_data_raw` or internet connection for automatic download from zenodo.org

# 1. data preparation

## 1.0. variables 

In [6]:
str_exiobase_system: str = 'pxp'
str_exiobase_year: str = '2011'
str_exiobase_zip_file: str = 'IOT_' + str_exiobase_year + '_' + str_exiobase_system + '.zip'

## 1.1. download `Exiobase` from zenodo.org 

In [7]:
if path_dir_exiobase.exists():
    pass
else:
    pymrio.download_exiobase3(
        storage_folder = path_dir_exiobase,
        system = str_exiobase_system,
        years = str_exiobase_year
    )

## 1.2. check if `Ecoinvent` data is present

In [8]:
assert path_dir_ecoinvent.exists(), 'Ecoinvent data not found.'

# 2. parse databases
## 2.1. parse `Exiobase`

In [9]:
exiobase: pymrio.IOSystem = pymrio.parse_exiobase3(path_dir_exiobase / str_exiobase_zip_file)

## 2.2. parse `Ecoinvent`

In [83]:
parser = e2m.Ecospold2Matrix(
    sys_dir = str(path_dir_ecoinvent), # passing a Posix Path object breaks the functionality
    project_name = 'ecoinvent_3_8',
    out_dir = path_dir_data_processed,
    characterisation_file = path_file_ecoinvent_LCIA_implementation,
    positive_waste = False,
    nan2null = True
)
parser.save_interm = False
parser.prefer_pickles = True

2023-01-12 16:06:26,010 - ecoinvent_3_8 - INFO - Ecospold2Matrix Processing
INFO:ecoinvent_3_8:Ecospold2Matrix Processing
2023-01-12 16:06:26,143 - ecoinvent_3_8 - INFO - Current git commit: 7446636687ac9de51a2c39d0f8557bd8e23413cd
INFO:ecoinvent_3_8:Current git commit: 7446636687ac9de51a2c39d0f8557bd8e23413cd
2023-01-12 16:06:26,145 - ecoinvent_3_8 - INFO - Project name: ecoinvent_3_8
INFO:ecoinvent_3_8:Project name: ecoinvent_3_8
2023-01-12 16:06:26,146 - ecoinvent_3_8 - INFO - Unit process and Master data directory: /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02
INFO:ecoinvent_3_8:Unit process and Master data directory: /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02
2023-01-12 16:06:26,146 - ecoinvent_3_8 - INFO - Data saved in: /Users/michaelweinold/data/data_processed
INFO:ecoinvent_3_8:Data saved in: /Users/michaelweinold/data/data_processed
2023-01-12 16:06:26,147 - ecoinvent_3_8 - INFO - Replace Not-a-Number instances with 0.0 in all ma

In [84]:
parser.ecospold_to_Leontief(
    fileformats = 'Pandas',
    with_absolute_flows = True
)

2023-01-12 16:06:27,126 - ecoinvent_3_8 - INFO - Products extracted from IntermediateExchanges.xml with SHA-1 of 1da23bc8fd24d97422a2a21ba3626d2cdfa6a428
INFO:ecoinvent_3_8:Products extracted from IntermediateExchanges.xml with SHA-1 of 1da23bc8fd24d97422a2a21ba3626d2cdfa6a428
2023-01-12 16:07:02,493 - ecoinvent_3_8 - INFO - Activities extracted from ActivityIndex.xml with SHA-1 of 03403c01ac6f74a5d6cc5ca8820593f7e516b709
INFO:ecoinvent_3_8:Activities extracted from ActivityIndex.xml with SHA-1 of 03403c01ac6f74a5d6cc5ca8820593f7e516b709
2023-01-12 16:07:02,666 - ecoinvent_3_8 - INFO - Processing 19565 files in /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02/datasets
INFO:ecoinvent_3_8:Processing 19565 files in /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02/datasets
2023-01-12 16:08:26,728 - ecoinvent_3_8 - INFO - Processing 19565 files - this may take a while ...
INFO:ecoinvent_3_8:Processing 19565 files - this may take a while ...
2023-01-12 1

starting characterisation


2023-01-12 16:17:59,356 - ecoinvent_3_8 - INFO - Starting characterisation matching
INFO:ecoinvent_3_8:Starting characterisation matching
2023-01-12 16:18:03,956 - ecoinvent_3_8 - INFO - Characterisation matching done. C matrix created
INFO:ecoinvent_3_8:Characterisation matching done. C matrix created
2023-01-12 16:18:03,956 - ecoinvent_3_8 - INFO - Starting to export to file
INFO:ecoinvent_3_8:Starting to export to file
2023-01-12 16:18:03,957 - ecoinvent_3_8 - INFO - about to write to file
INFO:ecoinvent_3_8:about to write to file
2023-01-12 16:23:01,175 - ecoinvent_3_8 - INFO - Final, symmetric, normalized matrices saved in /Users/michaelweinold/data/data_processed/ecoinvent_3_8Pandas_symmNorm.gz.pickle with SHA-1 of a39b4e71f0e953ba18b995c33619865b62f679a5
INFO:ecoinvent_3_8:Final, symmetric, normalized matrices saved in /Users/michaelweinold/data/data_processed/ecoinvent_3_8Pandas_symmNorm.gz.pickle with SHA-1 of a39b4e71f0e953ba18b995c33619865b62f679a5
2023-01-12 16:28:01,666 - 

In [85]:
Path.unlink(Path.cwd() / ('ecoinvent_3_8' + '_characterisation.db'), missing_ok = True)
Path.unlink(Path.cwd() / 'C_long', missing_ok = True)

# 3. `pylcaio`
## 3.1. `pylcaio` import

In [9]:
if path_dir_repo_pylcaio_src.exists():
    pass
else:
    git.Git(path_dir_repo_pylcaio_parent).clone("https://github.com/michaelweinold/pylcaio.git")

In [11]:
sys.path.append(str(path_dir_repo_pylcaio_src))
import pylcaio 

In [11]:
with gzip.open('/Users/michaelweinold/data/data_processed/ecoinvent_3_8Pandas_symmNorm.gz.pickle','rb') as f:
    ecoinvent = pd.read_pickle(f)

In [14]:
database_loader: pylcaio.DatabaseLoader  = pylcaio.DatabaseLoader(
    lca_database_processed = ecoinvent,
    io_database_processed = exiobase,
    lca_database_name_and_version = 'ecoinvent3.8',
    io_database_name_and_version = 'exiobase3'
)

In [15]:
lcaio_object: pylcaio.LCAIO = database_loader.combine_ecoinvent_exiobase(
    complete_extensions = False,
    impact_world = False,
    regionalized = False
)

No path for the capital folder was provided. Capitals will not be endogenized


In [16]:
lcaio_object.hybridize(
    price_neutral_cut_off_matrix = False,
    method_double_counting = 'STAM',
    capitals = False,
    priceless_scaling = True
)

Indentifying Rest of World regions...
Updating electricity prices...
Calculating productions volumes...
Adjusting low production volume processes...
Extending inventory...
Building H matrix...


  self.H = self.H.append([self.H] * (self.number_of_countries_IO + self.number_of_RoW_IO - 1))


Building geography concordance...
Filter H matrix...
Build Cut-off matrix...
Add processes with 'priceless scaling' to Cut-off matrix...


  self.G = self.G.append([self.G] * (self.number_of_countries_IO + self.number_of_RoW_IO - 1))


In [19]:
lcaio_object.save_system(
    file_name = 'hybrid.pickle',
    file_path = path_dir_data_processed,
    format = 'pickle'
)

Database saved to /Users/michaelweinold/data/data_processed/hybrid.pickle
Description file saved to /Users/michaelweinold/data/data_processed/description_hybrid.txt


In [20]:
with gzip.open('/Users/michaelweinold/data/data_processed/hybrid.pickle','rb') as f:
    hybrid_pickle = pd.read_pickle(f)

In [12]:
lcaio_analysis: pylcaio.Analysis = pylcaio.Analysis('/Users/michaelweinold/data/data_processed/hybrid.pickle')

In [13]:
eidatasets = str(Path(path_dir_ecoinvent / 'datasets'))

In [14]:
lcaio_analysis.export_to_brightway2(
    bw2_project_name = 'test2',
    created_database_name = 'hybrid_test2',
    path_to_ecoinvent_ecospold_datasets = eidatasets,
    aggregated = False
)

Biosphere database already present!!! No setup is needed
Extracting XML data from 19565 datasets
Extracted 19565 datasets in 17.73 seconds
Applying strategy: normalize_units
Applying strategy: update_ecoinvent_locations
Applying strategy: remove_zero_amount_coproducts
Applying strategy: remove_zero_amount_inputs_with_no_activity
Applying strategy: remove_unnamed_parameters
Applying strategy: es2_assign_only_product_with_amount_as_reference_product
Applying strategy: assign_single_product_as_activity
Applying strategy: create_composite_code
Applying strategy: drop_unspecified_subcategories
Applying strategy: fix_ecoinvent_flows_pre35
Applying strategy: drop_temporary_outdated_biosphere_flows
Applying strategy: link_biosphere_by_flow_uuid
Applying strategy: link_internal_technosphere_by_composite_code
Applying strategy: delete_exchanges_missing_activity
Applying strategy: delete_ghost_exchanges
Applying strategy: remove_uncertainty_from_negative_loss_exchanges
Applying strategy: fix_unre

AttributeError: 'tuple' object has no attribute 'split'

In [16]:
import bw2data

In [53]:
type(bw2data.methods.get('CML 2001 (superseded)'))

NoneType

In [62]:
for i in bw2data.methods:
    #print(i)

SyntaxError: incomplete input (520947138.py, line 2)

In [23]:
impact_categories: list = lcaio_analysis.impact_categories_IO
methods: list = list(bw2data.methods)

In [32]:
dict_impact_categories = dict.fromkeys(impact_categories)

In [39]:
str(impact_categories[0]).split(' (')[0]

"('Problem oriented approach: baseline"

In [63]:
search_term: str = 'IMPACTWorld+'
for tup in bw2data.methods:
    if search_term in tup:
        print(tup)

In [70]:
bw2data.methods

Methods dictionary with 1020 objects, including:
	('CML 2001 (superseded)', 'acidification potential', 'average European')
	('CML 2001 (superseded)', 'acidification potential', 'generic')
	('CML 2001 (superseded)', 'climate change', 'GWP 100a')
	('CML 2001 (superseded)', 'climate change', 'GWP 20a')
	('CML 2001 (superseded)', 'climate change', 'GWP 500a')
	('CML 2001 (superseded)', 'climate change', 'lower limit of net GWP')
	('CML 2001 (superseded)', 'climate change', 'upper limit of net GWP')
	('CML 2001 (superseded)', 'eutrophication potential', 'average European')
	('CML 2001 (superseded)', 'eutrophication potential', 'generic')
	('CML 2001 (superseded)', 'freshwater aquatic ecotoxicity', 'FAETP 100a')
Use `list(this object)` to get the complete list.

In [68]:
list(bw2data.methods)[-30:-1]

[('IMPACTWorld+ (Default_Recommended_Midpoint 1.28)',
  'Land occupation, biodiversity'),
 ('IMPACTWorld+ (Default_Recommended_Midpoint 1.28)',
  'Particulate matter formation'),
 ('IMPACTWorld+ (Default_Recommended_Midpoint 1.28)', 'Ionizing radiations'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Climate change, ecosystem quality, short term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Climate change, ecosystem quality, long term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Climate change, human health, short term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Climate change, human health, long term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Marine acidification, short term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Marine acidification, long term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Land occupation, biodiversity'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Water availabil

In [69]:
bw2data.methods.get('IMPACTWorld+ (Default_Recommended_Damage 1.46)')

In [50]:
for IW_category in IW_pylcaio_to_bw2:
    if 'PDF' in IW_category or 'DALY' in IW_category:
        print(str(IW_category).split( ' (' )[0])

('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99
('ECOINDICATOR 99


In [45]:
IW_pylcaio_to_bw2 = dict.fromkeys(impact_categories)
for IW_category in IW_pylcaio_to_bw2:
    if 'PDF' in IW_category or 'DALY' in IW_category:
        IW_pylcaio_to_bw2[IW_category] = \
        bw2data.methods.get(('IMPACTWorld+ (Default_Recommended_Damage 1.46)', str(IW_category).split( ' (' )[0] ) )[
            'abbreviation']
    else:
        IW_pylcaio_to_bw2[IW_category] = \
        bw2data.methods.get(('IMPACTWorld+ (Default_Recommended_Midpoint 1.28)', str(IW_category).split(' (')[0]))[
            'abbreviation']
aggregated_results.index = list(IW_pylcaio_to_bw2.values())

TypeError: 'NoneType' object is not subscriptable

In [11]:
lcaio_analysis.F_io

<1113x9800 sparse matrix of type '<class 'numpy.float64'>'
	with 1852282 stored elements in Compressed Sparse Row format>

In [12]:
lcaio_analysis.A_io_f

In [None]:
        for IW_category in IW_pylcaio_to_bw2:

In [28]:
from bw2data import methods

In [27]:
IW_pylcaio_to_bw2 = dict.fromkeys(lcaio_object.a
impact_categories_IO)

AttributeError: 'LCAIO' object has no attribute 'impact_categories_IO'

In [17]:
lcaio_object.A_io_f_uncorrected

<9800x19565 sparse matrix of type '<class 'numpy.float64'>'
	with 34068818 stored elements in Compressed Sparse Row format>

In [36]:
[*lcaio_analysis.impact_categories_IO][0:3]

[('Problem oriented approach: baseline (CML, 2001)',
  'abiotic depletion (elements, ultimate ultimate reserves)',
  'ADPelements (Oers et al. 2001)',
  'kg antimony eq.'),
 ('Problem oriented approach: baseline (CML, 2001)',
  'abiotic depletion (fossil fuels)',
  'ADPfossil fuels (Oers et al., 2001)',
  'MJ'),
 ('Problem oriented approach: non baseline (CML, 2001)',
  'abiotic depletion (elements, reserve base)',
  'ADPelements (Oers et al. 2001)',
  'kg antimony eq.')]

In [None]:
IW_pylcaio_to_bw2[IW_category] = methods.get(('IMPACTWorld+ (Default_Recommended_Midpoint 1.28)', IW_category.split(' (')[0]))[
                    'abbreviation']