### ✅ prerequisites

 - [ ] conda environment `pylcaio_ecoinvent_3_8` created with [`pylcaio_ecoinvent_3_8.yml`](https://github.com/michaelweinold/config_conda/blob/main/bw_import_ei38.yml)

# 0.1. imports

In [1]:
# i/o
import sys
import os
from pathlib import Path
import gzip
import pickle
# git
import git
# configuration
import yaml
# data science
import pandas as pd
import copy
# lca
import pymrio
import ecospold2matrix as e2m
# type hints
from pymrio import IOSystem
from pathlib import PosixPath

# 0.2. file paths
## 0.2.1. directories

### ✅ prerequisites

- [ ] Ecoinvent database in directory names `ecoinvent_3.8_cutoff_ecoSpold02` in raw data directory (`path_dir_data_raw`) 
- [ ] Ecoinvent characterization file `LCIA Implementation v3.8.xlsx` in the Ecoinvent directory

In [2]:
%%capture
print(path_dir_data := Path(Path.home() / 'data'))
print(path_dir_data_raw := Path(path_dir_data / 'data_raw'))
print(path_dir_data_processed := Path(path_dir_data / 'data_processed'))

In [3]:
%%capture
print(path_dir_repo_pylcaio_parent := Path(Path.home() / 'github'))
print(path_dir_repo_pylcaio_src := Path(path_dir_repo_pylcaio_parent / 'pylcaio/src'))

In [4]:
%%capture
print(path_dir_exiobase_raw := path_dir_data_raw / 'exiobase_3_8')
print(path_dir_ecoinvent_raw := path_dir_data_raw / 'ecoinvent_3.8_cutoff_ecoSpold02')

## 0.2.2. files

In [5]:
%%capture
print(path_file_exiobase_processed := path_dir_data_processed / 'exiobase_3_8.pickle')
print(path_file_ecoinvent_processed := path_dir_data_processed / 'ecoinvent_3_8Pandas_symmNorm.gz.pickle')

In [6]:
%%capture
print(path_file_hybrid_system := Path(path_dir_data_processed / 'hybrid.pickle'))

In [7]:
%%capture
print(path_file_ecoinvent_LCIA_implementation := str(path_dir_ecoinvent_raw / 'LCIA Implementation v3.8.xlsx'))

### ℹ️ prerequisites

1. `Ecoinvent` data present in directory `path_dir_data_raw`
2. `Exiobase` data present in directory `path_dir_data_raw` or internet connection for automatic download from zenodo.org

# 1. data preparation

## 1.0. variables 

In [8]:
str_exiobase_system: str = 'pxp'
str_exiobase_year: str = '2011'
str_exiobase_zip_file: str = 'IOT_' + str_exiobase_year + '_' + str_exiobase_system + '.zip'

## 1.1. download `Exiobase` from zenodo.org 

In [9]:
if path_dir_exiobase_raw.exists():
    pass
else:
    pymrio.download_exiobase3(
        storage_folder = path_dir_exiobase_raw,
        system = str_exiobase_system,
        years = str_exiobase_year
    )

## 1.2. check if `Ecoinvent` data is present

In [10]:
assert path_dir_ecoinvent_raw.exists(), 'Ecoinvent data not found.'

# 2. parse databases (needs to be run only once)
## 2.1. parse `Exiobase`

⏳ ~1.5 min on MacBook Pro

In [12]:
exiobase: pymrio.IOSystem = pymrio.parse_exiobase3(path_dir_exiobase_raw / str_exiobase_zip_file)
with open(path_file_exiobase_processed, 'wb') as file_handle:
    pickle.dump(obj = exiobase, file = file_handle, protocol=pickle.HIGHEST_PROTOCOL)

## 2.2. parse `Ecoinvent`

⏳ ~15 min on MacBook Pro

In [13]:
parser = e2m.Ecospold2Matrix(
    sys_dir = str(path_dir_ecoinvent_raw), # passing a Posix Path object breaks the functionality
    project_name = 'ecoinvent_3_8',
    out_dir = path_dir_data_processed,
    characterisation_file = path_file_ecoinvent_LCIA_implementation,
    positive_waste = False,
    nan2null = True
)
parser.save_interm = False
parser.prefer_pickles = True

2023-01-16 09:51:35,170 - ecoinvent_3_8 - INFO - Ecospold2Matrix Processing
INFO:ecoinvent_3_8:Ecospold2Matrix Processing
2023-01-16 09:51:35,197 - ecoinvent_3_8 - INFO - Current git commit: 104a593aca2a8315e39eebd43e7722070f9cc584
INFO:ecoinvent_3_8:Current git commit: 104a593aca2a8315e39eebd43e7722070f9cc584
2023-01-16 09:51:35,199 - ecoinvent_3_8 - INFO - Project name: ecoinvent_3_8
INFO:ecoinvent_3_8:Project name: ecoinvent_3_8
2023-01-16 09:51:35,199 - ecoinvent_3_8 - INFO - Unit process and Master data directory: /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02
INFO:ecoinvent_3_8:Unit process and Master data directory: /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02
2023-01-16 09:51:35,200 - ecoinvent_3_8 - INFO - Data saved in: /Users/michaelweinold/data/data_processed
INFO:ecoinvent_3_8:Data saved in: /Users/michaelweinold/data/data_processed
2023-01-16 09:51:35,201 - ecoinvent_3_8 - INFO - Replace Not-a-Number instances with 0.0 in all ma

In [14]:
parser.ecospold_to_Leontief(
    fileformats = 'Pandas',
    with_absolute_flows = True
)

2023-01-16 09:51:41,699 - ecoinvent_3_8 - INFO - Products extracted from IntermediateExchanges.xml with SHA-1 of 1da23bc8fd24d97422a2a21ba3626d2cdfa6a428
INFO:ecoinvent_3_8:Products extracted from IntermediateExchanges.xml with SHA-1 of 1da23bc8fd24d97422a2a21ba3626d2cdfa6a428
2023-01-16 09:52:05,727 - ecoinvent_3_8 - INFO - Activities extracted from ActivityIndex.xml with SHA-1 of 03403c01ac6f74a5d6cc5ca8820593f7e516b709
INFO:ecoinvent_3_8:Activities extracted from ActivityIndex.xml with SHA-1 of 03403c01ac6f74a5d6cc5ca8820593f7e516b709
2023-01-16 09:52:05,857 - ecoinvent_3_8 - INFO - Processing 19565 files in /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02/datasets
INFO:ecoinvent_3_8:Processing 19565 files in /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02/datasets
2023-01-16 09:53:16,845 - ecoinvent_3_8 - INFO - Processing 19565 files - this may take a while ...
INFO:ecoinvent_3_8:Processing 19565 files - this may take a while ...
2023-01-16 0

starting characterisation


2023-01-16 09:55:20,283 - ecoinvent_3_8 - INFO - Starting characterisation matching
INFO:ecoinvent_3_8:Starting characterisation matching
2023-01-16 09:55:24,324 - ecoinvent_3_8 - INFO - Characterisation matching done. C matrix created
INFO:ecoinvent_3_8:Characterisation matching done. C matrix created
2023-01-16 09:55:24,375 - ecoinvent_3_8 - INFO - Starting to export to file
INFO:ecoinvent_3_8:Starting to export to file
2023-01-16 09:55:24,375 - ecoinvent_3_8 - INFO - about to write to file
INFO:ecoinvent_3_8:about to write to file
2023-01-16 10:00:19,401 - ecoinvent_3_8 - INFO - Final, symmetric, normalized matrices saved in /Users/michaelweinold/data/data_processed/ecoinvent_3_8Pandas_symmNorm.gz.pickle with SHA-1 of 4c3f0a4025e9920d7b6b508bd85a35fca5974a95
INFO:ecoinvent_3_8:Final, symmetric, normalized matrices saved in /Users/michaelweinold/data/data_processed/ecoinvent_3_8Pandas_symmNorm.gz.pickle with SHA-1 of 4c3f0a4025e9920d7b6b508bd85a35fca5974a95
2023-01-16 10:05:18,678 - 

### 2.2.1. remove temporary files

In [15]:
Path.unlink(Path.cwd() / ('ecoinvent_3_8' + '_characterisation.db'), missing_ok = True)
Path.unlink(Path.cwd() / 'C_long', missing_ok = True)

# 3. load databases

In [14]:
exiobase: IOSystem = pd.read_pickle(path_file_exiobase_processed)
with gzip.open(path_file_ecoinvent_processed,'rb') as f:
    ecoinvent = pd.read_pickle(f)

# 4. `pylcaio`
## 4.1. `pylcaio` import

⏳ ~5 min on MacBook Pro

In [8]:
if path_dir_repo_pylcaio_src.exists():
    pass
else:
    git.Git(path_dir_repo_pylcaio_parent).clone("https://github.com/michaelweinold/pylcaio.git")

In [9]:
sys.path.append(str(path_dir_repo_pylcaio_src))
import pylcaio 

In [11]:
database_loader: pylcaio.DatabaseLoader  = pylcaio.DatabaseLoader(
    lca_database_processed = ecoinvent,
    io_database_processed = exiobase,
    lca_database_name_and_version = 'ecoinvent3.8',
    io_database_name_and_version = 'exiobase3'
)

NameError: name 'ecoinvent' is not defined

In [18]:
lcaio_object: pylcaio.LCAIO = database_loader.combine_ecoinvent_exiobase(
    complete_extensions = False,
    impact_world = True,
    regionalized = False
)

No path for the capital folder was provided. Capitals will not be endogenized


In [19]:
lcaio_object.hybridize(
    price_neutral_cut_off_matrix = False,
    method_double_counting = 'STAM',
    capitals = False,
    priceless_scaling = True
)

Indentifying Rest of World regions...
Updating electricity prices...
Calculating productions volumes...
Adjusting low production volume processes...
Extending inventory...
Building H matrix...


  self.H = self.H.append([self.H] * (self.number_of_countries_IO + self.number_of_RoW_IO - 1))


Building geography concordance...
Filter H matrix...
Build Cut-off matrix...
Add processes with 'priceless scaling' to Cut-off matrix...


  self.G = self.G.append([self.G] * (self.number_of_countries_IO + self.number_of_RoW_IO - 1))


In [20]:
lcaio_object.save_system(
    file_name = 'hybrid_iwp.pickle',
    file_path = path_dir_data_processed,
    format = 'pickle'
)

Database saved to /Users/michaelweinold/data/data_processed/hybrid_iwp.pickle
Description file saved to /Users/michaelweinold/data/data_processed/description_hybrid_iw.txt


## 3.2. `pylcaio` export to `brightway`

In [10]:
lcaio_analysis: pylcaio.Analysis = pylcaio.Analysis('/Users/michaelweinold/data/data_processed/hybrid_iwp.pickle')
eidatasets = str(Path(path_dir_ecoinvent_raw / 'datasets')) # needs to be str, passing a Posix Path object breaks the functionality

In [11]:
lcaio_analysis.export_to_brightway2(
    bw2_project_name = 'test_2',
    created_database_name = 'hybrid_test_2',
    path_to_ecoinvent_ecospold_datasets = '/Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02/datasets',
    aggregated = False
)

Biosphere database already present!!! No setup is needed
Extracting XML data from 19565 datasets
Extracted 19565 datasets in 15.91 seconds
Applying strategy: normalize_units
Applying strategy: update_ecoinvent_locations
Applying strategy: remove_zero_amount_coproducts
Applying strategy: remove_zero_amount_inputs_with_no_activity
Applying strategy: remove_unnamed_parameters
Applying strategy: es2_assign_only_product_with_amount_as_reference_product
Applying strategy: assign_single_product_as_activity
Applying strategy: create_composite_code
Applying strategy: drop_unspecified_subcategories
Applying strategy: fix_ecoinvent_flows_pre35
Applying strategy: drop_temporary_outdated_biosphere_flows
Applying strategy: link_biosphere_by_flow_uuid
Applying strategy: link_internal_technosphere_by_composite_code
Applying strategy: delete_exchanges_missing_activity
Applying strategy: delete_ghost_exchanges
Applying strategy: remove_uncertainty_from_negative_loss_exchanges
Applying strategy: fix_unre

# 5. investigation

In [12]:
hybrid

NameError: name 'hybrid' is not defined

In [25]:
import bw2data
import bw2io

In [27]:
[*bw2data.methods][-20:-1]

[('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Water availability, human health'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Water availability, freshwater ecosystem'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Water availability, terrestrial ecosystem'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Thermally polluted water'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Photochemical oxidant formation'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Ionizing radiation, human health'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)', 'Ozone layer depletion'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Freshwater ecotoxicity, short term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Freshwater ecotoxicity, long term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Human toxicity cancer, short term'),
 ('IMPACTWorld+ (Default_Recommended_Damage 1.46)',
  'Human toxicity cancer, long term'),


In [29]:
# export bw2data.methods to pickle for investigation
bw2data_methods = bw2data.methods
with open('/Users/michaelweinold/github/bw_hybrid/notebooks/bw2data_methods.pickle', 'wb') as file_handle:
    pickle.dump(obj = bw2data_methods, file = file_handle, protocol=pickle.HIGHEST_PROTOCOL)

In [28]:
lcaio_analysis.impact_categories_IO[1:5]

[('Problem oriented approach: baseline (CML, 2001)',
  'abiotic depletion (fossil fuels)',
  'ADPfossil fuels (Oers et al., 2001)',
  'MJ'),
 ('Problem oriented approach: non baseline (CML, 2001)',
  'abiotic depletion (elements, reserve base)',
  'ADPelements (Oers et al. 2001)',
  'kg antimony eq.'),
 ('Problem oriented approach: non baseline (CML, 2001)',
  'abiotic depletion (elements, economic reserve)',
  'ADPelements (Oers et al. 2001)',
  'kg antimony eq.'),
 ('Problem oriented approach: non baseline (CML, 2001)',
  'Landuse increase of land competition',
  'LUC (Guinee et al, 2001)',
  'm2*yr')]