# 0.1. imports

In [37]:
# i/o
import sys
import os
from pathlib import Path
import gzip
import pickle
# configuration
import yaml
# data science
import pandas as pd
import copy
# lca
import pymrio
import ecospold2matrix as e2m
# type hints
from pymrio import IOSystem
from pathlib import PosixPath

# 0.2. file paths
## 0.2.1. directories

In [7]:
%%capture
# base data directory in the user's home, with raw and processed sub-directories
path_dir_data = Path.home() / 'data'
path_dir_data_raw = path_dir_data / 'data_raw'
path_dir_data_processed = path_dir_data / 'data_processed'
# echo the three locations (in definition order)
for _path_dir in (path_dir_data, path_dir_data_raw, path_dir_data_processed):
    print(_path_dir)

In [25]:
# locations of the two raw databases inside the raw-data directory
path_dir_exiobase = path_dir_data_raw / 'exiobase_3_8'
path_dir_ecoinvent = path_dir_data_raw / 'ecoinvent_3.8_cutoff_ecoSpold02'
print(path_dir_exiobase)
print(path_dir_ecoinvent)

/Users/michaelweinold/data/data_raw/exiobase_3_8
/Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02


## 0.2.2. files

### ℹ️ prerequisites



1. `Ecoinvent` data present in directory `path_dir_data_raw / 'ecoinvent_3.8_cutoff_ecoSpold02'`
2. `Exiobase` data present in directory `path_dir_data_raw / 'exiobase_3_8'`, or an internet connection for automatic download from zenodo.org

# 1. data preparation

## 1.0. variables 

In [33]:
# Exiobase flavour ('pxp') and reference year used throughout the notebook
str_exiobase_system: str = 'pxp'
str_exiobase_year: str = '2011'
# archive name follows the pattern IOT_<year>_<system>.zip
str_exiobase_zip_file: str = f'IOT_{str_exiobase_year}_{str_exiobase_system}.zip'

## 1.1. download `Exiobase` from zenodo.org 

In [31]:
# Download Exiobase only when the archive that is parsed later is missing.
# The check targets the specific zip file rather than the bare directory, so an
# empty or partially filled directory no longer suppresses the download.
if (path_dir_exiobase / str_exiobase_zip_file).is_file():
    print('Exiobase data already present.')
else:
    # make sure the target folder exists before handing it to pymrio
    path_dir_exiobase.mkdir(parents=True, exist_ok=True)
    pymrio.download_exiobase3(
        storage_folder=path_dir_exiobase,
        system=str_exiobase_system,
        years=str_exiobase_year,
    )

Exiobase data already present.


## 1.2. check if `Ecoinvent` data is present

In [28]:
# abort when the Ecoinvent directory path does not exist
if not path_dir_ecoinvent.exists():
    raise AssertionError('Ecoinvent data not found.')

# 2. parse databases
## 2.1. parse `Exiobase`

In [35]:
# full path of the Exiobase archive for the selected year and system
path_file_exiobase_zip = path_dir_exiobase / str_exiobase_zip_file
exiobase: pymrio.IOSystem = pymrio.parse_exiobase3(path=path_file_exiobase_zip)

## 2.2. parse `Ecoinvent`

In [38]:
# Constructor options for the Ecoinvent parser.
# sys_dir is converted to a plain string: passing a Posix Path object breaks the functionality.
ecoinvent_parser_options = dict(
    sys_dir=str(path_dir_ecoinvent),
    project_name='ecoinvent_3_8',
    out_dir=path_dir_data_processed,
    positive_waste=False,
    nan2null=True,
)
parser = e2m.Ecospold2Matrix(**ecoinvent_parser_options)
# flags set on the instance after construction
setattr(parser, 'save_interm', False)
setattr(parser, 'prefer_pickles', True)

2023-01-12 12:17:13,932 - ecoinvent_3_8 - INFO - Ecospold2Matrix Processing
INFO:ecoinvent_3_8:Ecospold2Matrix Processing
2023-01-12 12:17:14,029 - ecoinvent_3_8 - INFO - Current git commit: d167650d60bc9a54ebcf80744e72d8e6d8d3ece3
INFO:ecoinvent_3_8:Current git commit: d167650d60bc9a54ebcf80744e72d8e6d8d3ece3
2023-01-12 12:17:14,031 - ecoinvent_3_8 - INFO - Project name: ecoinvent_3_8
INFO:ecoinvent_3_8:Project name: ecoinvent_3_8
2023-01-12 12:17:14,032 - ecoinvent_3_8 - INFO - Unit process and Master data directory: /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02
INFO:ecoinvent_3_8:Unit process and Master data directory: /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02
2023-01-12 12:17:14,032 - ecoinvent_3_8 - INFO - Data saved in: /Users/michaelweinold/data/data_processed
INFO:ecoinvent_3_8:Data saved in: /Users/michaelweinold/data/data_processed
2023-01-12 12:17:14,033 - ecoinvent_3_8 - INFO - Replace Not-a-Number instances with 0.0 in all ma

In [39]:
# run the parser's ecospold_to_Leontief conversion with 'Pandas' file format
# and absolute flows switched on
leontief_options = {'fileformats': 'Pandas', 'with_absolute_flows': True}
parser.ecospold_to_Leontief(**leontief_options)

2023-01-12 12:17:58,799 - ecoinvent_3_8 - INFO - Products extracted from IntermediateExchanges.xml with SHA-1 of 1da23bc8fd24d97422a2a21ba3626d2cdfa6a428
INFO:ecoinvent_3_8:Products extracted from IntermediateExchanges.xml with SHA-1 of 1da23bc8fd24d97422a2a21ba3626d2cdfa6a428
2023-01-12 12:18:24,690 - ecoinvent_3_8 - INFO - Activities extracted from ActivityIndex.xml with SHA-1 of 03403c01ac6f74a5d6cc5ca8820593f7e516b709
INFO:ecoinvent_3_8:Activities extracted from ActivityIndex.xml with SHA-1 of 03403c01ac6f74a5d6cc5ca8820593f7e516b709
2023-01-12 12:18:24,866 - ecoinvent_3_8 - INFO - Processing 19565 files in /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02/datasets
INFO:ecoinvent_3_8:Processing 19565 files in /Users/michaelweinold/data/data_raw/ecoinvent_3.8_cutoff_ecoSpold02/datasets
2023-01-12 12:19:38,336 - ecoinvent_3_8 - INFO - Processing 19565 files - this may take a while ...
INFO:ecoinvent_3_8:Processing 19565 files - this may take a while ...
2023-01-12 1

In [40]:
# delete leftover files from the current working directory, if they exist
# (presumably by-products of the ecospold2matrix run - TODO confirm)
for _leftover_name in ('ecoinvent_3_8' + '_characterisation.db', 'C_long'):
    (Path.cwd() / _leftover_name).unlink(missing_ok=True)

# 3. `pylcaio` import