### 0.0 follow setup instructions

ℹ️ use [`pylcaio.yml`](https://github.com/michaelweinold/config_conda/blob/main/pylcaio.yml) to set up a working conda environment.

### 0.1. imports
#### 0.1.1. regular imports

In [1]:
# i/o
import sys
import os
from pathlib import Path
import gzip
import pickle
import git
import json
# os specific settings
import platform
# configuration
import yaml
# lca
import ecospold2matrix as e2m
import pymrio
#import brightway2 as bw
# type hints
from ecospold2matrix import ecospold2matrix
from pymrio import IOSystem
# data science
import pandas as pd
import numpy as np
# deep copy
import copy

#### 0.1.2. load configuration file

In [2]:
# Load the notebook configuration (directory and file names used throughout).
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's default locale encoding.
# NOTE(review): yaml.FullLoader is meant for trusted input only; if the
# configuration could ever come from an untrusted source, switch to
# yaml.safe_load.
with open('../config.yaml', mode = 'r', encoding = 'utf-8') as filestream:
    config = yaml.load(filestream, Loader = yaml.FullLoader)

#### 0.1.3. local imports

In [3]:
# Make the local pylcaio checkout importable. Its location (relative to the
# home directory) is read from the 'pylcaio' entry of the configuration.
path_dir_pylcaio_source = os.path.join(Path.home(), config['pylcaio'])
# Only extend sys.path once: without this guard every re-run of the cell
# appends the same directory again.
if path_dir_pylcaio_source not in sys.path:
    sys.path.append(path_dir_pylcaio_source) # required for local import of pylcaio
import pylcaio

In [4]:
from rest_of_world import (
    identify_rest_of_world_regions,
    identify_rows
)

### 0.2. file paths
#### 0.2.1. directories

In [5]:
# Directory layout: every location is derived from the home directory (or the
# enclosing git working tree) plus entries of the configuration file.
# home directory
path_dir_home = Path.home()
# root of the git working tree found by searching upwards from the current directory
path_dir_repo = git.Repo('.', search_parent_directories=True).working_tree_dir
# input directory
path_dir_databases = os.path.join(path_dir_home, config['path_dir_databases'])
# output directories
path_dir_data = os.path.join(path_dir_home, config['path_dir_data'])
# path_dir_data already starts at the home directory, so it is not joined with
# path_dir_home a second time (os.path.join drops every component that
# precedes an absolute one, which made that extra argument a no-op)
path_dir_pylcaio = os.path.join(path_dir_data, config['path_dir_pylcaio'])
path_dir_pymrio = os.path.join(path_dir_data, config['path_dir_pymrio'])
path_dir_e2m = os.path.join(path_dir_data, config['path_dir_e2m'])

#### 0.2.2. files

In [6]:
# File locations, built from the directory variables defined in the previous
# cell and the file names stored in the configuration.
# databases
# path_dir_databases already starts at the home directory, so it is used
# directly instead of being joined with path_dir_home again (os.path.join
# drops every component that precedes an absolute one)
path_exiobase = os.path.join(path_dir_databases, config['exiobase'])
path_dir_ecoinvent = os.path.join(path_dir_databases, config['ecoinvent'])
# pylcaio output
path_pylcaio_database_loader_class_instance = os.path.join(
    path_dir_pylcaio,
    config['pylcaio_database_loader_class_instance']
)
path_pylcaio_class_instance_before_hybrid = os.path.join(
    path_dir_pylcaio,
    config['pylcaio_class_instance_before_hybrid']
)
path_pylcaio_class_instance_after_hybrid = os.path.join(
    path_dir_pylcaio,
    config['pylcaio_class_instance_after_hybrid']
)
# pymrio output
path_pymrio_class_instance = os.path.join(path_dir_pymrio, config['pymrio_class_instance'])
# e2m output
e2m_project_name = config['e2m_project_name']
# the pickle file name is the project name followed by the configured suffix
path_file_e2m_pickle = os.path.join(path_dir_e2m, e2m_project_name + config['e2m_pickle_filename'])

In [7]:
# Locations of the JSON lookup tables inside the repository; the paths
# relative to the repository root come from the configuration file.
path_dict_io_countries_per_lca_region = os.path.join(
    path_dir_repo,
    config['path_dict_io_countries_per_lca_region']
)
path_list_io_countries_and_regions = os.path.join(
    path_dir_repo,
    config['path_list_io_countries_and_regions']
)
path_list_io_countries = os.path.join(
    path_dir_repo,
    config['path_list_io_countries']
)

In [8]:
def _read_json(path_file):
    """Return the parsed content of the UTF-8 encoded JSON file at `path_file`."""
    with open(path_file, 'r', encoding = 'utf-8') as stream_json:
        return json.load(stream_json)

# Lookup tables used by the rest-of-world identification further below.
dict_io_countries_per_lca_region: dict = _read_json(path_dict_io_countries_per_lca_region)
list_io_countries_and_regions: list = _read_json(path_list_io_countries_and_regions)
list_io_countries: list = _read_json(path_list_io_countries)

In [9]:
# Restore the pickled pylcaio.LCAIO instance (judging by the variable name,
# the state saved before hybridization).
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
with open(path_pylcaio_class_instance_before_hybrid, 'rb') as file_in:
    pylcaio_object_before_hybrid: pylcaio.LCAIO = pd.read_pickle(file_in)
# Keep a direct handle on the PRO_f attribute; it is passed as `df_in` to
# identify_rest_of_world_regions in the benchmark below.
PRO_f = pylcaio_object_before_hybrid.PRO_f

#### 1.1. function implementation

In [13]:
%%timeit
df_test = identify_rest_of_world_regions(df_in = PRO_f, list_io_countries=list_io_countries, dict_io_countries_per_lca_region=dict_io_countries_per_lca_region)

69.5 ms ± 419 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
%%timeit
# Benchmark of identify_rows on the full pylcaio object, for comparison with
# the identify_rest_of_world_regions timing.
# Names assigned inside a %%timeit cell are not kept in the notebook
# namespace, so df_test is not available to later cells.
df_test = identify_rows(pylcaio_object_before_hybrid)

273 ms ± 4.41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Given a `PRO_f` dataframe such as:

| index | activityNameId | io_geography |
| ----- | -------------- | ------------ |
| 10 | 1 | RoW |
| 11 | 1 | CH |
| 12 | 1 | AT |
| 13 | 2 | RoW |
| 14 | 2 | DE |
| 15 | 2 | CH |
| 16 | 3 | FR |
| 17 | 3 | BE |
| 18 | 4 | RoW |
| 19 | 4 | CH |
| 20 | 4 | AT |

the resulting dataframe should look like:

| activityNameId | io_geography_list | RoW_region |
| -------------- | ----------------- | ---------- |
| 1 | RoW, CH, AT | RoW(1) |
| 2 | RoW, DE, CH | RoW(2) |
| 4 | RoW, CH, AT | RoW(1) |

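where, for `activityNameId == 1`, the RoW region is `list_io_countries` minus `[CH, AT]`, i.e. every IO country that is not explicitly listed for that activity. Activities 1 and 4 list the same countries and therefore share the label `RoW(1)`; activity 3 has no `RoW` row and is omitted from the result.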
