# 0.1. imports

In [4]:
# i/o
import sys
import os
from pathlib import Path
import gzip
import pickle
import json
# configuration
import yaml
# lca
#import pymrio
#import brightway2 as bw
# type hints
#from pymrio import IOSystem
# data science
import pandas as pd
# deep copy
import copy

# 0.2. configuration
## 0.2.1. config

In [2]:
# Load the project configuration; the path is relative to the notebook's working directory.
# NOTE(review): yaml.FullLoader can construct more than plain data types; if config.yaml
# only holds scalars, lists and mappings, yaml.safe_load would be the safer choice - confirm.
with open('../../config.yaml', mode = 'r') as filestream:
    config = yaml.load(stream = filestream, Loader = yaml.FullLoader)

# 0.3. file paths
## 0.3.1. directories

In [3]:
%%capture
print(path_dir_data := Path(Path.home(), config['path_dir_data']))
print(path_dir_data_raw := Path(path_dir_data, config['path_dir_data_raw']))
print(path_dir_data_processed := Path(path_dir_data, config['path_dir_data_processed']))

## 0.3.2. files

In [5]:
%%capture
print(path_file_exiobase_pymrio_io_system := Path(path_dir_data_processed, config['pymrio_class_instance']))

# 1. refactoring

## 1.1. `completing_extensions()`
### 1.1.1. legacy

[`pylcaio.py > lines 2498-2563`](https://github.com/OASES-project/pylcaio/blob/fa5378df55c314c2f021f4f10b675bb822b3d912/src/pylcaio.py#L2498-L2563)

In [None]:
def completing_extensions(OG_extensions, new_extensions):
    """Rename and aggregate the extensions of the original exiobase so that they match the names resulting from the
    matching with USEEIO, then concatenate the original and the new extensions into the extended extensions.

    OG_extensions is modified in place. The steps below run in a fixed order because every substring match is
    evaluated against the rows that are left (and the rows that were added) by the previous steps."""

    def rows_containing(fragment):
        # labels of the rows currently in OG_extensions whose name contains `fragment`
        return [label for label in OG_extensions.index if fragment in label]

    def aggregate(target, sources):
        # write the column-wise sum of `sources` to row `target`, then remove the `sources` rows
        OG_extensions.loc[target] = OG_extensions.loc[sources].sum()
        OG_extensions.drop(sources, inplace=True)

    # just remove Energy Carrier Net flows and not defined waste flows
    OG_extensions.drop(rows_containing('Energy Carrier Net'), inplace=True)
    OG_extensions.drop(['Emissions nec - waste - undef'], inplace=True)

    # simple sum of all values for old names pollutants,
    # e.g., 'CH4 - non combustion - Cement production - air' => 'CH4 - air'
    for pollutant in ('CH4', 'N2O', 'SOx', 'NH3', 'HCB', 'NMVOC', 'PM10', 'PM2.5', 'TSP', 'Cd', 'Hg', 'Pb', 'Zn', 'PAH'):
        aggregate(pollutant + ' - air', rows_containing(pollutant))

    # hardcoded stuff: each of these has one combustion row (full name) and three
    # non-combustion rows (abbreviated name), all of which must exist
    non_combustion_processes = (
        'Primary aluminium production',
        'Production of coke oven coke',
        'Production of gascoke',
    )
    for full_name, short_name in (
        ('Benzo(a)pyrene', 'B(a)P'),
        ('Benzo(b)fluoranthene', 'B(b)F'),
        ('Benzo(k)fluoranthene', 'B(k)F'),
    ):
        sources = [full_name + ' - combustion - air']
        sources += [
            short_name + ' - non combustion - ' + process + ' - air'
            for process in non_combustion_processes
        ]
        aggregate(full_name + ' - air', sources)

    # plain rename (a single row, so no sum)
    OG_extensions.loc['CO2 - biogenic - air'] = OG_extensions.loc['CO2 - waste - biogenic - air']
    OG_extensions.drop(['CO2 - waste - biogenic - air'], inplace=True)

    aggregate('Pxx - soil', ['Pxx - agriculture - soil', 'P - agriculture - soil'])

    # adds new row 'CO2 - air' and sums all (non-biogenic) CO2 flows
    aggregate(
        'CO2 - air',
        [label for label in OG_extensions.index if 'CO2' in label and 'biogenic' not in label],
    )
    # the '2' filter keeps the CO2 rows out of the CO total
    aggregate(
        'CO - air',
        [label for label in OG_extensions.index if 'CO' in label and '2' not in label],
    )
    aggregate(
        'NOx - air',
        [label for label in OG_extensions.index if 'NOx' in label or 'NOX' in label],
    )

    # remaining single-substring aggregations, in their original order
    for target, fragment in (
        ('Indeno(1,2,3-cd)pyrene - air', 'Indeno'),
        ('PCBs - air', 'PCB'),
        ('PCDD_F - air', 'PCDD'),
        ('As - air', 'As -'),
        ('Ni - air', 'Ni -'),
        ('Cr - air', 'Cr -'),
        ('Cu - air', 'Cu -'),
        ('Se - air', 'Se -'),
        ('N - water', 'N -'),
    ):
        aggregate(target, rows_containing(fragment))

    aggregate(
        'P - water',
        [label for label in OG_extensions.index if 'P -' in label and 'water' in label],
    )

    # after all this hardwork, concatenate with extensions
    return pd.concat([OG_extensions, new_extensions])

### 1.1.2. new

In [7]:
# Load the pickled object stored at path_file_exiobase_pymrio_io_system.
# The annotation is quoted on purpose: `from pymrio import IOSystem` is commented out in the
# imports cell, and an unquoted module-level annotation is evaluated at runtime (NameError).
# NOTE(review): unpickling can execute arbitrary code - only load files produced by this project.
exiobase: 'IOSystem' = pd.read_pickle(path_file_exiobase_pymrio_io_system)

In [9]:
# Display the index of the satellite account's F table (presumably the extension row names - see output below).
exiobase.satellite.F.index

Index(['Taxes less subsidies on products purchased: Total',
       'Other net taxes on production',
       'Compensation of employees; wages, salaries, & employers' social contributions: Low-skilled',
       'Compensation of employees; wages, salaries, & employers' social contributions: Medium-skilled',
       'Compensation of employees; wages, salaries, & employers' social contributions: High-skilled',
       'Operating surplus: Consumption of fixed capital',
       'Operating surplus: Rents on land',
       'Operating surplus: Royalties on resources',
       'Operating surplus: Remaining net operating surplus',
       'Employment: Low-skilled male',
       ...
       'Water Withdrawal Blue - Domestic - domestic Water Withdrawal Blue',
       'Energy Carrier Net Total', 'Energy Carrier Net NENE',
       'Energy Carrier Net NTRA', 'Energy Carrier Net TAVI',
       'Energy Carrier Net TMAR', 'Energy Carrier Net TOTH',
       'Energy Carrier Net TRAI', 'Energy Carrier Net TROA',
       '

In [10]:
# Aggregation rules: read as a dict and passed to sum_rows_based_on_row_names_conditions
# by complete_environmental_extensions below.
with open('./dict_exiobase_environmental_extensions_sum_rows.json', 'r', encoding = 'utf-8') as json_file:
    dict_exiobase_environmental_extensions_sum_rows: dict = json.loads(json_file.read())

# Row labels that complete_environmental_extensions removes before aggregating.
with open('./list_exiobase_environmental_extensions_drop_rows.json', 'r', encoding = 'utf-8') as json_file:
    list_exiobase_environmental_extensions_drop_rows: list = json.loads(json_file.read())

In [None]:
def sum_rows_based_on_row_names_conditions(
    df_input: pd.DataFrame,
    dict_row_names_conditions: dict,
) -> pd.DataFrame:
    """
    Aggregates groups of rows of a dataframe into new rows.

    For every (new row name, condition) pair, the rows selected by `DataFrame.query(condition)` are summed
    column-wise, removed from the dataframe, and replaced by a single row carrying the new row name.
    Pairs are processed in dictionary order, and each condition is evaluated against the result of the
    previous pairs. The input dataframe is not modified.

    Args:
        df_input (pd.DataFrame): input dataframe with an index column of row names
        dict_row_names_conditions (dict): keys are the new row names, values are query strings
            (as accepted by `pd.DataFrame.query`) selecting the rows to be summed

    Returns:
        pd.DataFrame: a new dataframe with the selected rows replaced by their sums
            (a plain copy of the input if the dictionary is empty)
    """

    # work on a copy so that the caller's dataframe is left untouched
    df_output: pd.DataFrame = df_input.copy()

    for new_row_name, old_rows_names_conditions in dict_row_names_conditions.items():
        old_rows_names: list = [*df_output.query(old_rows_names_conditions).index]
        # compute the sum before dropping, and add the new row after dropping, so that
        # a new row name that coincides with one of the old row names is not lost
        summed_row: pd.Series = df_output.loc[old_rows_names].sum()
        df_output = df_output.drop(labels = old_rows_names)
        df_output.loc[new_row_name] = summed_row

    return df_output

In [None]:
def complete_environmental_extensions(
    F_IO_original_extensions: pd.DataFrame,
    F_IO_new_extensions: pd.DataFrame,
) -> pd.DataFrame:
    """
    Drops and aggregates rows of the original environmental extensions and appends the new extensions.

    Works on a copy of the original extensions: the rows listed in the module-level
    `list_exiobase_environmental_extensions_drop_rows` are removed (labels that are not present are ignored),
    the result is passed to `sum_rows_based_on_row_names_conditions` together with the module-level
    `dict_exiobase_environmental_extensions_sum_rows`, and the new extensions are concatenated below.

    Args:
        F_IO_original_extensions (pd.DataFrame): original extensions, one row per extension
        F_IO_new_extensions (pd.DataFrame): extensions to be appended as additional rows

    Returns:
        pd.DataFrame: processed original extensions followed by the new extensions
    """

    trimmed_extensions: pd.DataFrame = F_IO_original_extensions.copy().drop(
        labels = list_exiobase_environmental_extensions_drop_rows,
        errors = 'ignore',
    )
    aggregated_extensions: pd.DataFrame = sum_rows_based_on_row_names_conditions(
        df_input = trimmed_extensions,
        dict_row_names_conditions = dict_exiobase_environmental_extensions_sum_rows,
    )

    return pd.concat([aggregated_extensions, F_IO_new_extensions])

## 1.2. `get_inflation()`
### 1.2.1. legacy

In [None]:
# Inflation factor relative to 2005, keyed by reference year.
# No data available for 2022, same data as 2021 by default.
_INFLATION_BY_REFERENCE_YEAR = {
    1995: 0.83,
    1996: 0.84,
    1997: 0.86,
    1998: 0.87,
    1999: 0.88,
    2000: 0.9,
    2001: 0.92,
    2002: 0.94,
    2003: 0.96,
    2004: 0.98,
    2005: 1,
    2006: 1.02,
    2007: 1.04,
    2008: 1.08,
    2009: 1.08,
    2010: 1.10,
    2011: 1.13,
    2012: 1.16,
    2013: 1.18,
    2014: 1.19,
    2015: 1.19,
    2016: 1.19,
    2017: 1.21,
    2018: 1.22,
    2019: 1.24,
    2020: 1.26,
    2021: 1.25,
    2022: 1.25,
}


def get_inflation(reference_year):
    """ Returns the inflation rate between the year 2005 (base year for ecoinvent prices) and the reference year of
    the used IO database, from https://www.inflationtool.com/euro/2005-to-present-value

    Years outside the tabulated range 1995-2022 fall back to a factor of 1 (i.e. no adjustment)."""

    return _INFLATION_BY_REFERENCE_YEAR.get(reference_year, 1)

### 1.2.2. new

In [2]:
import dbnomics
from datetime import datetime

In [47]:
def get_consumer_price_index_data(
    baseline_year: int,
) -> pd.DataFrame:
    """
    Returns a dataframe containing a time series of the Inflation Multiplier (as determined by the Consumer Price Index) normalized against a baseline year.
    Date range covered is [1996, current year]. Geographic coverage is the European Economic Area (EEA18-1995, EEA28-2004, EEA30-2007, EEA31-2013, EEA30-2020).
    Years between the last published observation and the current year are padded with the last available value.
    Data source: https://db.nomics.world/Eurostat/prc_hicp_aind/A.INX_A_AVG.CP00.EEA

    Args:
        baseline_year (int): baseline year for normalization (the multiplier of this year is 1.0)

    Returns:
        pd.DataFrame: output dataframe with the columns 'period' (first day of the year) and 'value' (multiplier)

    Raises:
        ValueError: if the fetched series has no observation for the baseline year
    """

    # .copy() so that the normalization below writes to an independent frame, not to a column slice
    df_inflation: pd.DataFrame = dbnomics.fetch_series('Eurostat/prc_hicp_aind/A.INX_A_AVG.CP00.EEA')[['period', 'value']].copy()

    # annual observations are stamped with the first day of the year
    baseline_values: pd.Series = df_inflation.loc[df_inflation['period'] == datetime(baseline_year, 1, 1), 'value']
    if baseline_values.empty:
        raise ValueError(f'no consumer price index observation available for baseline year {baseline_year}')
    df_inflation['value'] = df_inflation['value'] / baseline_values.iloc[0]

    # pad the years that have not been published yet with the last available value
    last_period = df_inflation['period'].iloc[-1]
    last_available_value = df_inflation['value'].iloc[-1]
    number_of_missing_years: int = datetime.now().year - last_period.year

    if number_of_missing_years > 0:
        df_padding: pd.DataFrame = pd.DataFrame({
            'period': [last_period + pd.DateOffset(years = offset) for offset in range(1, number_of_missing_years + 1)],
            'value': [last_available_value] * number_of_missing_years,
        })
        # ignore_index = True keeps the row index unique after appending the padded rows
        df_inflation = pd.concat(
            [df_inflation, df_padding],
            axis = 0,
            ignore_index = True
        )

    return df_inflation

In [48]:
# Manual check: fetch the consumer price index series with 2005 as the baseline year.
test = get_consumer_price_index_data(2005)

In [49]:
# Display the fetched series (output below).
test

Unnamed: 0,period,value
0,1996-01-01,0.849027
1,1997-01-01,0.863653
2,1998-01-01,0.874894
3,1999-01-01,0.885289
4,2000-01-01,0.902212
5,2001-01-01,0.922156
6,2002-01-01,0.941255
7,2003-01-01,0.959749
8,2004-01-01,0.978847
9,2005-01-01,1.0
