# Calculate emissions for IHS materials contained in process recipes using conversion factors

In [None]:
# Import packages
import numpy as np
import pandas as pd
from tqdm import tqdm

# Show every column when a DataFrame is displayed. The fully qualified key is
# used because pandas resolves option names by pattern matching, and a bare
# 'max_columns' is ambiguous (OptionError) on versions that also define
# 'styler.render.max_columns'.
pd.set_option('display.max_columns', None)
# Disable the chained-assignment warning for the slice assignments used below
pd.options.mode.chained_assignment = None

## Data imports

In [None]:
# Locations of the input tables and of the combined output directory
input_path = '../data/'
output_path = '../data/combined/'
ihs_materials_path = f'{input_path}processed/ihsMaterials_w_uncertainties.csv'
ecoinvent_file = f'{input_path}processed/conversionFactors_ecoinvent_grouped.csv'
carbonMinds_file = f'{input_path}processed/conversionFactors_carbonMinds_grouped.csv'
match_list_path = f'{input_path}extra_inputs/nameMatches_IHS_to_convFactors.csv'

In [None]:
# Load the IHS process recipes; the first CSV column is used as the index
ihs_materials = pd.read_csv(filepath_or_buffer=ihs_materials_path, index_col=0)
ihs_materials.head(n=5)

In [None]:
# Load the grouped conversion factor tables (ecoinvent first, then Carbon Minds);
# the first CSV column is used as the index in both
ei_emissions, cm_emissions = (
    pd.read_csv(factor_file, index_col=0)
    for factor_file in (ecoinvent_file, carbonMinds_file)
)

ei_emissions.head()

In [None]:
## Add a crude oil conversion factor: per-MJ factors scaled by 41.868 MJ/kg crude
# NOTE(review): the earlier comment quoted 41.686 MJ/kg, but the code uses 41.868 - confirm which is intended
crude_columns = ['Source', 'generalComment', 'location', 'CO2e_20a', 'CO2e_100a', 'Carbon dioxide', 'CO2e_20a_sigma', 'CO2e_100a_sigma', 'Carbon dioxide_sigma']
# Three value columns share one factor and three sigma columns share another
crude_row = ['crude oil input', 'Sourced from Crude oil Europe', 'GLO'] + [0.012*41.868]*3 + [0.005*41.868]*3
# np.array coerces the mixed text/number row to strings; the astype(float) below restores numbers
crude_input = pd.DataFrame(np.array([crude_row]), columns=crude_columns)
ei_emissions = pd.concat([ei_emissions, crude_input])

# Every column from the fourth onwards is cast to float
numeric_columns = ei_emissions.columns[3:]
ei_emissions[numeric_columns] = ei_emissions[numeric_columns].astype(float)

## Assign emissions from feedstocks and indirect utilities

In [None]:
def filter_rows(df:pd.DataFrame, column:str, item:str, exact:bool=True) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``column`` text matches ``item``, ignoring case.

    Args:
        df: Table to filter.
        column: Name of the text column to compare against.
        item: Text to look for.
        exact: If True, keep rows whose lower-cased value equals the lower-cased
            ``item``. If False, keep rows whose lower-cased value contains it
            as a plain substring.

    Returns:
        The matching rows of ``df`` (possibly empty). Rows with a missing value
        in ``column`` never match.
    """
    names = df[column].str.lower()
    needle = item.lower()

    # If exact match enforced
    if exact:
        return df[names == needle]

    # If item is in string but not entire string. regex=False keeps this a
    # literal substring test; na=False skips missing names instead of failing.
    return df[names.str.contains(needle, regex=False, na=False)]

def uncertainty_propagation(calc:str, x:float, dx:float, y:float=1, dy:float=0, z:float=1, propagation_type:str='simple') -> float:
    """Propagate uncertainties through a product or a sum.

    Args:
        calc: 'mult' for a product of x and y, 'add' for a sum.
        x: First value (only used by 'mult').
        dx: Absolute uncertainty of x.
        y: Second value (only used by 'mult').
        dy: Absolute uncertainty of y.
        z: For 'mult', the factor the combined relative uncertainty is
            multiplied by; the callers in this file pass the product value.
            Ignored by 'add'.
        propagation_type: 'simple' adds the contributions linearly,
            'stdev' adds them in quadrature.

    Returns:
        The propagated uncertainty, with the shape produced by the inputs.

    Raises:
        ValueError: If ``calc`` or ``propagation_type`` is not recognised.
    """
    # Multiplication
    if calc == 'mult':
        # Relative uncertainties; entries where the value is 0 stay at 0
        # instead of dividing by zero.
        # NOTE(review): the output buffers take their dtype from dx/dy, so
        # integer uncertainties (including the default dy=0) may not be
        # accepted here - confirm callers always pass floats.
        xdiv = np.divide(dx, x, out=np.zeros_like(dx), where=x!=0)
        ydiv = np.divide(dy, y, out=np.zeros_like(dy), where=y!=0)
        if propagation_type == 'simple':
            return (xdiv + ydiv)*z
        elif propagation_type == 'stdev':
            return np.sqrt(pow(xdiv,2) + pow(ydiv,2))*z
        else:
            raise ValueError('Specified propagation_type not recognised.')

    # Addition
    elif calc == 'add':
        if propagation_type == 'simple':
            return abs(dx)+abs(dy)
        elif propagation_type == 'stdev':
            return np.sqrt(pow(dx,2) + pow(dy,2))
        else:
            raise ValueError('Specified propagation_type not recognised.')
    else:
        raise ValueError('Please specify calc of propagation')

In [None]:
def assign_emissions(df:pd.DataFrame, emissions_df:pd.DataFrame, product_col:str, emissions_col:str,
                     product_val_col:str='Value', emission_val_cols:list=None, emission_val_cols_sigma:list= None, match_list=None, db_name:str='db', production_unit_conv:float=1, keep_all:bool=False) -> (pd.DataFrame, pd.DataFrame):
    """Assign conversion-factor emissions to every row of ``df``.

    Each row's name in ``product_col`` is matched to a row of ``emissions_df``,
    either through a known entry in ``match_list`` or by searching
    ``emissions_col`` (exact name, then product name contained in an emission
    name, then emission name contained in the product name). When several
    inexact candidates are found the user is asked to choose via ``input()``.

    Args:
        df: Table of products/materials. New columns are added to it in place.
        emissions_df: Conversion factor table. Must contain ``emissions_col``,
            a 'Source' column and the value/sigma columns listed below.
        product_col: Column of ``df`` holding the name to match.
        emissions_col: Column of ``emissions_df`` holding the names to match against.
        product_val_col: Column of ``df`` holding the quantity; its uncertainty
            is read from ``product_val_col + '_sigma'``.
        emission_val_cols: Conversion factor columns (default ['Cradle-to-gate']).
        emission_val_cols_sigma: Uncertainty columns, paired by position with
            ``emission_val_cols`` (default ['Cradle-to-gate_sigma']).
        match_list: Known matches, either a dict or a DataFrame with columns
            'IHS' and ``db_name``. A value whose string form is '0' marks a
            name with no match. New matches found here are added to it.
        db_name: Prefix of the output columns and name of the match column.
        production_unit_conv: Factor applied to the quantity before multiplying
            by the conversion factors.
        keep_all: Accepted for the callers' benefit but not used in this function.

    Returns:
        ``df`` with the match name, emissions, conversion factors and their
        uncertainties added, and a DataFrame of the matches with columns
        'IHS' and ``db_name``.
    """

    # Create values if none exist
    if match_list is None:
        match_list = {}
    if emission_val_cols is None:
        emission_val_cols = ['Cradle-to-gate']
    if emission_val_cols_sigma is None:
        emission_val_cols_sigma = ['Cradle-to-gate_sigma']
    product_val_col_sigma = product_val_col+'_sigma'

    # Create columns to receive emissions, match name, emissions conversion factor
    val_col, match_name_col, conv_factor_col = pd.DataFrame(columns=emission_val_cols), [], pd.DataFrame(columns=emission_val_cols)
    # Columns for uncertainties of above
    val_col_sigma, conv_factor_col_sigma = pd.DataFrame(columns=emission_val_cols_sigma), pd.DataFrame(columns=emission_val_cols_sigma)

    # Create match dictionary from appropriate match dataframe column
    length = len(emission_val_cols+emission_val_cols_sigma)
    if isinstance(match_list, pd.DataFrame) and db_name in match_list.columns:
        # NOTE(review): keys are taken from 'IHS' as stored, but lookups below use
        # lower-cased product names - presumably the file stores lower-case names; confirm
        match_list = dict(zip(match_list['IHS'], match_list[db_name]))

    # Loop through rows in assignment dataframe
    for row_num, row in tqdm(enumerate(df.iloc())):

        # Check match_list for match
        if row[product_col].lower() in match_list.keys():

            # If already defined as no match in db
            if str(match_list[row[product_col].lower()]) == '0':
                correspondence = pd.DataFrame()
                # np.nan rather than the np.NAN alias, which NumPy 2.0 removed
                emission_val, name = pd.DataFrame(np.array([np.nan]*length).reshape(1,length), columns=emission_val_cols+emission_val_cols_sigma), np.nan
            # If match has corresponding db value
            else:
                correspondence = filter_rows(emissions_df, emissions_col, match_list[row[product_col].lower()])
                emission_val = correspondence[emission_val_cols+emission_val_cols_sigma]
                name = correspondence.iloc[0][emissions_col]

        # If no match yet assigned
        else:
            # Find correspondence in emissions dataframe
            correspondence = filter_rows(emissions_df, emissions_col, row[product_col].lower()) # Exact matching

            if len(correspondence) == 0: # No exact match -> Try name contained within a match
                correspondence = filter_rows(emissions_df, emissions_col, row[product_col].lower(), exact=False)

                if len(correspondence) > 1: # If multiple inexact matches
                        # The number typed by the user is used as an index label of the candidates
                        take = input('Enter number of best match for '+row[product_col].lower()+':\n'+str(correspondence[emissions_col])+'\n Type n to skip') # Ask user for best match
                        if take != 'n':
                            correspondence = correspondence[correspondence[emissions_col]==correspondence.loc[int(take)][emissions_col]] # Take best match
                        else:
                            correspondence = pd.DataFrame() # If none correspond then empty correspondence

            if len(correspondence) == 0: # No exact match -> Try match contained within name
                matching = emissions_df[[i in row[product_col].lower() for i in emissions_df[emissions_col]]] # Emission string contained in row matching

                if len(matching) > 0: # If multiple matches
                    # Here correspondence is a single row (a Series), not a DataFrame
                    correspondence = matching.iloc[np.argmax([len(i) for i in matching[emissions_col]])] # Take greatest length of match if multiple
                    emission_val = correspondence[emission_val_cols+emission_val_cols_sigma]
                    name = correspondence[emissions_col]

                else: emission_val, name = pd.DataFrame(np.array([np.nan]*length).reshape(1,length), columns=emission_val_cols+emission_val_cols_sigma), np.nan # If no matches identified

            else:
                emission_val = correspondence[emission_val_cols+emission_val_cols_sigma]
                name = correspondence[emissions_col].values[0]

            # Add match to match_list
            if len(correspondence) != 0:
                if isinstance(correspondence, pd.DataFrame):
                    match_list.update({row[product_col].lower():correspondence.iloc[0]['Source']})
                else:
                    match_list.update({row[product_col].lower():correspondence['Source']})
            else: match_list.update({row[product_col].lower():0})
            del correspondence

        # Add matching values to dataframe
        val_col = pd.concat((val_col, row[product_val_col]*production_unit_conv*emission_val[emission_val_cols]))

        # Calculate implied uncertainties and add to dataframe.
        # z already carries production_unit_conv, so the result is not scaled by
        # it a second time (identical for the default factor of 1).
        val_col_sigma = pd.concat((val_col_sigma, uncertainty_propagation('mult', row[product_val_col], row[product_val_col_sigma], emission_val[emission_val_cols], emission_val[emission_val_cols_sigma], z=(row[product_val_col]*production_unit_conv*emission_val[emission_val_cols]).values)))

        # Add other parameters to parameter lists
        match_name_col += [name]
        conv_factor_col = pd.concat((conv_factor_col, emission_val[emission_val_cols]))
        conv_factor_col_sigma = pd.concat((conv_factor_col_sigma, emission_val[emission_val_cols_sigma]))

    # Write the collected results back onto df, one set of four columns per emission column
    df[db_name + '_match'] = match_name_col
    for column, sigma_col in zip(emission_val_cols, emission_val_cols_sigma):
        df[db_name + '_' + column + '_cradle-to-gate'] = val_col[column].values
        df[db_name + '_' + column + '_cradle-to-gate_sigma'] = val_col_sigma[sigma_col].values
        df[db_name + '_' + column + '_conv_factor'] = conv_factor_col[column].values
        df[db_name + '_' + column + '_conv_factor_sigma'] = conv_factor_col_sigma[sigma_col].values

    return df, pd.DataFrame.from_dict(match_list, orient='index').reset_index().rename(columns={'index':'IHS', 0:db_name})

In [None]:
# Match equivalent emissions to materials
# Choose the conversion factor columns by position in the ecoinvent table:
# values start at the fourth column, sigma columns at the seventeenth
keep_all = False
if keep_all:
    emission_val_cols = list(ei_emissions.columns[3:5])
    emission_val_cols_sigma = list(ei_emissions.columns[16:18])
else:
    emission_val_cols = list(ei_emissions.columns[3:16])
    emission_val_cols_sigma = list(ei_emissions.columns[16:])

match_list_ei = pd.read_csv(match_list_path, index_col=False, usecols=['IHS','ei'])

# EI matching
material_emissions, upt_list = assign_emissions(ihs_materials.copy(), ei_emissions, 'Source/Object', 'Source', match_list=match_list_ei, db_name='ei', emission_val_cols=emission_val_cols, emission_val_cols_sigma=emission_val_cols_sigma, keep_all=keep_all)

# Keep the newest match for each IHS name
match_list_ei = pd.concat((match_list_ei, upt_list)).drop_duplicates(subset=['IHS'], keep='last')

# CM matching
match_list_cm = pd.read_csv(match_list_path, index_col=False, usecols=['IHS','cm'])
material_emissions, upt_list = assign_emissions(material_emissions, cm_emissions, 'Source/Object', 'Source', match_list=match_list_cm, db_name='cm', emission_val_cols=emission_val_cols, emission_val_cols_sigma=emission_val_cols_sigma)
match_list_cm = pd.concat((match_list_cm, upt_list)).drop_duplicates(subset=['IHS'], keep='last')

# Combine match lists
all_matches = match_list_ei[['IHS','ei']]
# The column assignment aligns on the index, not on the IHS name.
# NOTE(review): assumes both match lists end up with the same index - confirm
all_matches['cm'] = match_list_cm['cm']
all_matches.sort_values('IHS').reset_index(drop=True).to_csv(match_list_path, index=False)

# Create materials emissions
material_emissions = material_emissions.drop_duplicates(subset=['Code', 'Source/Object']).reset_index(drop=True)

material_emissions.head()

In [None]:
# Save the per-material emissions (feedstocks and indirect utilities) of the process recipes
material_emissions.to_csv(path_or_buf=f'{output_path}ihsMaterialsEmissions_w_upstream.csv')

## Assign emissions from direct utilities

In [None]:
# Reload the material emissions written above (feedstock & indirect utilities)
# and read the direct utility conversion factors
material_emissions = pd.read_csv(f'{output_path}ihsMaterialsEmissions_w_upstream.csv', index_col=0)
direct_utl_conv = pd.read_csv(f'{input_path}extra_inputs/direct_utility_conversion_factors.csv')

# Rows typed 'Utilities' are relabelled 'Indirect Utilities'
material_emissions['Type'] = material_emissions['Type'].replace('Utilities', 'Indirect Utilities')

In [None]:
# Build a 'Direct Utilities' row for every indirect utility row and value it
# with the direct utility conversion factors
emission_val_cols = ei_emissions.columns[3:16].tolist()
emission_val_cols_sigma = ei_emissions.columns[16:].tolist()

# Start from the first 14 columns of the indirect utility rows
is_indirect_utility = material_emissions['Type']=='Indirect Utilities'
direct_utl_ems = material_emissions[is_indirect_utility][material_emissions.columns[:14]]
direct_utl_ems['Type'] = 'Direct Utilities'

# Attach the conversion factors by utility name; the matched name becomes 'ei_match'
direct_utils = direct_utl_ems.merge(direct_utl_conv, left_on='Source/Object', right_on='Source', how='left')
direct_utils = direct_utils.rename(columns={'Source':'ei_match'})

for numeric_col in emission_val_cols+emission_val_cols_sigma+['Value', 'Value_sigma']:
    direct_utils[numeric_col] = direct_utils[numeric_col].astype(float)

for gas in emission_val_cols:
    factor, factor_sigma = direct_utils[gas], direct_utils[gas+'_sigma']
    total_col = 'ei_'+gas+'_cradle-to-gate'
    # Emissions = quantity * conversion factor, with the uncertainty of that product
    direct_utils[total_col] = direct_utils['Value']*factor
    direct_utils[total_col+'_sigma'] = uncertainty_propagation('mult', direct_utils['Value'], direct_utils['Value_sigma'], factor, factor_sigma, z=direct_utils[total_col])
    direct_utils['ei_'+gas+'_conv_factor'] = factor
    direct_utils['ei_'+gas+'_conv_factor_sigma'] = factor_sigma

# The raw factor columns are no longer needed once copied to the 'ei_' columns
direct_utils = direct_utils.drop(columns=emission_val_cols+emission_val_cols_sigma)

In [None]:
# Append the direct utility rows to the material emissions and order the rows
input_emissions = pd.concat([material_emissions, direct_utils], axis=0)
input_emissions = input_emissions.sort_values(
    by=['Product', 'Target/Process', 'Code', 'Type', 'Source/Object']
)

input_emissions.to_csv(f'{output_path}ihsMaterialsEmissions_w_utilities.csv')

## Assign emissions from direct process

In [None]:
# Reload the emissions written in the previous section and define emission_val_cols

input_emissions = pd.read_csv(f'{output_path}ihsMaterialsEmissions_w_utilities.csv', index_col=0)

emission_val_cols = ei_emissions.columns[3:16].tolist()
emission_val_cols_sigma = ei_emissions.columns[16:].tolist()

input_emissions.head()

In [None]:
# Import direct emissions and match to existing products in ihsMaterials

# Read the sheet skipping its first two rows, drop the first remaining row and
# any row without a process name, then order by process
direct_emissions = pd.read_excel(input_path+'extra_inputs/Direct process emissions.xlsx', skiprows=2)[1:].dropna(subset=['Process']).sort_values('Process').reset_index(drop=True)
# Keep the process name and the last five columns (renamed below)
direct_emissions = direct_emissions[['Process']+list(direct_emissions.columns[-5:])]
# Upper-case the process names before merging on them
direct_emissions['Process'] = direct_emissions['Process'].str.upper()

product_process_match = pd.read_csv(input_path+'extra_inputs/product_to_directProcess_matches.csv')

# One row per product; products without a listed process are kept with missing estimates.
# NOTE(review): 'est. N2O' is renamed to 'Nitric oxide' although N2O is nitrous oxide -
# presumably this mirrors a conversion factor column name; confirm
direct_emissions = direct_emissions.merge(product_process_match, on='Process', how='right').dropna(subset=['Product']).drop(columns=['Process']).drop_duplicates(subset=['Product']).rename(columns={'est. CO2':'Carbon dioxide', 'est. CH4':'Methane','est. N2O':'Nitric oxide', 'est. CO2e_20a':'CO2e_20a', 'est. CO2e_100a': 'CO2e_100a'})

# Uncertainty of a direct emission estimate as a fraction of its value
uncertainty_ratio = 0.01

for col in emission_val_cols:
    if col in list(direct_emissions.columns):
        # Missing estimates count as zero emissions. The sigma is derived from
        # the filled values so those rows get 0 rather than NaN, consistent
        # with the columns that have no estimate at all (else branch).
        filled_values = direct_emissions[col].fillna(0).astype(float)
        direct_emissions['ei_'+col+'_cradle-to-gate'] = filled_values
        direct_emissions['ei_'+col+'_cradle-to-gate_sigma'] = filled_values*uncertainty_ratio
        direct_emissions.drop(columns=[col], inplace=True)
    else:
        direct_emissions['ei_'+col+'_cradle-to-gate'] = 0
        direct_emissions['ei_'+col+'_cradle-to-gate_sigma'] = 0

direct_emissions.head()

In [None]:
# Add emissions for each direct process
# Take one row (first 14 columns) per Code/Target/Product for every product that
# has direct emissions. isin() builds the lookup once instead of rebuilding a
# list of products for every row.
has_direct_emissions = input_emissions['Product'].isin(direct_emissions['Product'])
process_emissions = input_emissions[has_direct_emissions][input_emissions.columns[:14]].drop_duplicates(subset=['Code','Target/Process','Product']).reset_index(drop=True)
process_emissions['Type'], process_emissions['MeasType'] = 'Direct Process', 'Chemical'
# The product itself is the source of its direct process emissions
process_emissions['Source/Object'] = process_emissions['Product']
# Quantity of 1 with no uncertainty
process_emissions['Value'], process_emissions['Value_sigma'] = 1, 0
process_emissions = process_emissions.merge(direct_emissions, on='Product', how='inner')

# Merge with all input emissions
output_emissions = pd.concat((input_emissions, process_emissions), axis='index').sort_values(by=['Product', 'Target/Process', 'Code', 'Type', 'Source/Object'])
output_emissions.head()

In [None]:
# Save the combined emissions table (inputs, utilities and direct processes)
output_emissions.to_csv(path_or_buf=f'{output_path}ihsMaterialsEmissions_w_uncertainties.csv')