# Allocate emissions for IHS processes and calculate implied conversion factors

In [1]:
# Import packages
import itertools
import numpy as np
import pandas as pd

# Show every column when a DataFrame is displayed. The fully-qualified option
# name is used because the bare 'max_columns' pattern is ambiguous on pandas
# versions that also define 'styler.render.max_columns' (set_option then raises
# OptionError: pattern matched multiple keys).
pd.set_option('display.max_columns', None)
# Silence SettingWithCopyWarning for the chained assignments used in this notebook
pd.options.mode.chained_assignment = None

## Data imports

In [2]:
# Root folder holding all input data (relative path)
input_path = '../data/'

# Input tables read by this notebook
material_emissions_path = f'{input_path}combined/ihsMaterialsEmissions_w_uncertainties.csv'
ei_emissions_path = f'{input_path}processed/conversionFactors_ecoinvent_grouped.csv'

# Folder that receives the files written by this notebook
output_path = '../data/combined/'

In [None]:
# Load the material emissions table; the first CSV column becomes the index
material_emissions = pd.read_csv(material_emissions_path, index_col=0)

# Preview the first rows
material_emissions.head()

In [7]:
# material_emissions = material_emissions[material_emissions['Target/Process']=='ETHYLENE FROM 50:50 ETHANE:PROPANE MIX']

## Allocate emissions

In [8]:
## Functions to allocate emissions from materials to products output from IHS recipes

def uncertainty_propagation(calc:str, x:float, dx:float, y:float=1, dy:float=0, z:float=1, propagation_type:str='simple') -> float:
    """Propagate the uncertainties dx and dy of x and y through a calculation.

    Args:
        calc: 'mult' for a product/quotient of x and y, 'add' for a sum/difference.
        x, dx: First quantity and its absolute uncertainty (scalars or arrays).
        y, dy: Second quantity and its absolute uncertainty (scalars or arrays).
        z: Result of the calculation; only used by 'mult', where the combined
            relative uncertainty is scaled by z to give an absolute one.
        propagation_type: 'simple' adds the (relative or absolute)
            uncertainties linearly, 'stdev' adds them in quadrature.

    Returns:
        The absolute uncertainty dz of the result.

    Raises:
        ValueError: If calc or propagation_type is not one of the values above.
    """
    if calc == 'mult':
        # Relative uncertainties. Where the denominator is 0 the entry keeps the
        # 0 it was initialised with instead of dividing by zero. The buffer is
        # explicitly float so integer arguments (including the y=1, dy=0
        # defaults) do not fail when the float quotient is written into it.
        xdiv = np.divide(dx, x, out=np.zeros_like(dx, dtype=float), where=x!=0)
        ydiv = np.divide(dy, y, out=np.zeros_like(dy, dtype=float), where=y!=0)
        if propagation_type == 'simple':
            return (xdiv + ydiv)*z
        if propagation_type == 'stdev':
            return np.sqrt(pow(xdiv,2) + pow(ydiv,2))*z
        raise ValueError('Specified propagation_type not recognised.')

    if calc == 'add':
        if propagation_type == 'simple':
            return abs(dx)+abs(dy)
        if propagation_type == 'stdev':
            return np.sqrt(pow(dx,2) + pow(dy,2))
        raise ValueError('Specified propagation_type not recognised.')

    raise ValueError('Please specify calc of propagation')

In [9]:
def filter_df_for_type(df, types, type_col):
    """Return the rows of df whose value in type_col is one of types.

    Args:
        df: DataFrame to filter.
        types: Allowed values. A single string is treated as one allowed value.
        type_col: Name of the column that is compared against types.

    Returns:
        The matching rows of df, with all columns kept.
    """
    # A bare string is one value, not a collection of characters: without this,
    # `value in types` would be a substring test ('A' in 'AB' is True).
    if isinstance(types, str):
        types = [types]
    # An explicit boolean array keeps this a row filter even when df is empty
    # (an empty plain list would be read as "select no columns").
    mask = np.array([value in types for value in df[type_col]], dtype=bool)
    return df[mask]

def calculate_type_emissions(materials_df, product_df, emission_types:list, group_name:str, emissions_cols:list, emissions_cols_sigma:list, emission_val_cols:list, product_ratio_col, product_value_col, emission_type_col = 'Type'):
    """Sum the emissions of one group of materials and allocate them to products.

    The rows of materials_df are filtered once per (allowed values, column)
    pair, the emission columns of the remaining rows are summed, and the totals
    are split over the rows of product_df in proportion to product_ratio_col.

    Args:
        materials_df: Input materials with the emission columns.
        product_df: Products/by-products; the result columns are added to this
            frame in place.
        emission_types: Allowed values per filter column. One list (or single
            string) when emission_type_col is a string, otherwise one entry per
            column, each a list or a single string.
        group_name: Prefix of the created column names.
        emissions_cols: Emission value columns of materials_df.
        emissions_cols_sigma: Matching uncertainty columns of materials_df.
        emission_val_cols: Names used for the created columns, in the same
            order as emissions_cols.
        product_ratio_col: Allocation share column of product_df; a matching
            '<name>_sigma' column must exist.
        product_value_col: Output amount column of product_df; a matching
            '<name>_sigma' column must exist.
        emission_type_col: Column name, or list of column names, to filter on.

    Returns:
        product_df with, per emission column, '<group_name> allocated <col>' and
        '<group_name> unit emission intensity <col>' plus their '_sigma' columns.
    """
    # Impose lists
    if isinstance(emission_type_col, str):
        emission_type_col = [emission_type_col]
        emission_types = [emission_types]
    if isinstance(emission_types, str):
        emission_types = [emission_types]

    # Narrow the materials down to the requested group. Filtering returns a new
    # frame and nothing is written to it, so no defensive copy is needed.
    grouped_df = materials_df
    for allowed_values, col in zip(emission_types, emission_type_col):
        # A single string means "exactly this value", not a substring test
        if isinstance(allowed_values, str):
            allowed_values = [allowed_values]
        if not grouped_df.empty:
            grouped_df = filter_df_for_type(grouped_df, allowed_values, col)

    # Column-wise totals. The axis is explicit because reducing a DataFrame
    # without one (as np.sum does) is deprecated in pandas.
    group_ems = grouped_df[emissions_cols].sum(axis=0)
    group_ems_sigma = grouped_df[emissions_cols_sigma].sum(axis=0)

    # Loop through each value/gas column
    for val_column, gas_column, gas_column_sigma in zip(emission_val_cols, emissions_cols, emissions_cols_sigma):
        val_column_sigma = val_column + '_sigma'
        allocated_col = group_name+' allocated ' + val_column
        allocated_col_sigma = group_name+' allocated ' + val_column_sigma
        intensity_col = group_name+' unit emission intensity ' + val_column
        intensity_col_sigma = group_name+' unit emission intensity ' + val_column_sigma

        # Allocate emissions for value and uncertainty
        product_df[allocated_col] = group_ems[gas_column]*product_df[product_ratio_col]
        product_df[allocated_col_sigma] = uncertainty_propagation('mult', group_ems[gas_column], group_ems_sigma[gas_column_sigma], product_df[product_ratio_col], product_df[product_ratio_col+'_sigma'], z=product_df[allocated_col])

        # Calculate emissions intensity for values and uncertainty
        product_df[intensity_col] = product_df[allocated_col]/product_df[product_value_col]
        product_df[intensity_col_sigma] = uncertainty_propagation('mult', product_df[allocated_col], product_df[allocated_col_sigma], product_df[product_value_col], product_df[product_value_col+'_sigma'], z=product_df[intensity_col])

    return product_df

In [25]:
from tqdm import tqdm
def allocate_emissions(df:pd.DataFrame, emission_val_cols:list, mass_to_other_convs=False, mass_to_other_uncertainty=0.01, value_col= 'Mass, kg', ratio_col = 'Mass ratio', unit='kg'):
    """Allocate the emissions of each process's inputs to its product and by-products.

    Rows of df are grouped by 'Code'. For every code the main product (entered
    as 1 kg/kg with zero uncertainty) and the rows of Type 'By-Product' form the
    outputs. Each output receives a share of the emissions of every input
    ('Source/Object', 'Type' pair) in proportion to its amount in value_col.

    Args:
        df: Process flows. Needs the descriptive columns selected below
            ('Code', 'Type', 'Source/Object', 'Product', 'Value',
            'Value_sigma', ...) and, for each emission column,
            'ei_<col>_cradle-to-gate' and 'cm_<col>_cradle-to-gate' together
            with their '_sigma' columns.
        emission_val_cols: Emission column names (without prefix/suffix).
        mass_to_other_convs: False to allocate on 'Value' directly, otherwise a
            DataFrame with 'Product' and 'Conversion' columns used to convert
            'Value' first. Processes with any output lacking a conversion are
            skipped.
        mass_to_other_uncertainty: Relative uncertainty applied to 'Conversion'.
        value_col: Name of the created output-amount column.
        ratio_col: Name of the created allocation-share column.
        unit: Not used by this function.

    Returns:
        One row per output of every processed code, holding the descriptive
        columns, the amount/share columns and the allocated emissions and
        emission intensities per input. Empty DataFrame if nothing was processed.
    """
    # Get inputs to products. Explicit copy: helper columns are added below and
    # must not be written onto a filtered slice of df.
    df_ins = df[df['Type']!='By-Product'].copy()
    for column in emission_val_cols:
        # Mean of the 'ei_' and 'cm_' estimates, ignoring whichever one is NaN
        df_ins['combined_' + column] = np.nanmean([df_ins['ei_' + column + '_cradle-to-gate'], df_ins['cm_' + column + '_cradle-to-gate']], axis=0)
        df_ins['combined_' + column + '_sigma'] = np.nanmean([df_ins['ei_' + column + '_cradle-to-gate_sigma'], df_ins['cm_' + column + '_cradle-to-gate_sigma']], axis=0)
    combined_cols = ['combined_' + column for column in emission_val_cols]
    combined_cols_sigma = ['combined_' + column + '_sigma' for column in emission_val_cols]

    info_cols = ['Code', 'Data Version', 'Source/Object', 'Type', 'Target/Process', 'Research Year', 'Country/Reg', 'Product', 'Value', 'Value unit', 'Value_sigma', 'Capacity unit', 'MeasType', 'Provenance']

    # Per-process results, concatenated once at the end (concatenating inside
    # the loop re-copies the growing frame on every iteration)
    allocations = []

    # Loop through each process
    for code in tqdm(df['Code'].unique()):

        # Get by-products and mass ratios
        temp = df[df['Code']==code][info_cols]
        # Build the main-product row from the first row of the process
        a = temp.iloc[0].copy()
        a['Source/Object'], a['Type'], a['Value'], a['Value unit'], a['Value_sigma'] = a['Product'], 'Product', float(1), 'kg/kg', float(0)
        a = pd.DataFrame(a.values.reshape(1,-1), columns=a.index)
        temp = temp[temp['Type']=='By-Product']
        temp = pd.concat([temp, a], axis=0)

        # Convert values using mass_to_other_convs if a conversion exists for every output
        if mass_to_other_convs is not False:
            temp = temp.merge(mass_to_other_convs, how='left', left_on='Source/Object', right_on=mass_to_other_convs['Product'].str.upper())
            if temp['Conversion'].isnull().values.any():
                continue
            else:
                temp[value_col] = temp['Conversion']*abs(temp['Value'])
                temp[value_col+'_sigma'] = uncertainty_propagation('mult', abs(temp['Value']), temp['Value_sigma'], temp['Conversion'], mass_to_other_uncertainty*temp['Conversion'], z=temp[value_col])

        else:
            temp[value_col] = abs(temp['Value'])
            temp[value_col+'_sigma'] = temp['Value_sigma']

        # Get ratio of product vs all products+by-products
        total_value = sum(temp[value_col])
        ### -> Assumption of adding uncertainties together for the total
        total_value_sigma = sum(temp[value_col+'_sigma'])
        temp[ratio_col] = temp[value_col]/total_value
        temp[ratio_col+'_sigma'] = uncertainty_propagation('mult', temp[value_col], temp[value_col+'_sigma'], total_value, total_value_sigma, z=temp[ratio_col])

        # Get process emissions & allocate, one group per distinct input material
        used_mats = df_ins[df_ins['Code']==code]
        group_names = used_mats[['Source/Object', 'Type']].drop_duplicates().reset_index(drop=True)

        for source_object, material_type in group_names.itertuples(index=False, name=None):
            # The label keeps the "['<material>', '<type>']" form that the column
            # names are parsed from later; the filter values are wrapped in lists
            # so each one is matched exactly rather than as a substring.
            group_label = str([source_object, material_type])
            temp = calculate_type_emissions(used_mats, temp, [[source_object], [material_type]], group_label, combined_cols, combined_cols_sigma, emission_val_cols, ratio_col, value_col, emission_type_col = ['Source/Object', 'Type'])

        # Inputs without any emission estimate (judged on the first emission column)
        no_emissions = used_mats[combined_cols[0]].isna()

        # Identify missing material emissions
        is_raw_material = used_mats['Type']=='Raw Material'
        above_one_percent = used_mats['Value']> 0.01*np.sum(used_mats['Value'])
        temp['Missing raw materials (>1% mass)'] = str(used_mats[is_raw_material & no_emissions & above_one_percent]['Source/Object'].tolist())

        # Identify missing utility emissions
        # NOTE(review): confirm 'Utilities' is the exact Type label used in the data
        temp['Missing utilities'] = str(used_mats[(used_mats['Type']=='Utilities') & no_emissions]['Source/Object'].tolist())

        # Add current product to allocation list
        allocations.append(temp)

    if not allocations:
        return pd.DataFrame()
    return pd.concat(allocations, axis=0)

In [26]:
# Toggle: True keeps the wider range of emission columns, False only the first two
keep_all = False

ei_emissions = pd.read_csv(ei_emissions_path, index_col=0)

# Emission value columns and their sigma counterparts are picked by position
emission_val_cols = list(ei_emissions.columns[3:16] if keep_all else ei_emissions.columns[3:5])
emission_val_cols_sigma = list(ei_emissions.columns[16:] if keep_all else ei_emissions.columns[16:18])


# Mass allocation
mass_allocation = allocate_emissions(material_emissions, emission_val_cols)

  df_ins['combined_' + column] = np.nanmean([df_ins['ei_' + column + '_cradle-to-gate'], df_ins['cm_' + column + '_cradle-to-gate']], axis=0)
  df_ins['combined_' + column + '_sigma'] = np.nanmean([df_ins['ei_' + column + '_cradle-to-gate_sigma'], df_ins['cm_' + column + '_cradle-to-gate_sigma']], axis=0)
100%|██████████| 1/1 [00:00<00:00,  9.08it/s]


In [34]:
# Write the current allocation results to a parquet file under output_path
mass_allocation.to_parquet(output_path+'ihsProcessEmissions_decomposition_tempfile-2000.parquet')

In [35]:
# Load the allocation results stored in the '-1000' parquet file
mass_all_1 = pd.read_parquet(output_path+'ihsProcessEmissions_decomposition_tempfile-1000.parquet')#, index_col=0)

In [4]:
# Load the allocation results stored in the '-2000' parquet file
mass_all_2 = pd.read_parquet(output_path+'ihsProcessEmissions_decomposition_tempfile-2000.parquet')#, index_col=0)

In [None]:
mass_all_1.head()

In [None]:
mass_allocation.head()

In [38]:
# Stack the rows loaded from the '-1000' file on top of the in-memory results
# NOTE(review): mass_all_2 (the '-2000' file) is loaded but not used here — confirm mass_allocation holds the same rows
mass_allocation = pd.concat((mass_all_1, mass_allocation))

In [None]:
# Drop the names of the intermediate frames now that they are combined
del mass_all_1
del mass_all_2

In [40]:
# Identifier columns: the first 18 columns plus the two missing-data flags
index_cols = list(mass_allocation.columns[:18])+['Missing raw materials (>1% mass)', 'Missing utilities']
# Of the remaining columns keep only the '... allocated ...' ones
allocated_cols = [i for i in mass_allocation.columns[18:] if 'allocated' in i]
mass_filt = mass_allocation[index_cols+allocated_cols]
mass_filt = mass_filt[mass_filt['Type']=='Product']
# Long format: one row per product and emission source, without empty or zero entries
mass_melt = mass_filt.melt(id_vars=index_cols, var_name='Emission source', value_name='value').dropna(subset=['value'])
mass_melt = mass_melt[mass_melt['value'] != 0]
# Source labels look like "['<material>', '<type>'] allocated <gas>". Split on the
# quote-comma-quote separator rather than a bare comma so material names that
# contain a comma are not cut short; double quotes are normalised first because
# names containing an apostrophe are written with double quotes.
mass_melt['Material'] = [i.replace('"', "'").split("', '")[0][2:] for i in mass_melt['Emission source']]
mass_melt['Material Type'] = [i.replace('"', "'").split("', '")[1].split(']')[0][:-1] for i in mass_melt['Emission source']]
mass_melt['Gas'] = [i.split('allocated ')[1] for i in mass_melt['Emission source']]

In [41]:
# Source labels look like "['<material>', '<type>'] allocated <gas>".
# Double quotes are normalised to single quotes before splitting the list part.
emission_sources = mass_melt['Emission source']
mass_melt['Material'] = [
    label.replace('"', "'").split("', '")[0][2:]
    for label in emission_sources
]
mass_melt['Material Type'] = [
    label.replace('"', "'").split("', '")[1].split(']')[0][:-1]
    for label in emission_sources
]
mass_melt['Gas'] = [
    label.split('allocated ')[1]
    for label in emission_sources
]

In [42]:
# Write the long-format decomposition to CSV (the index is written as well)
mass_melt.to_csv(output_path+'ihsProcessEmissions_decomposition_mass.csv')

In [14]:
# Integrate IFA conversion factors
# Load the IFA conversion factors; the first CSV column becomes the index
ifa_factors = pd.read_csv(input_path+'extracted/conversionFactors_from_IFA.csv', index_col=0)
# Upper-case the product names (presumably to line up with the names in ifa_matches — confirm)
ifa_factors['Product'] = ifa_factors['Product'].str.upper()
ifa_matches = pd.read_csv(input_path+'extra_inputs/ifa_matches.csv')
# Keep every fully populated row of ifa_matches, attach the IFA factors by 'Product',
# then replace the IFA product name with the 'Match' column (renamed to 'Product')
ifa_factors = ifa_factors.merge(ifa_matches.dropna(), on='Product', how='right').drop(columns='Product').rename(columns={'Match':'Product'})

# NOTE(review): combined_factors is not defined in the visible part of this notebook — it must come from an earlier cell/session
combined_factors = combined_factors.merge(ifa_factors, on='Product', how='left')

In [15]:
# Output file
# Write the combined factors, sorted by product name, without the index column
combined_factors.sort_values('Product').to_csv(output_path+'processConversionFactors_w_elec_allgases_allalloc.csv', index=False)

In [None]:
combined_factors.sort_values('Product')

In [None]:
# Output condensed file
# Display only the match and CO2e_100a factor columns; nothing is assigned or written here
combined_factors[['Product', 'ei_match', 'ei_CO2e_100a_conv_factor', 'cm_match', 'cm_CO2e_100a_conv_factor', 'ihs_match', 'ihs_cradle-to-out-gate CO2e_100a,  allocation factor']]
# The export below is disabled and refers to a `condensed` variable that this cell does not create
#condensed.to_csv(output_path+'processConversionFactors_condensed.csv', index=False)