# Comparing ecoinvent and EXIOBASE
Part 1) Matching dict preparation

Part 2) Matching

## Scenario name

In [1]:
scenario = '2019-12-02_WW'  # must be the same as the HIOT Excel sheetname

# Part 1) Matching dict preparation

In [2]:
import pandas as pd
import pickle
import numpy as np
import os

## Files

In [3]:
DATA_PATH = "../data/"
SCENARIO_PATH = "../results/" + scenario + "/"
print(SCENARIO_PATH)

if not os.path.exists(SCENARIO_PATH):
    os.makedirs(SCENARIO_PATH)
    os.makedirs(SCENARIO_PATH + 'exiobase/')
    os.makedirs(SCENARIO_PATH + 'matching/')
    os.makedirs(SCENARIO_PATH + 'matching_results/')
    os.makedirs(SCENARIO_PATH + 'figures/')
    os.makedirs(SCENARIO_PATH + 'tables/')

../results/2019-12-02_WW/


In [4]:
# ecoinvent
activity_overview = f"{DATA_PATH}ecoinvent/activity_overview_3.4_cut-off_LCIA_results.xlsx"

# EXIOBASE
hiot_results = f"{DATA_PATH}exiobase/hiot_results_PTM_v2.xlsx"
hiot_results_sheet = scenario

# Matching file
matching_file = f"{DATA_PATH}matching/Matching file.xlsx"
# Sheets
product_matching_sheet = 'product matching'
process_matching_sheet = 'process matching electricity'
region_matching_sheet = "regional matching"

# Load data

### ecoinvent activity overview

In [5]:
AO = pd.read_excel(activity_overview)
print(AO.shape)
AO.head(2)

(14889, 36)


Unnamed: 0,key,#,id,filename,activityName,geography,startDate,endDate,specialActivityType,technologyLevel,...,amount,unitName,productionVolumeAmount,absolute PV,productionVolumeComment,price,By-product classification,mft,functional unit,"('IPCC 2013', 'climate change', 'GWP 100a')"
0,"('ecoinvent 3.4 cutoff', '60274947cf82e6633d8c...",2607,2d83c1a0-0c9b-4589-bb9b-6a81e815f5e2,2d83c1a0-0c9b-4589-bb9b-6a81e815f5e2_4c2b1cc3-...,[sulfonyl]urea-compound production,RER,2000-01-01,2017-12-31,ordinary transforming activity,Current,...,1,kg,4900020.0,True,Unknown\n\nThe addition of the consumption vol...,5.13,allocatable product,non-mft,1,10.727158
1,"('ecoinvent 3.4 cutoff', '7dd298333a92144698cf...",5307,59913008-58ba-4f1a-8bf2-86925bbd6005,59913008-58ba-4f1a-8bf2-86925bbd6005_4c2b1cc3-...,[sulfonyl]urea-compound production,RoW,2000-01-01,2017-12-31,ordinary transforming activity,Current,...,1,kg,19911450.0,True,The addition of the consumption volumes in the...,5.13,allocatable product,non-mft,1,12.586356


### exiobase regionalized sectors

In [6]:
EX = pd.read_excel(hiot_results, sheet_name=hiot_results_sheet)
EX = EX[EX.columns.drop(list(EX.filter(regex='Unnamed')))]
print(EX.shape)
EX.head(2)

(8200, 10)


Unnamed: 0,#,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit
0,1,AU,Cultivation of paddy rice,c01.a,P_PARI,tonnes,1.595365,kg,1.595365,kg CO2-eq/kg
1,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.306648,kg,0.306648,kg CO2-eq/kg


#### drop first row, which is data description

In [7]:
EX.drop([0], axis=0, inplace=True)
print(EX.shape)
EX.head(2)

(8199, 10)


Unnamed: 0,#,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit
1,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.306648,kg,0.306648,kg CO2-eq/kg
2,3,AU,Cultivation of cereal grains nec,c01.c,P_OCER,tonnes,0.266411,kg,0.266411,kg CO2-eq/kg


# Helper Functions

## Dictionaries to help matching

In [8]:
def map_data_from_column(DF, key_column="ISIC", value_column=None):
    """Map one or more values from the value_column to one unique value in the key_column."""
    unique_elements = DF[key_column].unique()
    print("Mapping", value_column if value_column else "Indices", "values to", len(unique_elements), "unique", key_column, "keys.")
    if not value_column:  # then return the indices of the DF
        return {ue: DF[DF[key_column] == ue].index.tolist() for ue in unique_elements}
    else:
        return {ue: DF[DF[key_column] == ue][value_column].tolist() for ue in unique_elements}

# Product matching

In [9]:
pm = pd.read_excel(matching_file, sheet_name=product_matching_sheet, skiprows=[0, 2])
pm.drop(labels=["Unnamed: 0", "Unnamed: 2",  102, "Comments", "Unnamed: 204"], axis=1, inplace=True)
pm.set_index("Unnamed: 1", inplace=True)
pm.index.names = ['product']
pm.fillna(0, inplace=True)
print(pm.shape)
pm.head(2)

(2851, 200)


Unnamed: 0_level_0,Paddy rice,Wheat,Cereal grains nec,"Vegetables, fruit, nuts",Oil seeds,"Sugar cane, sugar beet",Plant-based fibers,Crops nec,Cattle,Pigs,...,Paper for treatment: landfill,Plastic waste for treatment: landfill,Inert/metal/hazardous waste for treatment: landfill,Textiles waste for treatment: landfill,Wood waste for treatment: landfill,Membership organisation services n.e.c.,"Recreational, cultural and sporting services",Other services,Private households with employed persons,Extra-territorial organizations and bodies
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"heat and power co-generation unit, 160kW electrical, common components for heat+electricity",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"heat and power co-generation unit, 1MW electrical, common components for heat+electricity",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
product_EI_to_EX = {}
for index, row in pm.iterrows():
    corresponding_sectors = [x for x in row.index[row.to_numpy().nonzero()]]
    product_EI_to_EX.update({row.name: corresponding_sectors})

In [11]:
product_EX_to_EI = {}
for column in pm.columns:
    corresponding_sectors = [x for x in pm.index[pm[column].to_numpy().nonzero()]]
    product_EX_to_EI.update({column: corresponding_sectors})

In [12]:
with open(f'{SCENARIO_PATH}matching/product_EX_to_EI.pickle', 'wb') as handle:
    pickle.dump(product_EX_to_EI, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [13]:
with open(f'{SCENARIO_PATH}matching/product_EI_to_EX.pickle', 'wb') as handle:
    pickle.dump(product_EI_to_EX, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Process matching (for electricity sector)

In [14]:
ppm = pd.read_excel(matching_file, sheet_name=process_matching_sheet, skiprows=[0, 2])
ppm.drop(labels=["Unnamed: 0", 
                 "Unnamed: 2",  
                 "Unnamed: 3",  
                 "Unnamed: 4",  
                 "Unnamed: 5",  
                 "Unnamed: 6",  
                 "Unnamed: 7",  
                 "Unnamed: 8",
                 "Comments",
                 12,
                ], axis=1, inplace=True)
ppm.set_index("Unnamed: 1", inplace=True)
ppm.index.names = ['key']
ppm.fillna(0, inplace=True)
print(ppm.shape)
ppm.head(2)

(14889, 201)


Unnamed: 0_level_0,Paddy rice,Wheat,Cereal grains nec,"Vegetables, fruit, nuts",Oil seeds,"Sugar cane, sugar beet",Plant-based fibers,Crops nec,Cattle,Pigs,...,Plastic waste for treatment: landfill,Inert/metal/hazardous waste for treatment: landfill,Textiles waste for treatment: landfill,Wood waste for treatment: landfill,Membership organisation services n.e.c.,"Recreational, cultural and sporting services",Other services,Private households with employed persons,Extra-territorial organizations and bodies,Unnamed: 210
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"('ecoinvent 3.4 cutoff', '60274947cf82e6633d8c8488b7029c34')",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"('ecoinvent 3.4 cutoff', '7dd298333a92144698cfa6ee16325dbc')",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
process_EI_to_EX = {}
for index, row in ppm.iterrows():
    corresponding_sectors = [x for x in row.index[row.to_numpy().nonzero()]]
    process_EI_to_EX.update({row.name: corresponding_sectors})

In [16]:
process_EX_to_EI = {}
for column in ppm.columns:
    corresponding_sectors = [x for x in ppm.index[ppm[column].to_numpy().nonzero()]]
    process_EX_to_EI.update({column: corresponding_sectors})

In [17]:
with open(f'{SCENARIO_PATH}matching/process_EX_to_EI.pickle', 'wb') as handle:
    pickle.dump(process_EX_to_EI, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [18]:
with open(f'{SCENARIO_PATH}matching/process_EI_to_EX.pickle', 'wb') as handle:
    pickle.dump(process_EI_to_EX, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Region matching

In [19]:
GEO = pd.read_excel(matching_file, sheet_name=region_matching_sheet, skiprows=[0])
print(GEO.shape)
GEO.head(2)

(261, 4)


Unnamed: 0,#,# Activities,ecoinvent 3.4,Exiobase 3
0,1,5,AE,WM
1,2,8,AL,WE


In [20]:
geo_EX_to_EI = map_data_from_column(GEO, key_column="ecoinvent 3.4", value_column="Exiobase 3")

Mapping Exiobase 3 values to 261 unique ecoinvent 3.4 keys.


In [21]:
geo_EI_to_EX = map_data_from_column(GEO, key_column="Exiobase 3", value_column="ecoinvent 3.4")

Mapping ecoinvent 3.4 values to 52 unique Exiobase 3 keys.


In [22]:
with open(f'{SCENARIO_PATH}matching/geo_EX_to_EI.pickle', 'wb') as handle:
    pickle.dump(geo_EX_to_EI, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [23]:
with open(f'{SCENARIO_PATH}matching/geo_EI_to_EX.pickle', 'wb') as handle:
    pickle.dump(geo_EI_to_EX, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Units

In [24]:
EX["PhysicalUnit"].unique()

array(['kg', 'MJ', 'Meuro'], dtype=object)

In [25]:
AO["unitName"].unique()

array(['kg', 'unit', 'm', 'm3', 'm2', 'ha', 'MJ', 'kWh', 'm*year', 'hour',
       'km', 'l', 'm2*year', 'metric ton*km', 'kg*day', 'km*year',
       'person*km'], dtype=object)

In [26]:
units_EI_to_EX = {
    "kg": ["kg"],  # l
    "MJ": ["MJ", "kWh"],
    "Meuro": [],
}

In [27]:
units_EX_to_EI = {
    "kg": ["kg"],  # l
    "MJ": ["MJ"],
    "kWh": ["MJ"],
}

In [28]:
with open(f'{SCENARIO_PATH}matching/units_EI_to_EX.pickle', 'wb') as handle:
    pickle.dump(units_EI_to_EX, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [29]:
with open(f'{SCENARIO_PATH}matching/units_EX_to_EI.pickle', 'wb') as handle:
    pickle.dump(units_EX_to_EI, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Part 2) Matching

# Matching Exiobase - Ecoinvent

In [30]:
pd.set_option('display.max_columns', 500)
pd.options.display.float_format = '{:.3g}'.format
from itertools import islice
import random

import helper_functions as hf

import matplotlib.pyplot as plt
%matplotlib inline  

# Load data

In [31]:
def load_pickle(filename):
    with open(filename, 'rb') as handle:
        data = pickle.load(handle)
        print(len(data))
        if isinstance(data, dict):
            print(list(islice(data.items(), 2)))
    return data

In [32]:
# for updating the matching dictionaries to the miot equivalent of the hiot names
def replace_dict_keys(original_dict, key_map):
    new_dict = {}
    for key, value in original_dict.items():
        new_dict.update({
            key_map[key]: value
        })
    return new_dict

def replace_dict_values(original_dict, key_map):
    new_dict = {}
    for key, values in original_dict.items():
        new_dict.update({
            #key: [key_map.get(v, None) for v in values]
            key: [key_map[v] for v in values]
        })
    return new_dict

#### Product and process matching (from Bridge matrices)

In [33]:
product_EX_to_EI = load_pickle(f'{SCENARIO_PATH}matching/product_EX_to_EI.pickle')

200
[('Paddy rice', ['rice', 'rice seed, for sowing']), ('Wheat', ['wheat grain', 'wheat grain, feed', 'wheat grain, feed, organic', 'wheat grain, feed, Swiss integrated production', 'wheat grain, organic', 'wheat grain, Swiss integrated production', 'wheat seed, for sowing', 'wheat seed, organic, for sowing', 'wheat seed, Swiss integrated production, for sowing'])]


In [34]:
product_EI_to_EX = load_pickle(f'{SCENARIO_PATH}matching/product_EI_to_EX.pickle')

2851
[('heat and power co-generation unit, 160kW electrical, common components for heat+electricity', ['Construction work']), ('heat and power co-generation unit, 1MW electrical, common components for heat+electricity', ['Construction work'])]


In [35]:
process_EX_to_EI = load_pickle(f'{SCENARIO_PATH}matching/process_EX_to_EI.pickle')

201
[('Paddy rice', []), ('Wheat', [])]


In [36]:
process_EI_to_EX = load_pickle(f'{SCENARIO_PATH}matching/process_EI_to_EX.pickle')

14889
[("('ecoinvent 3.4 cutoff', '60274947cf82e6633d8c8488b7029c34')", []), ("('ecoinvent 3.4 cutoff', '7dd298333a92144698cfa6ee16325dbc')", [])]


#### Geographical matching

In [37]:
geo_EI_to_EX = load_pickle(f'{SCENARIO_PATH}matching/geo_EI_to_EX.pickle')

52
[('WM', ['AE', 'BH', 'IAI Area, Gulf Cooperation Council', 'IL', 'IQ', 'IR', 'JO', 'KW', 'LB', 'OM', 'QA', 'RME', 'SA', 'SY', 'YE']), ('WE', ['AL', 'AM', 'BA', 'GI', 'IAI Area, EU27 & EFTA', 'IS', 'ME', 'MK', 'RS', 'UA', 'WEU', 'XK'])]


In [38]:
print("Excluding RoW!")
geo_EI_to_EX["ROW"].pop()

Excluding RoW!


'RoW'

In [39]:
geo_EX_to_EI = load_pickle(f'{SCENARIO_PATH}matching/geo_EX_to_EI.pickle')

261
[('AE', ['WM']), ('AL', ['WE'])]


#### Unit matching

In [40]:
units_EX_to_EI = load_pickle(f'{SCENARIO_PATH}matching/units_EX_to_EI.pickle')

3
[('kg', ['kg']), ('MJ', ['MJ'])]


In [41]:
units_EI_to_EX = load_pickle(f'{SCENARIO_PATH}matching/units_EI_to_EX.pickle')

3
[('kg', ['kg']), ('MJ', ['MJ', 'kWh'])]


# Load EX-EI Dataframes for matching

#### Load ecoinvent Dataframe

#### Add ISIC level 1-5 codes and descriptions to AO

In [42]:
ISIC = pd.read_excel(f"{DATA_PATH}matching/ISIC.xlsx")
ISIC = ISIC.astype(str)
ISIC['#activities'] = ISIC['#activities'].astype(int)
ISIC.rename(columns={"ISIC": "ISIC v4"}, inplace=True)
print(ISIC.shape)
ISIC.head(2)

(186, 21)


Unnamed: 0.1,Unnamed: 0,ISIC v4,#activities,Code (ecoinvent),Description (ecoinvent),ISIC level,Code 1,Code 2,Code 3,Code 4,Code 5,Description 1,Description 2,Description 3,Description 4,Description 5,ISIC 1,ISIC 2,ISIC 3,ISIC 4,ISIC 5
0,0,"0111:Growing of cereals (except rice), legumin...",205,111,"Growing of cereals (except rice), leguminous c...",4,A,1.0,11.0,111.0,,"Agriculture, forestry and fishing","Crop and animal production, hunting and relate...",Growing of non-perennial crops,"Growing of cereals (except rice), leguminous c...",,"A:Agriculture, forestry and fishing","01:Crop and animal production, hunting and rel...",011:Growing of non-perennial crops,"0111:Growing of cereals (except rice), legumin...",
1,1,0112:Growing of rice,8,112,Growing of rice,4,A,1.0,11.0,112.0,,"Agriculture, forestry and fishing","Crop and animal production, hunting and relate...",Growing of non-perennial crops,Growing of rice,,"A:Agriculture, forestry and fishing","01:Crop and animal production, hunting and rel...",011:Growing of non-perennial crops,0112:Growing of rice,


In [43]:
AO = pd.merge(AO, ISIC, on=["ISIC v4"], how="outer")
print(AO.shape)
AO.head(2)

(14889, 56)


Unnamed: 0.1,key,#,id,filename,activityName,geography,startDate,endDate,specialActivityType,technologyLevel,inheritance status,parentActivityId,tags,synonyms,ISIC v4,ISIC code,ISIC name,specialty production,constrained market,accessRestrictedTo,dataEntryBy,dataGenerator,isActiveAuthor,group,name,CPC,amount,unitName,productionVolumeAmount,absolute PV,productionVolumeComment,price,By-product classification,mft,functional unit,"('IPCC 2013', 'climate change', 'GWP 100a')",Unnamed: 0,#activities,Code (ecoinvent),Description (ecoinvent),ISIC level,Code 1,Code 2,Code 3,Code 4,Code 5,Description 1,Description 2,Description 3,Description 4,Description 5,ISIC 1,ISIC 2,ISIC 3,ISIC 4,ISIC 5
0,"('ecoinvent 3.4 cutoff', '60274947cf82e6633d8c...",2607,2d83c1a0-0c9b-4589-bb9b-6a81e815f5e2,2d83c1a0-0c9b-4589-bb9b-6a81e815f5e2_4c2b1cc3-...,[sulfonyl]urea-compound production,RER,2000-01-01,2017-12-31,ordinary transforming activity,Current,allocated dataset,6e91178f-f508-433c-b2bd-0d635a2da38c,ConvertedDataset,,2021:Manufacture of pesticides and other agroc...,2021,Manufacture of pesticides and other agrochemic...,True,,1,Jürgen Sutter,Jürgen Sutter,False,ReferenceProduct,[sulfonyl]urea-compound,"34663: Herbicides, anti-sprouting products and...",1,kg,4900000.0,True,Unknown\n\nThe addition of the consumption vol...,5.13,allocatable product,non-mft,1,10.7,71,145,2021,Manufacture of pesticides and other agrochemic...,4,C,20.0,202.0,2021.0,,Manufacturing,Manufacture of chemicals and chemical products,Manufacture of other chemical products,Manufacture of pesticides and other agrochemic...,,C:Manufacturing,20:Manufacture of chemicals and chemical products,202:Manufacture of other chemical products,2021:Manufacture of pesticides and other agroc...,
1,"('ecoinvent 3.4 cutoff', '7dd298333a92144698cf...",5307,59913008-58ba-4f1a-8bf2-86925bbd6005,59913008-58ba-4f1a-8bf2-86925bbd6005_4c2b1cc3-...,[sulfonyl]urea-compound production,RoW,2000-01-01,2017-12-31,ordinary transforming activity,Current,allocated dataset,fbd7e15f-077b-4db7-9e5d-8e574409a15d,ConvertedDataset,,2021:Manufacture of pesticides and other agroc...,2021,Manufacture of pesticides and other agrochemic...,True,,1,[System],Jürgen Sutter,False,ReferenceProduct,[sulfonyl]urea-compound,"34663: Herbicides, anti-sprouting products and...",1,kg,19900000.0,True,The addition of the consumption volumes in the...,5.13,allocatable product,non-mft,1,12.6,71,145,2021,Manufacture of pesticides and other agrochemic...,4,C,20.0,202.0,2021.0,,Manufacturing,Manufacture of chemicals and chemical products,Manufacture of other chemical products,Manufacture of pesticides and other agrochemic...,,C:Manufacturing,20:Manufacture of chemicals and chemical products,202:Manufacture of other chemical products,2021:Manufacture of pesticides and other agroc...,


In [44]:
# save to Excel
AO.to_excel(f"{DATA_PATH}ecoinvent/activity_overview_3.4_cut-off_LCIA_results_with_ISIC_details.xlsx")

#### Excluding markets

In [45]:
AO = AO[AO['specialActivityType'] == 'ordinary transforming activity']
print(AO.shape)

(10805, 56)


#### Excluding RoW

In [46]:
AO = AO[AO['geography'] != 'RoW']
print(AO.shape)

(7986, 56)


#### Load Exiobase DF

In [47]:
EX['CarbonFootprint'].replace(0, np.nan, inplace=True)  # replace 0 values with nan (otherwise mean, etc. are disturbed)
print(EX.shape)
EX.dropna(inplace=True)
print(EX.shape)
EX.head(2)

(8199, 10)
(7012, 10)


Unnamed: 0,#,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit
1,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg
2,3,AU,Cultivation of cereal grains nec,c01.c,P_OCER,tonnes,0.266,kg,0.266,kg CO2-eq/kg


### ProductTypeName is different in hiot; for consistency reasons with the miot, we convert the names back to the ones used in miot

In [48]:
pm = pd.read_excel(matching_file, sheet_name="miot_hiot")
print(pm.shape)
pm.head(2)

(200, 3)


Unnamed: 0,old_name,new_name,old_or_new
0,Paddy rice,Cultivation of paddy rice,old
1,Wheat,Cultivation of wheat,old


In [49]:
# construct a dictionary from new(hiot) to old(miot) names
hiot_to_miot_dict = {}
miot_to_hiot_dict = {}
miot_to_new_miot = {}
for index, row in pm.iterrows():
    hiot_to_miot_dict.update({
        row["new_name"]: row["new_name"] if row["old_or_new"] == "new" else row["old_name"]
    })
    miot_to_hiot_dict.update({
        row["old_name"]: row["new_name"]
    })
    miot_to_new_miot.update(
    {
        row["old_name"]: row["new_name"] if row["old_or_new"] == "new" else row["old_name"]
    })
#hiot_to_miot_dict
print("Hiot to miot:", len(set(hiot_to_miot_dict.keys())), len(set(hiot_to_miot_dict.values())))
print("Miot to hiot:", len(set(miot_to_hiot_dict.keys())), len(set(miot_to_hiot_dict.values())))
print("Miot to new miot:", len(set(miot_to_new_miot.keys())), len(set(miot_to_new_miot.values())))

Hiot to miot: 164 164
Miot to hiot: 200 164
Miot to new miot: 200 164


#### Save MIOT to HIOT matching dicts

In [50]:
with open(f'{DATA_PATH}matching/hiot_to_miot_dict.pickle', 'wb') as handle:
    pickle.dump(hiot_to_miot_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open(f'{DATA_PATH}matching/miot_to_hiot_dict.pickle', 'wb') as handle:
    pickle.dump(miot_to_hiot_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open(f'{DATA_PATH}matching/miot_to_new_miot.pickle', 'wb') as handle:
    pickle.dump(miot_to_new_miot, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [51]:
# apply this map to EX
hiot_names = EX["ProductTypeName_of_hiot"]
EX["ProductTypeName"] = hiot_names.map(hiot_to_miot_dict)
print(EX.shape)
EX.head(2)

(7012, 11)


Unnamed: 0,#,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName
1,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg,Wheat
2,3,AU,Cultivation of cereal grains nec,c01.c,P_OCER,tonnes,0.266,kg,0.266,kg CO2-eq/kg,Cereal grains nec


In [52]:
# save to Excel
EX.to_excel(f"{SCENARIO_PATH}exiobase/hiot_results.xlsx")

#### Update also the matching dictionaries with the new miot names

In [53]:
product_EI_to_EX = replace_dict_values(product_EI_to_EX, miot_to_new_miot)
print(len(product_EI_to_EX), list(islice(product_EI_to_EX.items(), 2)))
process_EI_to_EX = replace_dict_values(process_EI_to_EX, miot_to_new_miot)
print(len(process_EI_to_EX), list(islice(process_EI_to_EX.items(), 2)))

2851 [('heat and power co-generation unit, 160kW electrical, common components for heat+electricity', ['Construction work']), ('heat and power co-generation unit, 1MW electrical, common components for heat+electricity', ['Construction work'])]
14889 [("('ecoinvent 3.4 cutoff', '60274947cf82e6633d8c8488b7029c34')", []), ("('ecoinvent 3.4 cutoff', '7dd298333a92144698cfa6ee16325dbc')", [])]


# Match EX and EI dataframes (Pandas way)

1. Create lists of tuples from matched sectors, geographies and units.
1. Then make DFs from each list
1. Then merge each of theses DFs with EX
1. the resulting DF can be merged with EI based on "product OR process & geo & unit"

In [54]:
def tuples_from_dict(matching_dict):
    tuples = []
    for key, values in matching_dict.items():
        for value in values:
            tuples.append((key, value))
    return tuples

In [55]:
tuples_from_dict(geo_EI_to_EX)[:3]

[('WM', 'AE'), ('WM', 'BH'), ('WM', 'IAI Area, Gulf Cooperation Council')]

In [56]:
# Products
DF_product_matching = pd.DataFrame(tuples_from_dict(product_EI_to_EX), columns=["name", "ProductTypeName"])
print(DF_product_matching.shape)
DF_product_matching.head(2)

(2327, 2)


Unnamed: 0,name,ProductTypeName
0,"heat and power co-generation unit, 160kW elect...",Construction work
1,"heat and power co-generation unit, 1MW electri...",Construction work


In [57]:
# Processes
DF_process_matching = pd.DataFrame(tuples_from_dict(process_EI_to_EX), columns=["key", "ProductTypeName"])
print(DF_process_matching.shape)
DF_process_matching.head(2)

(1987, 2)


Unnamed: 0,key,ProductTypeName
0,"('ecoinvent 3.4 cutoff', '22772a024d350dc0101b...",Electricity by biomass and waste
1,"('ecoinvent 3.4 cutoff', '888c4aa6b7c44a3fec5e...",Electricity by biomass and waste


In [58]:
# Geographies
DF_geo_matching = pd.DataFrame(tuples_from_dict(geo_EI_to_EX), columns=["CountryCode", "geography"])
print(DF_geo_matching.shape)
DF_geo_matching.head(2)

(260, 2)


Unnamed: 0,CountryCode,geography
0,WM,AE
1,WM,BH


In [59]:
# Units
DF_unit_matching = pd.DataFrame(tuples_from_dict(units_EI_to_EX), columns=["PhysicalUnit", "unitName"])
print(DF_unit_matching.shape)
DF_unit_matching.head()

(3, 2)


Unnamed: 0,PhysicalUnit,unitName
0,kg,kg
1,MJ,MJ
2,MJ,kWh


# Merging...

In [60]:
EX_geo = pd.merge(EX, DF_geo_matching, on=["CountryCode"], how="inner")
print(EX_geo.shape)
EX_geo.head(2)

(38042, 12)


Unnamed: 0,#,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName,geography
0,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg,Wheat,AU
1,3,AU,Cultivation of cereal grains nec,c01.c,P_OCER,tonnes,0.266,kg,0.266,kg CO2-eq/kg,Cereal grains nec,AU


In [61]:
EX_geo_unit = pd.merge(EX_geo, DF_unit_matching, on=["PhysicalUnit"], how="inner")
print(EX_geo_unit.shape)
EX_geo_unit.head(2)

(31435, 13)


Unnamed: 0,#,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName,geography,unitName
0,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg,Wheat,AU,kg
1,3,AU,Cultivation of cereal grains nec,c01.c,P_OCER,tonnes,0.266,kg,0.266,kg CO2-eq/kg,Cereal grains nec,AU,kg


In [62]:
EX_geo_unit_product = pd.merge(EX_geo_unit, DF_product_matching, on=["ProductTypeName"], how="inner")
print(EX_geo_unit_product.shape)
EX_geo_unit_product.head(2)

(471019, 14)


Unnamed: 0,#,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName,geography,unitName,name
0,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg,Wheat,AU,kg,wheat grain
1,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg,Wheat,AU,kg,"wheat grain, feed"


In [63]:
EX_geo_unit_process = pd.merge(EX_geo_unit, DF_process_matching, on=["ProductTypeName"], how="inner")
print(EX_geo_unit_process.shape)
EX_geo_unit_process.head(2)

(936452, 14)


Unnamed: 0,#,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName,geography,unitName,key
0,96,AU,Production of electricity by coal,c40.11.a,P_POWC,TJ,361,MJ,0.361,kg CO2-eq/MJ,Electricity by coal,AU,MJ,"('ecoinvent 3.4 cutoff', '58353379c2ac55bb6bb0..."
1,96,AU,Production of electricity by coal,c40.11.a,P_POWC,TJ,361,MJ,0.361,kg CO2-eq/MJ,Electricity by coal,AU,MJ,"('ecoinvent 3.4 cutoff', '6f335a441b837359e503..."


In [64]:
EX_geo_unit_process["PhysicalUnit"].unique()

array(['MJ'], dtype=object)

## Matching (the Pandas way)

In [65]:
M1 = pd.merge(EX_geo_unit_product, AO, on=["geography", "name", "unitName"], how="inner")
M1.apply(str)
numeric_columns = ["CarbonFootprint", "('IPCC 2013', 'climate change', 'GWP 100a')"]
M1[numeric_columns] = M1[numeric_columns].apply(pd.to_numeric)
print(M1.shape)
M1.head(2)

(2812, 67)


Unnamed: 0.1,#_x,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName,geography,unitName,name,key,#_y,id,filename,activityName,startDate,endDate,specialActivityType,technologyLevel,inheritance status,parentActivityId,tags,synonyms,ISIC v4,ISIC code,ISIC name,specialty production,constrained market,accessRestrictedTo,dataEntryBy,dataGenerator,isActiveAuthor,group,CPC,amount,productionVolumeAmount,absolute PV,productionVolumeComment,price,By-product classification,mft,functional unit,"('IPCC 2013', 'climate change', 'GWP 100a')",Unnamed: 0,#activities,Code (ecoinvent),Description (ecoinvent),ISIC level,Code 1,Code 2,Code 3,Code 4,Code 5,Description 1,Description 2,Description 3,Description 4,Description 5,ISIC 1,ISIC 2,ISIC 3,ISIC 4,ISIC 5
0,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg,Wheat,AU,kg,wheat grain,"('ecoinvent 3.4 cutoff', 'd0bd9c43e8becfcc1a3d...",3759,40d2fd8e-c1c6-421a-a7e7-0723c4599f56,40d2fd8e-c1c6-421a-a7e7-0723c4599f56_0201ee86-...,wheat production,2009-01-01,2017-12-31,ordinary transforming activity,Current,allocated dataset,3be71d91-dd7a-48f2-80a1-49cdea26814c,WFLDB,,"0111:Growing of cereals (except rice), legumin...",111,"Growing of cereals (except rice), leguminous c...",False,,1,Patrik Mouron,Eliane Riedener,True,ReferenceProduct,"01112: Wheat, other",1,25300000000.0,True,"Source: FAOSTAT. Average, 2009-2012",0.131,allocatable product,non-mft,1,1.64,0,205,111,"Growing of cereals (except rice), leguminous c...",4,A,1.0,11.0,111.0,,"Agriculture, forestry and fishing","Crop and animal production, hunting and relate...",Growing of non-perennial crops,"Growing of cereals (except rice), leguminous c...",,"A:Agriculture, forestry and fishing","01:Crop and animal production, hunting and rel...",011:Growing of non-perennial crops,"0111:Growing of cereals (except rice), legumin...",
1,822,CA,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.327,kg,0.327,kg CO2-eq/kg,Wheat,Canada without Quebec,kg,wheat grain,"('ecoinvent 3.4 cutoff', 'f053a42fddfcd4278d97...",14158,f34462a8-dcd0-4706-ac4b-aa748b36a0f4,f34462a8-dcd0-4706-ac4b-aa748b36a0f4_0201ee86-...,wheat production,2009-01-01,2017-12-31,ordinary transforming activity,Current,allocated dataset,16b1f765-0dd1-4660-841c-efa7c6a5a8cd,WFLDB,,"0111:Growing of cereals (except rice), legumin...",111,"Growing of cereals (except rice), leguminous c...",False,,1,Patrik Mouron,Eliane Riedener,True,ReferenceProduct,"01112: Wheat, other",1,25600000000.0,True,"Source: FAOSTAT. Average, 2009-2012",0.131,allocatable product,non-mft,1,0.503,0,205,111,"Growing of cereals (except rice), leguminous c...",4,A,1.0,11.0,111.0,,"Agriculture, forestry and fishing","Crop and animal production, hunting and relate...",Growing of non-perennial crops,"Growing of cereals (except rice), leguminous c...",,"A:Agriculture, forestry and fishing","01:Crop and animal production, hunting and rel...",011:Growing of non-perennial crops,"0111:Growing of cereals (except rice), legumin...",


In [66]:
M2 = pd.merge(EX_geo_unit_process, AO, on=["geography", "key", "unitName"], how="inner")
M2.apply(str)
numeric_columns = ["CarbonFootprint", "('IPCC 2013', 'climate change', 'GWP 100a')"]
M2[numeric_columns] = M2[numeric_columns].apply(pd.to_numeric)
print(M2.shape)
M2.head(2)

(1755, 67)


Unnamed: 0.1,#_x,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName,geography,unitName,key,#_y,id,filename,activityName,startDate,endDate,specialActivityType,technologyLevel,inheritance status,parentActivityId,tags,synonyms,ISIC v4,ISIC code,ISIC name,specialty production,constrained market,accessRestrictedTo,dataEntryBy,dataGenerator,isActiveAuthor,group,name,CPC,amount,productionVolumeAmount,absolute PV,productionVolumeComment,price,By-product classification,mft,functional unit,"('IPCC 2013', 'climate change', 'GWP 100a')",Unnamed: 0,#activities,Code (ecoinvent),Description (ecoinvent),ISIC level,Code 1,Code 2,Code 3,Code 4,Code 5,Description 1,Description 2,Description 3,Description 4,Description 5,ISIC 1,ISIC 2,ISIC 3,ISIC 4,ISIC 5
0,96,AU,Production of electricity by coal,c40.11.a,P_POWC,TJ,361,MJ,0.361,kg CO2-eq/MJ,Electricity by coal,AU,kWh,"('ecoinvent 3.4 cutoff', 'c4d3dcc31c97a01ef7b1...",14092,f2513c58-268c-4b43-a617-f227ca3ac5a1,f2513c58-268c-4b43-a617-f227ca3ac5a1_66c93e71-...,"electricity production, hard coal",1980-01-01,2017-12-31,ordinary transforming activity,Modern,allocated dataset,24731d86-38a5-40a5-a949-9b213556ac7f,hard coal power\n fossil fuels\n coal power,coking coal\n anthracite\n other bituminous coal,"3510:Electric power generation, transmission a...",3510,"Electric power generation, transmission and di...",True,,1,Karin Treyer,Karin Treyer,True,ReferenceProduct,"electricity, high voltage",17100: Electrical energy,1,30400000000.0,True,Calculated value,0.0977,allocatable product,non-mft,1,1.11,138,2817,3510,"Electric power generation, transmission and di...",4,D,35.0,351.0,3510.0,,"Electricity, gas, steam and air conditioning s...","Electricity, gas, steam and air conditioning s...","Electric power generation, transmission and di...","Electric power generation, transmission and di...",,"D:Electricity, gas, steam and air conditioning...","35:Electricity, gas, steam and air conditionin...","351:Electric power generation, transmission an...","3510:Electric power generation, transmission a...",
1,96,AU,Production of electricity by coal,c40.11.a,P_POWC,TJ,361,MJ,0.361,kg CO2-eq/MJ,Electricity by coal,AU,kWh,"('ecoinvent 3.4 cutoff', '3a54ec6f33bd5af8782a...",11601,c6d76656-4cdb-4efd-82a2-95ee3a5f67ab,c6d76656-4cdb-4efd-82a2-95ee3a5f67ab_66c93e71-...,"electricity production, lignite",1980-01-01,2017-12-31,ordinary transforming activity,Modern,allocated dataset,9ec806d6-e465-4995-b573-980122e5b816,coal power\n lignite power\n fossil fuels\n,brown coal\n sub-bituminous coal,"3510:Electric power generation, transmission a...",3510,"Electric power generation, transmission and di...",True,,1,Karin Treyer,Karin Treyer,True,ReferenceProduct,"electricity, high voltage",17100: Electrical energy,1,128000000000.0,True,Calculated value,0.0977,allocatable product,non-mft,1,1.24,138,2817,3510,"Electric power generation, transmission and di...",4,D,35.0,351.0,3510.0,,"Electricity, gas, steam and air conditioning s...","Electricity, gas, steam and air conditioning s...","Electric power generation, transmission and di...","Electric power generation, transmission and di...",,"D:Electricity, gas, steam and air conditioning...","35:Electricity, gas, steam and air conditionin...","351:Electric power generation, transmission an...","3510:Electric power generation, transmission a...",


In [67]:
M = pd.concat([M1, M2], sort=False)
M = M.reset_index()
print(M.shape)
M.head(2)

(4567, 68)


Unnamed: 0.1,index,#_x,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName,geography,unitName,name,key,#_y,id,filename,activityName,startDate,endDate,specialActivityType,technologyLevel,inheritance status,parentActivityId,tags,synonyms,ISIC v4,ISIC code,ISIC name,specialty production,constrained market,accessRestrictedTo,dataEntryBy,dataGenerator,isActiveAuthor,group,CPC,amount,productionVolumeAmount,absolute PV,productionVolumeComment,price,By-product classification,mft,functional unit,"('IPCC 2013', 'climate change', 'GWP 100a')",Unnamed: 0,#activities,Code (ecoinvent),Description (ecoinvent),ISIC level,Code 1,Code 2,Code 3,Code 4,Code 5,Description 1,Description 2,Description 3,Description 4,Description 5,ISIC 1,ISIC 2,ISIC 3,ISIC 4,ISIC 5
0,0,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg,Wheat,AU,kg,wheat grain,"('ecoinvent 3.4 cutoff', 'd0bd9c43e8becfcc1a3d...",3759,40d2fd8e-c1c6-421a-a7e7-0723c4599f56,40d2fd8e-c1c6-421a-a7e7-0723c4599f56_0201ee86-...,wheat production,2009-01-01,2017-12-31,ordinary transforming activity,Current,allocated dataset,3be71d91-dd7a-48f2-80a1-49cdea26814c,WFLDB,,"0111:Growing of cereals (except rice), legumin...",111,"Growing of cereals (except rice), leguminous c...",False,,1,Patrik Mouron,Eliane Riedener,True,ReferenceProduct,"01112: Wheat, other",1,25300000000.0,True,"Source: FAOSTAT. Average, 2009-2012",0.131,allocatable product,non-mft,1,1.64,0,205,111,"Growing of cereals (except rice), leguminous c...",4,A,1.0,11.0,111.0,,"Agriculture, forestry and fishing","Crop and animal production, hunting and relate...",Growing of non-perennial crops,"Growing of cereals (except rice), leguminous c...",,"A:Agriculture, forestry and fishing","01:Crop and animal production, hunting and rel...",011:Growing of non-perennial crops,"0111:Growing of cereals (except rice), legumin...",
1,1,822,CA,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.327,kg,0.327,kg CO2-eq/kg,Wheat,Canada without Quebec,kg,wheat grain,"('ecoinvent 3.4 cutoff', 'f053a42fddfcd4278d97...",14158,f34462a8-dcd0-4706-ac4b-aa748b36a0f4,f34462a8-dcd0-4706-ac4b-aa748b36a0f4_0201ee86-...,wheat production,2009-01-01,2017-12-31,ordinary transforming activity,Current,allocated dataset,16b1f765-0dd1-4660-841c-efa7c6a5a8cd,WFLDB,,"0111:Growing of cereals (except rice), legumin...",111,"Growing of cereals (except rice), leguminous c...",False,,1,Patrik Mouron,Eliane Riedener,True,ReferenceProduct,"01112: Wheat, other",1,25600000000.0,True,"Source: FAOSTAT. Average, 2009-2012",0.131,allocatable product,non-mft,1,0.503,0,205,111,"Growing of cereals (except rice), leguminous c...",4,A,1.0,11.0,111.0,,"Agriculture, forestry and fishing","Crop and animal production, hunting and relate...",Growing of non-perennial crops,"Growing of cereals (except rice), leguminous c...",,"A:Agriculture, forestry and fishing","01:Crop and animal production, hunting and rel...",011:Growing of non-perennial crops,"0111:Growing of cereals (except rice), legumin...",


In [68]:
# convert MJ to kWh 
index_kWh = M.loc[M.unitName == "kWh"].index
#print(M.loc[317])
print("Converted", len(index_kWh), "values from MJ to kWh.")
M["CarbonFootprint"].loc[index_kWh] = M["CarbonFootprint"].loc[index_kWh] * 3.6
M.head(2)

Converted 1755 values from MJ to kWh.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0.1,index,#_x,CountryCode,ProductTypeName_of_hiot,product code 1,product code 2,OriginalPhysicalUnit,GHGs-eq - tonnes,PhysicalUnit,CarbonFootprint,unit,ProductTypeName,geography,unitName,name,key,#_y,id,filename,activityName,startDate,endDate,specialActivityType,technologyLevel,inheritance status,parentActivityId,tags,synonyms,ISIC v4,ISIC code,ISIC name,specialty production,constrained market,accessRestrictedTo,dataEntryBy,dataGenerator,isActiveAuthor,group,CPC,amount,productionVolumeAmount,absolute PV,productionVolumeComment,price,By-product classification,mft,functional unit,"('IPCC 2013', 'climate change', 'GWP 100a')",Unnamed: 0,#activities,Code (ecoinvent),Description (ecoinvent),ISIC level,Code 1,Code 2,Code 3,Code 4,Code 5,Description 1,Description 2,Description 3,Description 4,Description 5,ISIC 1,ISIC 2,ISIC 3,ISIC 4,ISIC 5
0,0,2,AU,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.307,kg,0.307,kg CO2-eq/kg,Wheat,AU,kg,wheat grain,"('ecoinvent 3.4 cutoff', 'd0bd9c43e8becfcc1a3d...",3759,40d2fd8e-c1c6-421a-a7e7-0723c4599f56,40d2fd8e-c1c6-421a-a7e7-0723c4599f56_0201ee86-...,wheat production,2009-01-01,2017-12-31,ordinary transforming activity,Current,allocated dataset,3be71d91-dd7a-48f2-80a1-49cdea26814c,WFLDB,,"0111:Growing of cereals (except rice), legumin...",111,"Growing of cereals (except rice), leguminous c...",False,,1,Patrik Mouron,Eliane Riedener,True,ReferenceProduct,"01112: Wheat, other",1,25300000000.0,True,"Source: FAOSTAT. Average, 2009-2012",0.131,allocatable product,non-mft,1,1.64,0,205,111,"Growing of cereals (except rice), leguminous c...",4,A,1.0,11.0,111.0,,"Agriculture, forestry and fishing","Crop and animal production, hunting and relate...",Growing of non-perennial crops,"Growing of cereals (except rice), leguminous c...",,"A:Agriculture, forestry and fishing","01:Crop and animal production, hunting and rel...",011:Growing of non-perennial crops,"0111:Growing of cereals (except rice), legumin...",
1,1,822,CA,Cultivation of wheat,c01.b,P_WHEA,tonnes,0.327,kg,0.327,kg CO2-eq/kg,Wheat,Canada without Quebec,kg,wheat grain,"('ecoinvent 3.4 cutoff', 'f053a42fddfcd4278d97...",14158,f34462a8-dcd0-4706-ac4b-aa748b36a0f4,f34462a8-dcd0-4706-ac4b-aa748b36a0f4_0201ee86-...,wheat production,2009-01-01,2017-12-31,ordinary transforming activity,Current,allocated dataset,16b1f765-0dd1-4660-841c-efa7c6a5a8cd,WFLDB,,"0111:Growing of cereals (except rice), legumin...",111,"Growing of cereals (except rice), leguminous c...",False,,1,Patrik Mouron,Eliane Riedener,True,ReferenceProduct,"01112: Wheat, other",1,25600000000.0,True,"Source: FAOSTAT. Average, 2009-2012",0.131,allocatable product,non-mft,1,0.503,0,205,111,"Growing of cereals (except rice), leguminous c...",4,A,1.0,11.0,111.0,,"Agriculture, forestry and fishing","Crop and animal production, hunting and relate...",Growing of non-perennial crops,"Growing of cereals (except rice), leguminous c...",,"A:Agriculture, forestry and fishing","01:Crop and animal production, hunting and rel...",011:Growing of non-perennial crops,"0111:Growing of cereals (except rice), legumin...",


In [69]:
# drop rows where EX CarbonFootprint == 0
ind = M.loc[M["CarbonFootprint"] == 0].index
print("Excluding", len(ind), "'zero' values.")
M.drop(ind, inplace=True)
print(M.shape)

Excluding 0 'zero' values.
(4567, 68)


## Add relative deviation and difference

In [70]:
M["diff"] = M["CarbonFootprint"] - M["('IPCC 2013', 'climate change', 'GWP 100a')"]
M["dev"] = M["('IPCC 2013', 'climate change', 'GWP 100a')"]/M["CarbonFootprint"] - 1.0

# Save matching results

In [71]:
M.columns

Index(['index', '#_x', 'CountryCode', 'ProductTypeName_of_hiot',
       'product code 1', 'product code 2', 'OriginalPhysicalUnit',
       'GHGs-eq - tonnes', 'PhysicalUnit', 'CarbonFootprint', 'unit',
       'ProductTypeName', 'geography', 'unitName', 'name', 'key', '#_y', 'id',
       'filename', 'activityName', 'startDate', 'endDate',
       'specialActivityType', 'technologyLevel', 'inheritance status',
       'parentActivityId', 'tags', 'synonyms', 'ISIC v4', 'ISIC code',
       'ISIC name', 'specialty production', 'constrained market',
       'accessRestrictedTo', 'dataEntryBy', 'dataGenerator', 'isActiveAuthor',
       'group', 'CPC', 'amount', 'productionVolumeAmount', 'absolute PV',
       'productionVolumeComment', 'price', 'By-product classification', 'mft',
       'functional unit', '('IPCC 2013', 'climate change', 'GWP 100a')',
       'Unnamed: 0', '#activities', 'Code (ecoinvent)',
       'Description (ecoinvent)', 'ISIC level', 'Code 1', 'Code 2', 'Code 3',
       'Code 

In [72]:
reduced_index = ["key",
"functional unit",
"unitName",
"PhysicalUnit",
"ProductTypeName_of_hiot",
"unit",
"ISIC 1",
"ISIC 2",
"ISIC 3",
"ISIC 4",
"ISIC v4",
"CPC",
"activityName",
"name",
"ProductTypeName",
"geography",
"CountryCode",
"('IPCC 2013', 'climate change', 'GWP 100a')",
"CarbonFootprint",
"diff",
"dev",]

In [73]:
# save EXCEL
hf.append_df_to_excel(M[reduced_index], 
                      template=f"{DATA_PATH}matching_results/template_matching.xlsx",
                      filename=f"{SCENARIO_PATH}matching_results/EItoEX_matching_results_HIOT_manual.xlsx", 
                      sheet_name='data',
                      truncate_sheet=False,
                      startrow=1)