In [1]:
%matplotlib inline
import numpy as np
import matplotlib as plt
import pandas as pd
import pymrio
import os
from requests import RequestException
import csv
import re
import json
import copy
import ast
import operator
import math
import time
from collections import Counter

# Data 

A.txt files contain MRIOT, representing the financial expenditures (in Euros) from each industry for each industry. Provides Euros of output from row industries to produce an Euro of output in a column industry

F.txt contain the CO2 - combustion - air values for each CountryxIndustry. We only care about the 3 carbon-intensive industries for this: coal mining, petroleum and natural gas. This allows for calculation using the Tier 1 method of carbon direct emissions.

## This Notebook largely uses pymrio. documentation can be found at [this link](https://pymrio.readthedocs.io/en/latest/)

Due to complexity, this file will be run separately for each year to obtain EXIOBASE data. Every time this file runs, change year parameters and data files to the appropriate year. In OrganizeTrade, the data across all years will be concatenated.

In [2]:
exio3 = pymrio.parse_exiobase3(path='../Data/2_Carbon/exio3/ixi_zip/IOT_2010_ixi.zip')

### Below is for a preview of data

In [3]:
# exio3.calc_all()

In [4]:
# emissions_df = exio3.satellite.D_cba
# emissions_df

In [5]:
# co2_df  = emissions_df[emissions_df.index.isin(['CO2 - combustion - air'])]

# constant = 1000000
# scaled_co2_df = co2_df[co2_df.columns].apply(lambda x: x/constant)
# scaled_co2_df

# scaled_co2_df gives us CO2 per euros in df

In [6]:
# scaled_co2_df.to_csv('../Data/2_Carbon/co2_rates_for_testing.csv')

# Aggregation: Finding Carbon Emissions for Each Industry X Country in Current Year

Satellite includes a row for CO2 combustion that represents total direct emissions of a product in a country in this current year.

In [1]:
# Check to see available extension tables with parsed exio3

list(exio3.get_extensions())

NameError: name 'exio3' is not defined

In [8]:
exio3.satellite.F

region,AT,AT,AT,AT,AT,AT,AT,AT,AT,AT,...,WM,WM,WM,WM,WM,WM,WM,WM,WM,WM
sector,Cultivation of paddy rice,Cultivation of wheat,Cultivation of cereal grains nec,"Cultivation of vegetables, fruit, nuts",Cultivation of oil seeds,"Cultivation of sugar cane, sugar beet",Cultivation of plant-based fibers,Cultivation of crops nec,Cattle farming,Pigs farming,...,Landfill of waste: Paper,Landfill of waste: Plastic,Landfill of waste: Inert/metal/hazardous,Landfill of waste: Textiles,Landfill of waste: Wood,Activities of membership organisation n.e.c. (91),"Recreational, cultural and sporting activities (92)",Other service activities (93),Private households with employed persons (95),Extra-territorial organizations and bodies
Taxes less subsidies on products purchased: Total,0,5.405968,9.526391,11.205557,1.764674,0.880975,0.000077,0.070475,50.709219,16.205758,...,8.713773,2.787140,10.834208,1.378793,1.736802,774.807975,2324.240401,1367.099948,52.371257,0
Other net taxes on production,0,-11.312479,-21.653771,-45.004973,-7.904881,-2.834377,-0.001119,-0.207311,-415.284600,-15.066253,...,8.115794,2.758403,8.686413,0.003388,0.004579,52.988132,765.302585,274.359717,34.819091,0
"Compensation of employees; wages, salaries, & employers' social contributions: Low-skilled",0,1.029400,1.866267,4.001617,0.358110,0.284684,0.000099,0.008294,3.802419,2.644141,...,16.542882,9.569576,16.646704,3.743383,5.099373,615.451943,1100.412318,566.113422,1187.679905,0
"Compensation of employees; wages, salaries, & employers' social contributions: Medium-skilled",0,14.193025,25.731467,55.172962,4.937506,3.925124,0.001359,0.114361,52.426482,36.456532,...,105.643251,61.111549,106.306261,23.905333,32.564722,1356.316218,2427.410877,1247.585331,957.626264,0
"Compensation of employees; wages, salaries, & employers' social contributions: High-skilled",0,2.256036,4.090116,8.769955,0.784836,0.623914,0.000216,0.018178,8.333391,5.794906,...,57.292208,33.141876,57.651770,12.964286,17.660426,1321.808829,2382.940361,1215.844273,130.743533,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Energy Carrier Net TMAR,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
Energy Carrier Net TOTH,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
Energy Carrier Net TRAI,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0
Energy Carrier Net TROA,0,1234.000000,200.000000,525.000000,28.000000,79.000000,0.000000,15.000000,1198.000000,1543.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0


#### diag_stressor takes one row of the F matrix and diagonalize to the full region/sector format. Footprints calculation based on this matrix show the flow of embodied stressors from the source region/sector (row index) to the final consumer (column index).

In [9]:
co2_diag = exio3.satellite.diag_stressor(('CO2 - combustion - air'), name = 'co2_diag')
co2_diag.F.head(10)

Unnamed: 0_level_0,region,AT,AT,AT,AT,AT,AT,AT,AT,AT,AT,...,WM,WM,WM,WM,WM,WM,WM,WM,WM,WM
Unnamed: 0_level_1,sector,Cultivation of paddy rice,Cultivation of wheat,Cultivation of cereal grains nec,"Cultivation of vegetables, fruit, nuts",Cultivation of oil seeds,"Cultivation of sugar cane, sugar beet",Cultivation of plant-based fibers,Cultivation of crops nec,Cattle farming,Pigs farming,...,Landfill of waste: Paper,Landfill of waste: Plastic,Landfill of waste: Inert/metal/hazardous,Landfill of waste: Textiles,Landfill of waste: Wood,Activities of membership organisation n.e.c. (91),"Recreational, cultural and sporting activities (92)",Other service activities (93),Private households with employed persons (95),Extra-territorial organizations and bodies
region,sector,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
AT,Cultivation of paddy rice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,Cultivation of wheat,0.0,172299900.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,Cultivation of cereal grains nec,0.0,0.0,189433400.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,"Cultivation of vegetables, fruit, nuts",0.0,0.0,0.0,112341500.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,Cultivation of oil seeds,0.0,0.0,0.0,0.0,29032120.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,"Cultivation of sugar cane, sugar beet",0.0,0.0,0.0,0.0,0.0,10079390.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,Cultivation of plant-based fibers,0.0,0.0,0.0,0.0,0.0,0.0,10442.111422,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,Cultivation of crops nec,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1058252.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,Cattle farming,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,177476000.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,Pigs farming,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67829820.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### calc_all() calculates missing parts of the IOSystem and all extensions.

In [10]:
exio3.co2_diag = co2_diag
exio3.calc_all()

<pymrio.core.mriosystem.IOSystem at 0x7ff47ecd07c0>

In [11]:
exio3.co2_diag.D_cba.head()

Unnamed: 0_level_0,region,AT,AT,AT,AT,AT,AT,AT,AT,AT,AT,...,WM,WM,WM,WM,WM,WM,WM,WM,WM,WM
Unnamed: 0_level_1,sector,Cultivation of paddy rice,Cultivation of wheat,Cultivation of cereal grains nec,"Cultivation of vegetables, fruit, nuts",Cultivation of oil seeds,"Cultivation of sugar cane, sugar beet",Cultivation of plant-based fibers,Cultivation of crops nec,Cattle farming,Pigs farming,...,Landfill of waste: Paper,Landfill of waste: Plastic,Landfill of waste: Inert/metal/hazardous,Landfill of waste: Textiles,Landfill of waste: Wood,Activities of membership organisation n.e.c. (91),"Recreational, cultural and sporting activities (92)",Other service activities (93),Private households with employed persons (95),Extra-territorial organizations and bodies
region,sector,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
AT,Cultivation of paddy rice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AT,Cultivation of wheat,1.173052,20355090.0,369.9475,2221.641,118.3119,8.343994,-0.011375,59.334036,970112.7,240936.055164,...,7618.766118,5683.851721,7132.877352,2152.809307,2881.124623,40672.085521,53013.521758,46968.144238,7951.438497,0.0
AT,Cultivation of cereal grains nec,1.370299,165.7113,33881820.0,2951.648,140.3405,11.162788,0.542502,40.70686,1388490.0,726004.177479,...,12002.348882,9146.89438,11273.574836,3425.266655,4608.955313,68739.557577,83645.680489,75751.946871,13554.590226,0.0
AT,"Cultivation of vegetables, fruit, nuts",0.091968,36.81902,101.3928,61030870.0,44.06956,2.380651,-0.362994,1.875088,56738.85,44526.552158,...,23.437096,21.665972,31.179882,7.059208,10.381366,1896.269092,3478.995653,3364.938948,84.063259,0.0
AT,Cultivation of oil seeds,0.196996,18.98058,44.28468,235.5713,5750042.0,0.753903,0.113439,2.780372,12821.34,10460.18079,...,49.03944,18.861705,56.843953,4.685376,6.54724,1080.410593,2010.193215,2000.805833,72.412581,0.0


In [12]:
df_with_co2 = exio3.co2_diag.D_cba.groupby(level='sector', axis=0).sum()
df_with_co2.head(40)

region,AT,AT,AT,AT,AT,AT,AT,AT,AT,AT,...,WM,WM,WM,WM,WM,WM,WM,WM,WM,WM
sector,Cultivation of paddy rice,Cultivation of wheat,Cultivation of cereal grains nec,"Cultivation of vegetables, fruit, nuts",Cultivation of oil seeds,"Cultivation of sugar cane, sugar beet",Cultivation of plant-based fibers,Cultivation of crops nec,Cattle farming,Pigs farming,...,Landfill of waste: Paper,Landfill of waste: Plastic,Landfill of waste: Inert/metal/hazardous,Landfill of waste: Textiles,Landfill of waste: Wood,Activities of membership organisation n.e.c. (91),"Recreational, cultural and sporting activities (92)",Other service activities (93),Private households with employed persons (95),Extra-territorial organizations and bodies
sector,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Activities auxiliary to financial intermediation (67),493.0548,10975.08,23516.83,276837.1,119992.5,441.3946,11596.52,9972.731,18411.8,9547.614779,...,190174.9,86628.39,197189.4,34104.05,46200.42,147682800.0,61435250.0,24884360.0,7636042.0,0.0
Activities of membership organisation n.e.c. (91),7.910753,66.40049,182.6354,2180.882,878.7851,5.211545,36.93669,130.459,202.0285,117.019748,...,970.3157,334.3115,1128.794,-205.6803,3.505697,220653800.0,67303.71,38745.86,7155.899,0.0
Air transport (62),14236.28,230967.6,561659.8,4727414.0,684998.8,18177.28,-65844.64,163960.1,1173272.0,692690.387185,...,9240736.0,5563661.0,9715127.0,4524484.0,5355719.0,425116300.0,1156263000.0,182246000.0,33043910.0,0.0
Aluminium production,669.7324,13554.36,31044.64,349554.9,38861.33,388.6565,3985.436,21624.2,44013.32,20810.78398,...,145181.3,76073.52,149148.0,40927.88,51175.86,3965317.0,7675996.0,6662439.0,806489.4,0.0
Animal products nec,135.8293,725.8518,1508.664,13400.17,-1437.801,26.13665,114.7377,1386.903,3431.863,-746.598084,...,7930.724,4325.263,9515.78,2658.395,2948.775,391798.7,568664.2,1429118.0,24694.64,0.0
"Biogasification of food waste, incl. land application",0.2906147,32.58386,68.67794,1391.552,75.76369,2.833179,6.169287,11.55473,136.4749,104.043248,...,453.5664,57.06898,3262.582,28.21343,36.41034,1892.777,3531.644,3768.27,254.9663,0.0
"Biogasification of paper, incl. land application",2.218252,20.01738,33.45035,361.8622,-205.4631,0.2973343,43.46505,20.19492,39.24299,30.081364,...,464.1887,264.0938,773.9218,318.4822,297.6584,8193.952,12707.71,121011.1,1092.952,0.0
"Biogasification of sewage slugde, incl. land application",4.312359,714.2463,1111.584,7080.769,2602.572,21.02738,20.19814,181.3809,853.5556,557.796885,...,6980.886,1716.789,159604.9,1058.844,1259.024,134052.4,594734.0,337718.0,10595.27,0.0
Casting of metals,939.0085,66999.12,130764.2,926064.6,209081.2,1764.373,1587.243,40164.64,170131.8,77997.438553,...,225200.9,110906.2,242749.4,52498.68,70821.69,4230943.0,6973777.0,6005599.0,1108046.0,0.0
Cattle farming,332.007,3441.331,10601.6,68358.94,436.8248,226.6025,1006.47,15431.57,33573780.0,135599.969733,...,991449.7,660074.2,1035320.0,497409.8,581076.4,17856670.0,21528530.0,17364200.0,4315772.0,0.0


#### I want to focus on the three industries that have the highest carbon emissions contributions, according to the Method 1. However, this can be changed later on in the process of my research if I want to use ALL carbon emissions contributions.

In [13]:
co2_final_df_three_industries  = df_with_co2[df_with_co2.index.isin(['Mining of coal and lignite; extraction of peat (10)',
                                                   'Extraction of crude petroleum and services related to crude oil extraction, excluding surveying',
                                                   'Extraction of natural gas and services related to natural gas extraction, excluding surveying'])]
co2_final_df_three_industries

region,AT,AT,AT,AT,AT,AT,AT,AT,AT,AT,...,WM,WM,WM,WM,WM,WM,WM,WM,WM,WM
sector,Cultivation of paddy rice,Cultivation of wheat,Cultivation of cereal grains nec,"Cultivation of vegetables, fruit, nuts",Cultivation of oil seeds,"Cultivation of sugar cane, sugar beet",Cultivation of plant-based fibers,Cultivation of crops nec,Cattle farming,Pigs farming,...,Landfill of waste: Paper,Landfill of waste: Plastic,Landfill of waste: Inert/metal/hazardous,Landfill of waste: Textiles,Landfill of waste: Wood,Activities of membership organisation n.e.c. (91),"Recreational, cultural and sporting activities (92)",Other service activities (93),Private households with employed persons (95),Extra-territorial organizations and bodies
sector,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
"Extraction of crude petroleum and services related to crude oil extraction, excluding surveying",10884.322203,490057.9,1191910.0,3677716.0,1471618.0,10161.342848,10739.907917,400591.472696,840019.535904,306207.875758,...,1961538.0,1094316.0,2186300.0,886761.5,1002757.0,36204290.0,60286300.0,39074800.0,4368470.0,0.0
"Extraction of natural gas and services related to natural gas extraction, excluding surveying",14837.0383,1521866.0,2444021.0,9108738.0,6567360.0,-17361.353202,23795.531444,401641.246179,904983.632707,375720.528687,...,5021180.0,2773992.0,4784654.0,1153541.0,1547661.0,142207600.0,236345400.0,135290500.0,14199770.0,0.0
Mining of coal and lignite; extraction of peat (10),46089.43995,347614.4,580507.8,3370443.0,549512.4,3007.246633,-54006.513725,357817.373617,692803.861755,329948.728376,...,5218163.0,2871928.0,5715123.0,2384773.0,3328078.0,92210080.0,84535540.0,76683000.0,21586860.0,0.0


In [14]:
co2_final = co2_final_df_three_industries.append(co2_final_df_three_industries.sum().rename('Total CO2 Emissions from Intensive Industries'))

In [15]:
co2_final.index.values

array(['Extraction of crude petroleum and services related to crude oil extraction, excluding surveying',
       'Extraction of natural gas and services related to natural gas extraction, excluding surveying',
       'Mining of coal and lignite; extraction of peat (10)',
       'Total CO2 Emissions from Intensive Industries'], dtype=object)

#### Finally, I have the df that shows the total CO2 emissions from intensive industries for each Industry X Country.

In [16]:
co2_final_total = co2_final.drop(['Extraction of crude petroleum and services related to crude oil extraction, excluding surveying',
                                  'Extraction of natural gas and services related to natural gas extraction, excluding surveying',
       'Mining of coal and lignite; extraction of peat (10)'])
co2_final_total

region,AT,AT,AT,AT,AT,AT,AT,AT,AT,AT,...,WM,WM,WM,WM,WM,WM,WM,WM,WM,WM
sector,Cultivation of paddy rice,Cultivation of wheat,Cultivation of cereal grains nec,"Cultivation of vegetables, fruit, nuts",Cultivation of oil seeds,"Cultivation of sugar cane, sugar beet",Cultivation of plant-based fibers,Cultivation of crops nec,Cattle farming,Pigs farming,...,Landfill of waste: Paper,Landfill of waste: Plastic,Landfill of waste: Inert/metal/hazardous,Landfill of waste: Textiles,Landfill of waste: Wood,Activities of membership organisation n.e.c. (91),"Recreational, cultural and sporting activities (92)",Other service activities (93),Private households with employed persons (95),Extra-territorial organizations and bodies
sector,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Total CO2 Emissions from Intensive Industries,71810.800453,2359538.0,4216438.0,16156900.0,8588490.0,-4192.763722,-19471.074364,1160050.0,2437807.0,1011877.0,...,12200880.0,6740236.0,12686080.0,4425076.0,5878496.0,270621900.0,381167300.0,251048300.0,40155100.0,0.0


# Mapping Industry to Product

Because the final dataset is at the product level, the industries must be matched to their corresponding product using the concordance table provided by EXIOBASE 3 [here](https://zenodo.org/record/4277368#.YCn_P89Khqs)

In [17]:
co2_final_total.columns

MultiIndex([('AT',                           'Cultivation of paddy rice'),
            ('AT',                                'Cultivation of wheat'),
            ('AT',                    'Cultivation of cereal grains nec'),
            ('AT',              'Cultivation of vegetables, fruit, nuts'),
            ('AT',                            'Cultivation of oil seeds'),
            ('AT',               'Cultivation of sugar cane, sugar beet'),
            ('AT',                   'Cultivation of plant-based fibers'),
            ('AT',                            'Cultivation of crops nec'),
            ('AT',                                      'Cattle farming'),
            ('AT',                                        'Pigs farming'),
            ...
            ('WM',                            'Landfill of waste: Paper'),
            ('WM',                          'Landfill of waste: Plastic'),
            ('WM',            'Landfill of waste: Inert/metal/hazardous'),
         

In [18]:
ip_concordance = pd.read_csv('../Data/2_Carbon/EXIOBASE20p_EXIOBASE20i.txt', delimiter = '\t', index_col=False)

ip_concordance

Unnamed: 0.1,Unnamed: 0,Cultivation of paddy rice,Cultivation of wheat,Cultivation of cereal grains nec,"Cultivation of vegetables, fruit, nuts",Cultivation of oil seeds,"Cultivation of sugar cane, sugar beet",Cultivation of plant-based fibers,Cultivation of crops nec,Cattle farming,...,Landfill of waste: Paper,Landfill of waste: Plastic,Landfill of waste: Inert/metal/hazardous,Landfill of waste: Textiles,Landfill of waste: Wood,Activities of membership organisation n.e.c. (91),"Recreational, cultural and sporting activities (92)",Other service activities (93),Private households with employed persons (95),Extra-territorial organizations and bodies
0,Paddy rice,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Wheat,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Cereal grains nec,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Vegetables, fruit, nuts",0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Oil seeds,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Membership organisation services n.e.c. (91),0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
196,"Recreational, cultural and sporting services (92)",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
197,Other services (93),0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
198,Private households with employed persons (95),0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [19]:
# create dictionary with industry as key, product(s) as value(s)

ip_dict = {}
keys = []
values = []

industries = list(ip_concordance.columns)
industries.pop(0)

products = list(ip_concordance['Unnamed: 0'])

for column in ip_concordance:
    keys.append(column)
    row_vals = ip_concordance.loc[ip_concordance[column] == 1]
    for index, row in row_vals.iterrows():
        titles = ip_concordance.iloc[index,0]
        values.append(titles)

keys.pop(0)

ip_dict = dict(zip(keys, values))

print(ip_dict)

{'Cultivation of paddy rice': 'Paddy rice', 'Cultivation of wheat': 'Wheat', 'Cultivation of cereal grains nec': 'Cereal grains nec', 'Cultivation of vegetables, fruit, nuts': 'Vegetables, fruit, nuts', 'Cultivation of oil seeds': 'Oil seeds', 'Cultivation of sugar cane, sugar beet': 'Sugar cane, sugar beet', 'Cultivation of plant-based fibers': 'Plant-based fibers', 'Cultivation of crops nec': 'Crops nec', 'Cattle farming': 'Cattle', 'Pigs farming': 'Pigs', 'Poultry farming': 'Poultry', 'Meat animals nec': 'Meat animals nec', 'Animal products nec': 'Animal products nec', 'Raw milk': 'Raw milk', 'Wool, silk-worm cocoons': 'Wool, silk-worm cocoons', 'Manure treatment (conventional) and land application': 'Manure (conventional treatment)', 'Manure treatment (biogas) and land application': 'Manure (biogas treatment)', 'Forestry, logging and related service activities (02)': 'Products of forestry, logging and related services (02)', 'Fishing, operating of fish hatcheries and fish farms; se

In [20]:
# map industry names to product names

co2_final_total_products = co2_final_total.rename(columns=ip_dict)

In [21]:
co2_final_total_products_flipped = co2_final_total_products.melt()
co2_final_total_products_flipped


Unnamed: 0,region,sector,value
0,AT,Paddy rice,7.181080e+04
1,AT,Wheat,2.359538e+06
2,AT,Cereal grains nec,4.216438e+06
3,AT,"Vegetables, fruit, nuts",1.615690e+07
4,AT,Oil seeds,8.588490e+06
...,...,...,...
7982,WM,Transportation services via pipelines,2.706219e+08
7983,WM,Sea and coastal water transportation services,3.811673e+08
7984,WM,Inland water transportation services,2.510483e+08
7985,WM,Air transport services (62),4.015510e+07


# Map product exio codes

exio codes are unique to EXIOBASE and can later be mapped to HS6 using a concordance table provided in same link above

In [22]:
df_codes = pd.read_excel('../Data/2_Carbon/Exiobase_supp.xlsx', 'Products')
df_codes

Unnamed: 0.1,Unnamed: 0,Nbr,Name,CodeNr,CodeTxt
0,,1,Paddy rice,p01.a,C_PARI
1,,2,Wheat,p01.b,C_WHEA
2,,3,Cereal grains nec,p01.c,C_OCER
3,,4,"Vegetables, fruit, nuts",p01.d,C_FVEG
4,,5,Oil seeds,p01.e,C_OILS
...,...,...,...,...,...
195,,196,Membership organisation services n.e.c. (91),p91,C_ORGA
196,,197,"Recreational, cultural and sporting services (92)",p92,C_RECR
197,,198,Other services (93),p93,C_OSER
198,,199,Private households with employed persons (95),p95,C_PRHH


In [23]:
codes_dict = pd.Series(df_codes.CodeNr.values,index=df_codes.Name).to_dict()
codes_dict

{'Paddy rice': 'p01.a',
 'Wheat': 'p01.b',
 'Cereal grains nec': 'p01.c',
 'Vegetables, fruit, nuts': 'p01.d',
 'Oil seeds': 'p01.e',
 'Sugar cane, sugar beet': 'p01.f',
 'Plant-based fibers': 'p01.g',
 'Crops nec': 'p01.h',
 'Cattle': 'p01.i',
 'Pigs': 'p01.j',
 'Poultry': 'p01.k',
 'Meat animals nec': 'p01.l',
 'Animal products nec': 'p01.m',
 'Raw milk': 'p01.n',
 'Wool, silk-worm cocoons': 'p01.o',
 'Manure (conventional treatment)': 'p01.w.1',
 'Manure (biogas treatment)': 'p01.w.2',
 'Products of forestry, logging and related services (02)': 'p02',
 'Fish and other fishing products; services incidental of fishing (05)': 'p05',
 'Anthracite': 'p10.a',
 'Coking Coal': 'p10.b',
 'Other Bituminous Coal': 'p10.c',
 'Sub-Bituminous Coal': 'p10.d',
 'Patent Fuel': 'p10.e',
 'Lignite/Brown Coal': 'p10.f',
 'BKB/Peat Briquettes': 'p10.g',
 'Peat': 'p10.h',
 'Crude petroleum and services related to crude oil extraction, excluding surveying': 'p11.a',
 'Natural gas and services related to n

In [24]:
co2_final_total_products_flipped['exio code'] = co2_final_total_products_flipped['sector'].map(codes_dict)
co2_final_total_products_flipped.columns = ['country', 'product_name', 'total_co2', 'exio_code']
carbon_df = co2_final_total_products_flipped
carbon_df

Unnamed: 0,country,product_name,total_co2,exio_code
0,AT,Paddy rice,7.181080e+04,p01.a
1,AT,Wheat,2.359538e+06,p01.b
2,AT,Cereal grains nec,4.216438e+06,p01.c
3,AT,"Vegetables, fruit, nuts",1.615690e+07,p01.d
4,AT,Oil seeds,8.588490e+06,p01.e
...,...,...,...,...
7982,WM,Transportation services via pipelines,2.706219e+08,p60.3
7983,WM,Sea and coastal water transportation services,3.811673e+08,p61.1
7984,WM,Inland water transportation services,2.510483e+08,p61.2
7985,WM,Air transport services (62),4.015510e+07,p62


# Map and replace country codes

solving discrepancy between 3-letter and 2-letter


In [25]:
df_country = pd.read_csv('../Data/3_Trade/country_codes_V202102.csv', encoding='latin-1')
df_country

Unnamed: 0,country_code,country_name_abbreviation,country_name_full,iso_2digit_alpha,iso_3digit_alpha
0,4,Afghanistan,Afghanistan,AF,AFG
1,8,Albania,Albania,AL,ALB
2,12,Algeria,Algeria,DZ,DZA
3,16,American Samoa,American Samoa,AS,ASM
4,20,Andorra,Andorra,AD,AND
...,...,...,...,...,...
233,876,Wallis and Futuna Isds,Wallis and Futuna Islands,WF,WLF
234,882,Samoa,Samoa,WS,WSM
235,887,Yemen,Yemen,YE,YEM
236,891,Serbia and Montenegro,Serbia and Montenegro,CS,SCG


In [26]:
country_dict = pd.Series(df_country.iso_3digit_alpha.values,index=df_country.iso_2digit_alpha).to_dict()
country_dict

{'AF': 'AFG',
 'AL': 'ALB',
 'DZ': 'DZA',
 'AS': 'ASM',
 'AD': 'AND',
 'AO': 'AGO',
 'AG': 'ATG',
 'AZ': 'AZE',
 'AR': 'ARG',
 'AU': 'AUS',
 'AT': 'AUT',
 'BS': 'BHS',
 'BH': 'BHR',
 'BD': 'BGD',
 'AM': 'ARM',
 'BB': 'BRB',
 'BE': 'BEL',
 'BM': 'BMU',
 'BT': 'BTN',
 'BO': 'BOL',
 'BA': 'BIH',
 'BW': 'BWA',
 'BR': 'BRA',
 'BZ': 'BLZ',
 'IO': 'IOT',
 'SB': 'SLB',
 'VG': 'VGB',
 'BN': 'BRN',
 'BG': 'BGR',
 'MM': 'MMR',
 'BI': 'BDI',
 'BY': 'BLR',
 'KH': 'KHM',
 'CM': 'CMR',
 'CA': 'CAN',
 'CV': 'CPV',
 'KY': 'CYM',
 'CF': 'CAF',
 'LK': 'LKA',
 'TD': 'TCD',
 'CL': 'CHL',
 'CN': 'CHN',
 'CX': 'CXR',
 'CC': 'CCK',
 'CO': 'COL',
 'KM': 'COM',
 'YT': 'MYT',
 'CG': 'COG',
 'CD': 'COD',
 'CK': 'COK',
 'CR': 'CRI',
 'HR': 'HRV',
 'CU': 'CUB',
 'CY': 'CYP',
 'CS': 'SCG',
 'CZ': 'CZE',
 'BJ': 'BEN',
 'DK': 'DNK',
 'DM': 'DMA',
 'DO': 'DOM',
 'EC': 'ECU',
 'SV': 'SLV',
 'GQ': 'GNQ',
 'ET': 'ETH',
 'EE': 'EST',
 'FK': 'FLK',
 'FJ': 'FJI',
 'FI': 'FIN',
 'FR': 'FRA',
 'PF': 'PYF',
 'FQ': 'ATF',
 'DJ':

In [27]:
carbon_df['country'] = carbon_df['country'].map(country_dict)
carbon_df['year'] = 2010
carbon_df

Unnamed: 0,country,product_name,total_co2,exio_code,year
0,AUT,Paddy rice,7.181080e+04,p01.a,2010
1,AUT,Wheat,2.359538e+06,p01.b,2010
2,AUT,Cereal grains nec,4.216438e+06,p01.c,2010
3,AUT,"Vegetables, fruit, nuts",1.615690e+07,p01.d,2010
4,AUT,Oil seeds,8.588490e+06,p01.e,2010
...,...,...,...,...,...
7982,,Transportation services via pipelines,2.706219e+08,p60.3,2010
7983,,Sea and coastal water transportation services,3.811673e+08,p61.1,2010
7984,,Inland water transportation services,2.510483e+08,p61.2,2010
7985,,Air transport services (62),4.015510e+07,p62,2010


In [2]:
# number of failed country codes....

carbon_df['country'].isna().sum()

NameError: name 'carbon_df' is not defined

In [30]:
carbon_df.to_csv('../Data/2_Carbon/product_carbon_emissions_2010.csv', index=False)

# Code below can be IGNORED; was used for manually mapping products that had slightly different product names
Was used in OrganizeTrade

In [36]:
exio_df = carbon_df.loc[(carbon_df['exio_code'].isnull())]
exio_df = exio_df.head(20)
exio_df['product_name']

In [37]:
exio_df

Unnamed: 0,country,product_name,total_co2,exio_code,year
15,AUT,"Manure treatment (conventional), storage and l...",0.0,,2010
16,AUT,"Manure treatment (biogas), storage and land ap...",0.0,,2010
50,AUT,Re-processing of secondary wood material into ...,24370590.0,,2010
52,AUT,Re-processing of secondary paper into new pulp,-484970.1,,2010
59,AUT,Re-processing of secondary plastic into new pl...,-4957577.0,,2010
65,AUT,Re-processing of secondary glass into new glass,489663.1,,2010
69,AUT,Re-processing of ash into clinker,1781885.0,,2010
72,AUT,Re-processing of secondary steel into new steel,1787607.0,,2010
74,AUT,Re-processing of secondary preciuos metals int...,-396.1503,,2010
76,AUT,Re-processing of secondary aluminium into new ...,1583428.0,,2010


In [38]:
exio_df.to_csv('../Data/2_Carbon/2010_products_no_code.csv', index=False)