# Emissions-weighted carbon price

## 0.1 Packages and libraries

In [1]:
import csv
import pprint
import os
import glob
import pandas as pd
import numpy as np
import re

import copy

from pandas import read_csv
from importlib.machinery import SourceFileLoader

path_wcpd = '/Users/gd/GitHub/WorldCarbonPricingDatabase/_dataset/data/CO2'
path_ghg = '/Users/gd/OneDrive - rff/documents/research/projects/ecp/ecp_dataset/source_data/ghg_inventory/raw'
path_aux_data = '/Users/gd/OneDrive - rff/documents/research/projects/ecp/ecp_dataset'
path_dependencies = '/Users/gd/GitHub/ECP/_code/compilation/dependencies'

ecp_general = SourceFileLoader('general', path_dependencies+'/ecp_v3_gen_func.py').load_module()
ecp_cov_fac = SourceFileLoader('coverage_factors', path_dependencies+'/ecp_v3_coverage_factors.py').load_module()
ecp_inv_share = SourceFileLoader('inventory_share_func', path_dependencies+'/ecp_v3_inventory_share_func.py').load_module()
ecp_coverage = SourceFileLoader('coverage', path_dependencies+'/ecp_v3_coverage.py').load_module()
ecp_cur_conv = SourceFileLoader('currency_conversion', path_dependencies+'/ecp_v3_curr_conv.py').load_module()
ecp_overlap = SourceFileLoader('overlap', path_dependencies+'/ecp_v3_overlap.py').load_module()

# Institutional design (World Carbon Pricing Database)

In [2]:
# LOAD WCPD DATAFRAMES

wcpd_ctry = ecp_general.concatenate(path_wcpd+"/national")
wcpd_subnat = ecp_general.concatenate(path_wcpd+"/subnational")
wcpd_all = pd.concat([wcpd_ctry, wcpd_subnat]).sort_values(by=["jurisdiction", "year"])

# ADD COLUMN WITH IEA SECTOR CODES
ipcc_iea_map = pd.read_csv("/Users/gd/GitHub/WorldCarbonPricingDatabase/_raw/_aux_files/IPCC2006-IEA-category-codes.csv", 
                  usecols=["IPCC_CODE", "IEA_CODE"])
ipcc_iea_map.columns = ["ipcc_code", "iea_code"]

wcpd_all = wcpd_all.merge(ipcc_iea_map, on=["ipcc_code"], how="left")

# LISTS OF JURISDICTION NAMES

ctry_names = list(wcpd_ctry.jurisdiction.unique())
subnat_names = list(wcpd_subnat.jurisdiction.unique())

std_ctry_names = [x.replace(".", "").replace(",", "").replace(" ", "_") for x in ctry_names]
countries_dic = dict(zip(ctry_names, std_ctry_names))

std_subnat_names = [x.replace(".", "").replace(",", "").replace(" ", "_") for x in subnat_names]
subnat_dic = dict(zip(subnat_names, std_subnat_names))

if len(wcpd_all[wcpd_all.duplicated(['jurisdiction', 'year', 'ipcc_code', 'Product'], keep=False)] != 0):
    print("The dataset contains duplicates!")

In [3]:
# ADD COVERAGE FACTORS 

wcpd_all = ecp_cov_fac.coverage_factors(wcpd_all)

In [19]:
# MECHANISM OVERLAP 
overlap = pd.read_csv("/Users/gd/GitHub/WorldCarbonPricingDatabase/_raw/overlap/overlap_mechanisms.csv")

wcpd_all = ecp_overlap.overlap(wcpd_all)

# Emissions (currently CO2 only)
## I. National jurisdictions 
### I.A Data processing 
#### I.A.1 Total GHG emissions (CAIT)

In [5]:
# REPLACE CAIT COUNTRY NAMES WITH WORLD BANK COUNTRY NAMES

cait_map = pd.read_csv(path_ghg+'/national/ClimateWatch/CAIT/CAIT_map.csv')
cait = pd.read_csv(path_ghg+"/national/ClimateWatch/CAIT/CAIT_2021/CW_CAIT_GHG_Emissions.csv")
cait = cait.drop(['Source'],axis=1)

map_cait_ctry_iso = dict(zip(list(cait_map['ISO3_code'].values), list(cait_map['Ctry_name'].values)))

map_cait_wb = {'Antigua & Barbuda': 'Antigua and Barbuda', 'Bosnia & Herzegovina': 'Bosnia and Herzegovina', 'Brunei': 'Brunei Darussalam',
               'Cape Verde': 'Cabo Verde', 'Congo, Dem. Republic': 'Congo, Dem. Rep.', 'Democratic Republic of the Congo': 'Congo, Dem. Rep.',
               'Congo': 'Congo, Rep.', "CÃ´te d'Ivoire": "Cote d'Ivoire", 'Egypt': 'Egypt, Arab Rep.', 'Gambia': 'Gambia, The',
               'Iran': 'Iran, Islamic Rep.', 'Korea (North)': 'Korea, Dem. Rep.', 'North Korea': 'Korea, Dem. Rep.',
               'Korea (South)': 'Korea, Rep.', 'South Korea': 'Korea, Rep.', 'Kyrgyzstan': 'Kyrgyz Republic', 'Laos': 'Lao, PDR',
               'Macedonia, FYR':'North Macedonia', 
               'Sao Tome & Principe': 'Sao Tome and Principe', 'Slovakia': 'Slovak Republic', 'Saint Kitts & Nevis': 'St. Kitts and Nevis',
               'Saint Lucia': 'St. Lucia', 'Saint Vincent & Grenadines': 'St. Vincent and Grenadines', 'United States of America': 'United States',
               'Venezuela': 'Venezuela, RB', 'Syria': 'Syrian Arab Republic', 'Trinidad & Tobago': 'Trinidad and Tobago', 'Yemen': 'Yemen, Rep.'}

cait['Country'] = cait['Country'].replace(to_replace=map_cait_ctry_iso)
cait['Country'] = cait['Country'].replace(to_replace=map_cait_wb)

In [6]:
# CREATE DATAFRAME WITH TOTAL CO2 AND GHG EMISSIONS - NATIONAL JURISDICTIONS

gases = ["CO2", "All GHG"]
col_names = {"CO2":"Total_CO2_Emissions_Excluding_LUCF_MtCO2e", "All GHG":"Total_GHG_Emissions_Excluding_LUCF_MtCO2e"}

national_total = pd.DataFrame()

for gas in gases:
    cait_gas = cait[(cait['Gas']==gas) & (cait['Sector']=='Total excluding LUCF')]
    cait_gas = cait_gas.drop(['Sector','Gas'],axis=1)
    cait_gas = cait_gas.melt(id_vars='Country')
    cait_gas.rename(columns={"variable":"year","value":col_names[gas]},inplace=True)
    
    if national_total.empty == True:
        national_total = cait_gas
    else:
        national_total = national_total.merge(cait_gas, on=["Country", "year"])

national_total = national_total[~national_total.Country.isin(["European Union (25)","European Union (28)"])]
national_total.rename(columns={"Country":"jurisdiction"}, inplace=True)
national_total["year"] = national_total["year"].astype(int)

national_total.to_csv(path_ghg+'/national/ClimateWatch/CAIT/CAIT_2021/CAIT_country_tot_2021_WBnames.csv',index=None)

In [7]:
# CREATE DATAFRAME WITH TOTAL CO2 AND GHG EMISSIONS - WORLD

world_total = national_total.loc[national_total['jurisdiction']=='World']
world_total = world_total.drop('jurisdiction',axis=1)
world_total.columns = ['year', 'World_CO2_Emissions', 'World_GHG_Emissions']

world_total["year"] = world_total["year"].astype(int)
world_total.to_csv(path_ghg+'/national/ClimateWatch/CAIT/CAIT_2021/ghg_world.csv',index=None)

### I.A.2 Combustion emissions (International Energy Agency)
#### I.A.2.1 Preliminary steps
##### I.A.2.1.a Concatenate IEA yearly emissions files

In [52]:
ecp_functions.concat_iea()

##### I.A.2.1.b Aggregation (over all sub-products) of IEA CO2 emissions figures at 'aggregated product' level

IEA data is disaggregated between 47 (46 + category 'Total') different fossil fuel products. Yet, most carbon pricing mechanisms set prices at a lower level of disaggregation, distinguishing between coal, oil and gas products. Hence we aggregate the IEA data at this level. The product categories retained here are: **'Coal/peat', 'Oil', 'Natural gas', 'Other',** and **'Total'** (sum of first four categories)

In [144]:
result = {}

with open(path_ghg+'/national/IEA/detailed_figures/emissions_allyears/iea_CO2em_ally.csv', 'r',
         encoding = 'latin-1') as csvfile:
    data_reader = csv.reader(csvfile)
    next(data_reader, None)  # skip the headers

    for row in data_reader:
        #extract column value based on column index
        year = row[6]
        location = row[1]
        product_code = row[2]
        flow = row[4]
        sector_name = row[5]
        value = convert_value(row[8]) #uses the convert_value function created above
        
        #'product_code' function defined above; assigns a 'product category' to each of the sub-products based on its product code
        product_category = get_product_category(product_code)

        #initialise container of year key
        if year not in result:
            result[year] = {}
            
        #initialise container of location key
        if location not in result[year]:
                result[year][location] = {}
            
        # initialise container of product_category key if not present; that is, if the product category key is NOT already present in result, it will be addded to it
        if product_category not in result[year][location]:
            result[year][location][product_category] =  {}

        #initialise container of flow-sector names if not present
        if sector_name not in result[year][location][product_category]:
            result[year][location][product_category][sector_name] = {}

        # initialise container of flow codes if not present
        if flow not in result[year][location][product_category][sector_name]:
            result[year][location][product_category][sector_name][flow] = 0

        # perform the aggregation (in the present case, for each row, the code adds the value of 'value' to the container)
        result[year][location][product_category][sector_name][flow] += value
        


In [146]:
with open(path_ghg+'/national/IEA/detailed_figures/agg_product/iea_aggprod.csv', "w", encoding = 'utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(('Country','year','Flow','Sector','Product','CO2_emissions'))

    for year in result:
        for location in result[year]:
            for product_category in result[year][location]:
                for sector_name in result[year][location][product_category]:
                    for flow in result[year][location][product_category][sector_name]:
                        writer.writerow((location, year, flow, sector_name, product_category, result[year][location][product_category][sector_name][flow]))
                        
os.remove(path_ghg+'/national/IEA/detailed_figures/emissions_allyears/iea_CO2em_ally.csv')                

##### I.A.2.1.c Replace country names by WB country names list
We standardize country names to those used by the World Bank. Note regarding China: the coding below implies that 'China' does not include Hong Kong and refers to "People's Republic of China" only (the approach adopted by the World Bank). 

In [147]:
data = pd.read_csv(path_ghg+"/national/IEA/detailed_figures/agg_product/iea_aggprod.csv",
                  encoding = "utf-8") #specify encoding
data = pd.DataFrame(data)

map_iea_wb = {"CÃ\x83Â´te d'Ivoire": "Cote d'Ivoire", "CÃ´te d'Ivoire": "Cote d'Ivoire",
              '"China (P.R. of China and Hong Kong, China)"': 'China (P.R. of China and Hong Kong, China)',
              "People's Republic of China": 'China', 'CuraÃ\x83Â§ao/Netherlands Antilles': 'Curacao/Netherlands Antilles',
              'CuraÃ§ao': 'Curacao', 'CuraÃ§ao/Netherlands Antilles': 'Curacao/Netherlands Antilles',
              'Democratic Republic of Congo': 'Congo, Dem. Rep.', 'Democratic Republic of the Congo': 'Congo, Dem. Rep.',
              'Republic of the Congo': 'Congo, Rep.', 'Egypt': 'Egypt, Arab Rep.', 'Hong Kong (China)': 'Hong Kong SAR, China',
              'Islamic Republic of Iran': 'Iran, Islamic Rep.', "Democratic People's Republic of Korea": 'Korea, Dem. Rep.',
              'Korea': 'Korea, Rep.', 'Kyrgyzstan': 'Kyrgyz Republic', 'Republic of North Macedonia': 'North Macedonia',
              'Republic of Moldova':'Moldova', 'Chinese Taipei':'Taiwan, China',
              'Venezuela': 'Venezuela, RB', 'Plurinational State of Bolivia':'Bolivia',
              'United Republic of Tanzania':'Tanzania',
              'Bolivarian Republic of Venezuela': 'Venezuela, RB', 'Viet Nam': 'Vietnam', 'Yemen': 'Yemen, Rep.'}
                   
data['Country'] = data['Country'].replace(to_replace=map_iea_wb)

data.to_csv(path_ghg+'/national/IEA/detailed_figures/agg_product/iea_aggprod_WBnames.csv',index=None)

#remove file 'IEA_aggprod.csv' wich is no longer needed
os.remove(path_ghg+'/national/IEA/detailed_figures/agg_product/iea_aggprod.csv')

### I.A.3 Fugitive emissions (CO2)

### I.A.4 Industrial Processes and Product Use emissions (CO2)

In [8]:
ippu_nat = pd.read_excel(path_ghg+"/national/EDGAR/v60_CO2_excl_short-cycle_org_C_1970_2018.xls",
                          sheet_name="v6.0_EM_CO2_fossil_IPCC2006", skiprows=9)

ippu_nat.drop(['IPCC_annex', 'C_group_IM24_sh', 'Country_code_A3', 'ipcc_code_2006_for_standard_report_name', 'fossil_bio'], axis=1, inplace=True)

ippu_nat = ippu_nat.loc[~ippu_nat.Name.isin(["Int. Shipping", "Int. Aviation"]), :]
ippu_nat = ippu_nat.melt(id_vars=["Name", "ipcc_code_2006_for_standard_report"])

ippu_nat.rename(columns={"Name":"jurisdiction", "ipcc_code_2006_for_standard_report":"ipcc_code", "variable":"year", "value":"CO2_emissions"}, 
                 inplace=True)
ippu_nat["ipcc_code"] = ippu_nat["ipcc_code"].apply(lambda x: x.replace('.', '').upper())
ippu_nat["year"] = ippu_nat["year"].apply(lambda x: x.replace('Y_', '').upper())
ippu_nat["ipcc_code"] = ippu_nat["ipcc_code"].apply(lambda x: x.replace('_NORES', '').upper())
ippu_nat["year"] = ippu_nat["year"].astype(int)

ippu_nat["CO2_emissions"] = ippu_nat["CO2_emissions"]/1000

# select only IPCC 2 Industrial Processes and Product Use categories
ippu_nat = ippu_nat.loc[ippu_nat.ipcc_code.str.match("2"), :]


# need to change names of countries to match names in inventory dataframe

map_edgar_wb = {'Bahamas':'Bahamas, The', 'Cape Verde':'Cabo Verde', 'Congo_the Democratic Republic of the':'Congo, Dem. Rep.',
                'Congo':'Congo, Rep.', "Egypt":'Egypt, Arab Rep.', 'Micronesia, Federated States of':'Federated States of Micronesia',
                'Gambia':'Gambia, The', 'Hong Kong':'Hong Kong SAR, China', 'Iran, Islamic Republic of':'Iran, Islamic Rep.',
                "Korea, Democratic People's Republic of":'Korea, Dem. Rep.', 'Korea, Republic of':'Korea, Rep.', 'Kyrgyzstan':'Kyrgyz Republic',
                "Lao People's Democratic Republic":'Lao PDR', 'Libyan Arab Jamahiriya':'Libya', 'Macao':'Macao SAR, China', 
                'Moldova, Republic of':'Moldova', 'Macedonia, the former Yugoslav Republic of':'North Macedonia', 'Slovakia':'Slovak Republic', 
                'Saint Kitts and Nevis':'St. Kitts and Nevis', 'Saint Lucia':'St. Lucia', 
                'Saint Vincent and the Grenadines':'St. Vincent and the Grenadines', 'Taiwan_Province of China':'Taiwan, China',
                'Tanzania_United Republic of':'Tanzania', 'Venezuela':'Venezuela, RB', 'Viet Nam':'Vietnam', 'Yemen':'Yemen, Rep.'} #'Serbia and Montenegro':'Serbia'

ippu_nat["jurisdiction"] = ippu_nat["jurisdiction"].replace(to_replace=map_edgar_wb)


### I.B National GHG Inventory

In [9]:
# DATA FORMATTING (common inventory reporting format)

# 1A Fuel Combustion Activities
combustion_nat = pd.read_csv(path_ghg+"/national/IEA/iea_energy_combustion_emissions/detailed_figures/agg_product/iea_aggprod_WBnames.csv",
                  encoding = "utf-8") #specify encoding
combustion_nat.rename(columns={"Country":"jurisdiction", "Year":"year", "Flow":"iea_code"}, inplace=True)
combustion_nat.drop("Sector", axis=1, inplace=True)

combustion_nat = combustion_nat.merge(ipcc_iea_map, on=["iea_code"], how="left")
combustion_nat["CO2_emissions"] = combustion_nat["CO2_emissions"]/1000

# 2 Industrial Processes and Product Use
ippu_nat = ippu_nat[["jurisdiction", "year", "ipcc_code", "CO2_emissions"]]
ippu_nat["year"] = ippu_nat["year"].astype(int)
ippu_nat["iea_code"] = "NA"
ippu_nat["Product"] = "NA"


# COMBINED INVENTORY

inventory_nat = wcpd_all.loc[wcpd_all.jurisdiction.isin(ctry_names), ["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]]
inventory_nat[["iea_code", "Product"]] = inventory_nat[["iea_code", "Product"]].fillna("NA")

combined_nat = pd.concat([combustion_nat, ippu_nat])

inventory_nat = inventory_nat.merge(combined_nat, on=["jurisdiction", "year", "ipcc_code", "iea_code", "Product"], how="left")

### I.C Emissions share (in total world and jurisdiction, GHG and CO2 emissions)

In [10]:
inventory_nat_share = ecp_inv_share.emissions_share(inventory_nat, national_total, world_total)
inventory_nat = pd.merge(inventory_nat, inventory_nat_share, on=["jurisdiction", "year", "ipcc_code", "iea_code", "Product"], how="left")

# Shares of world sector emissions (share of total CO2)
sectors_wld_total = inventory_nat[["jurisdiction", "year", "ipcc_code", "iea_code", "Product", "CO2_emissions"]].groupby(["ipcc_code", "year"]).sum()
sectors_wld_total.reset_index(inplace=True)

inventory_sect_natjur = ecp_inv_share.emissions_share_sectors(inventory_nat, sectors_wld_total, "national")

In [73]:
inventory_nat.to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/processed/inventory/inventory_nat.csv", index=None)

In [14]:
for ctry in ctry_list:
    inventory_nat.loc[inventory_nat.jurisdiction==ctry, :].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/processed/inventory/national/inventory_"+countries_dic[ctry]+".csv", index=None)


NameError: name 'ctry_list' is not defined

## II. Subnational jurisdictions
### II.A Data processing

#### United States

In [11]:
us = pd.DataFrame()

os.chdir(path_ghg+'/subnational/United_States/Rhodium/')
file_list = glob.glob('*.csv')

for file in file_list:
    temp = pd.read_csv(path_ghg+'/subnational/United_States/Rhodium/'+file, decimal=',')
    #extract US state name from file name
    state_name = file[len("DetailedGHGinventory_"):-4]
    #add state name as key column
    temp.loc[:, "jurisdiction"] = state_name
    #concat
    us = pd.concat([us, temp])

# excluding LULUCF emissions - excluded from emissions total calculations to be consistent with chosen total
us = us.loc[~us.Subsector.str.match("LULUCF"), :]
us = us.drop(["Ranking"], axis=1)
us.loc[:, "jurisdiction"] = us.loc[:, "jurisdiction"].apply(lambda x: x.replace('_', ' ').title())

us_tot_ghg = us.groupby(["jurisdiction", "Year"]).sum()
us_tot_ghg = us_tot_ghg.reset_index()
us_tot_ghg.columns = ["jurisdiction", "Year", "Total_GHG_Emissions_Excluding_LUCF_MtCO2e"]

us = us.loc[us.Gas.isin(['CO2 (combustion)', 'CO2 (non-combustion)'])]
us_tot_co2 = us.groupby(["jurisdiction", "Year"]).sum()
us_tot_co2 = us_tot_co2.reset_index()
us_tot_co2.columns = ["jurisdiction", "Year", "Total_CO2_Emissions_Excluding_LUCF_MtCO2e"]

us_tot = us_tot_ghg.merge(us_tot_co2, on=["jurisdiction", "Year"])

In [12]:
#add ipcc_code

sector_names_map_us = {'Wastewater Treatment':'4D',
       'Rice Cultivation':'3C7', 'Manure Management':'3A2', 'Landfills':'4A',
       'Incineration of Waste':'4C1', 'Field Burning of Agricultural Residues':'3C1',
       'Enteric Fermentation':'3A1', 'Composting':'4B', 'Ferroalloy Production':'2C2',
#       'Iron and Steel Production & Metallurgical Coke Production':'1A2A', TEMPORARY FIX
       'Petrochemical Production':'2B8',
       'Stationary Combustion':'1A5A', 'Mobile Combustion':'1A5B',
       'Carbide Production and Consumption':'2B5',
       'Abandoned Underground Coal Mines':'1B1A13', 'Industry - All combustion':'1A2A',# CODE ATTRIBUTION IS TEMPORARY FIX
       'Transport - Other':'1A3E', 
       'Transport - LDVs':'1A3B', 'Transport - Freight (trucks)':'1A3B',
       'Transport - Air':'1A3A', 'Transport - Rail':'1A3C', 'Commercial':'1A4A',
       'Power - All Fuels':'1A1A1', 'Residential':'1A4B', 'Liming':'3C2', 'Urea Fertilization':'3C3',
       'Aluminum Production':'2C3', 'Ammonia Production':'2B1',
       'Glass Production':'2A3',
       'Lead Production':'2C5', 'Lime Production':'2A2',
       'Magnesium Production and Processing':'2C4', 'Non-Energy Use of Fuels':'2D',
       'Other Process Uses of Carbonates':'2A4',
       'Soda Ash Production':'2B7', 'Titanium Dioxide Production':'2B6',
       'Zinc Production':'2C6', 'Cement Production':'2A1', 
       'Substitution of Ozone Depleting Substances':'2F', 
       'Electronics Industry':'2E', 
       'Adipic Acid Production':'2B3', 'N2O from Product Uses':'2G3',
       'Nitric Acid Production':'2B2', 'Agricultural Soil Management':'3C4',
       'Coal Mining':'1B1A', 'Caprolactam, Glyoxal, and Glyoxylic Acid Production':'2B4'}

#'Abandoned Oil and Gas Wells', 'MVAC', 'Petroleum Systems', 'Electrical Transmission and Distribution',
# 'LULUCF Carbon Stock Change', 'LULUCF N2O Emissions', 'LULUCF CH4 Emissions', 'Natural Gas Systems',
#'Urea Consumption for Non-Agricultural Purposes':'', 'HCFC-22 Production', 'Phosphoric Acid Production',
# 'Transport - Natural gas pipeline', 'Carbon Dioxide Consumption',

us.loc[:, "ipcc_code"] = us.loc[:, "Subsector"]
us.loc[:, "ipcc_code"] = us.loc[:, "ipcc_code"].replace(to_replace=sector_names_map_us)

excl_sectors = ['Transport - Natural gas pipeline', 'Carbon Dioxide Consumption', 'Abandoned Oil and Gas Wells', 'Phosphoric Acid Production',
                'Natural Gas Systems', 'Petroleum Systems', 'Urea Consumption for Non-Agricultural Purposes']

us = us.loc[~us.ipcc_code.isin(excl_sectors), :]

us = us.drop(["Gas", "Subsector", "Sector"], axis=1)
us = us.rename(columns={"Emission (mmt CO2e)":"CO2_emissions", "Year":"year"})
us = us[["jurisdiction", "year", "ipcc_code", "CO2_emissions"]]

us = us.loc[us.year<=2020, :]
us = us.sort_values(by=["jurisdiction", "year", "ipcc_code"])

#needed to aggregate over IPCC sectors as I have attributed same ipcc_code to multiple Rhodium categories
us = us.groupby(by=["jurisdiction", "year", "ipcc_code"]).sum()
us = us.reset_index()

us["supra_jur"] = "United States"

# replace name of Georgia state to avoid clash with Georgia country
us["jurisdiction"].replace(to_replace={"Georgia":"Georgia_US"}, inplace=True)

#### Canada

In [13]:

can = pd.read_csv(path_ghg+'/subnational/Canada/harmonized_data/ECCC/GHG_IPCC_Can_Prov_Terr.csv',
                  low_memory=False)
can_map = pd.read_csv(path_ghg+'/subnational/Canada/harmonized_data/ECCC/ipcc_code_name_map.csv')

map_ipcc_can = dict(zip(list(can_map['category'].values), list(can_map['IPCC_CODE'].values)))

can = can.loc[can.Region != "Canada", :]
can = can[["Region", "Year", "Category", "CO2", "CO2eq"]]
can.rename(columns={"Region":"jurisdiction", "Category":"ipcc_code", "CO2eq":"tot_ghg"}, inplace=True)

for col in ["CO2", "tot_ghg"]:
    can[col].replace(to_replace={"x":None}, inplace=True)
    can[col] = can[col].astype(float)
    can[col] = can[col].divide(1000, fill_value=None)

can_tot = can.loc[can.ipcc_code.isin(["TOTAL"]), ["jurisdiction", "Year", "CO2", "tot_ghg"]]
can_lulucf = can.loc[can.ipcc_code.isin(["LAND USE, LAND-USE CHANGE AND FORESTRY"]), ["jurisdiction", "Year", "CO2", "tot_ghg"]]
can_tot = can_tot.merge(can_lulucf, on=["jurisdiction", "Year"])

#Calculating totals excluding LULUCF
can_tot["CO2_x"] = can_tot.CO2_x - can_tot.CO2_y
can_tot["tot_ghg_x"] = can_tot.tot_ghg_x - can_tot.tot_ghg_y
can_tot.drop(["CO2_y", "tot_ghg_y"], axis=1, inplace=True)
can_tot.rename(columns={"CO2_x":"Total_GHG_Emissions_Excluding_LUCF_MtCO2e", "tot_ghg_x":"Total_CO2_Emissions_Excluding_LUCF_MtCO2e"}, inplace=True)

can["ipcc_code"].replace(to_replace=map_ipcc_can, inplace=True)
can = can[["jurisdiction", "Year", "ipcc_code", "CO2"]]
can.columns = ["jurisdiction", "year", "ipcc_code", "CO2_emissions"]
can = can.loc[~can.ipcc_code.isna(), :] #keep all sectors in 'can' dataframe but assign IEA and IPCC codes so that they can be sorted

can["supra_jur"] = "Canada"

#### China

In [14]:
chn_prov_names = pd.read_excel(path_ghg+"/subnational/China/CEADS/CEADS_provincial_emissions/Emission_inventories_for_30_provinces_1997.xlsx", 
                           sheet_name="Sum")
chn_prov_names = list(chn_prov_names["Unnamed: 0"])[:-2]

file_list = os.listdir(path_ghg+"/subnational/China/CEADS/CEADS_provincial_emissions/")
#file_list.remove('.DS_Store')

china_comb = pd.DataFrame()
china_proc = pd.DataFrame()

for file in file_list:
    for prov in chn_prov_names:
        temp = pd.read_excel(path_ghg+"/subnational/China/CEADS/CEADS_provincial_emissions/"+file, 
                           sheet_name=prov, skiprows=[1,2])

        temp.rename(columns={"Unnamed: 0":"ipcc_code"}, inplace=True)
        temp["year"] = file[-9:-5]
        temp["jurisdiction"] = prov
        
        temp_comb = temp[["jurisdiction", "year", "ipcc_code", "Process", "Total"]].copy()
        temp_proc = temp[["jurisdiction", "year", "ipcc_code", "Process"]].copy()
        
        temp_comb.loc[:, "CO2_emissions"] = temp.loc[:, "Total"]-temp.loc[:, "Process"]
        temp_comb.drop(["Process", "Total"], axis=1, inplace=True)
        
        if china_comb.empty == True:
            china_comb = temp_comb
            china_proc = temp_proc
        else:
            china_comb = pd.concat([china_comb, temp_comb])
            china_proc = pd.concat([china_proc, temp_proc])



In [15]:
# From the CEADS data, we can acutally recover the emissions associated with each broad fuel category (like for national jurisdictions)

# Replace province names by those in dataset
province_names_map = {'Beijing': 'Beijing Municipality', 'Tianjin': 'Tianjin Municipality', 'Hebei':'Hebei Province', 
                      'Shanxi':'Shanxi Province', 'InnerMongolia':'Inner Mongolia Autonomous Region',
                      'Liaoning':'Liaoning Province', 'Jilin':'Jilin Province', 'Heilongjiang':'Heilongjiang Province', 
                      'Shanghai':'Shanghai Municipality', 'Jiangsu':'Jiangsu Province',
                      'Zhejiang':'Zhejiang Province', 'Anhui':'Anhui Province', 'Fujian':'Fujian Province', 'Jiangxi':'Jiangxi Province', 
                      'Shandong':'Shandong Province', 'Henan':'Henan Province', 'Hubei':'Hubei Province', 'Hunan':'Hunan Province', 
                      'Guangdong':'Guangdong Province', 'Guangxi':"Guangxi Zhuang Autonomous Region", 'Hainan':'Hainan Province', 'Chongqing':'Chongqing Municipality',
                      'Sichuan':'Sichuan Province', 'Guizhou':'Guizhou Province', 'Yunnan':'Yunnan Province', 'Shaanxi':'Shaanxi Province', 
                      'Gansu':'Gansu Province', 'Qinghai':'Qinghai Province', 'Ningxia':'Ningxia Hui Autonomous Region', 
                      'Xinjiang':'Xinjiang Uyghur Autonomous Region'}

# Associate IPCC sector names with sector codes
sector_names_map_china = {'Farming, Forestry, Animal Husbandry, Fishery and Water Conservancy      ':'1A4C',
                          'Coal Mining and Dressing                                 ':'1A1C',
                          'Petroleum and Natural Gas Extraction                     ':'1B2',
                          'Ferrous Metals Mining and Dressing                       ':'1A2I',
                          'Nonferrous Metals Mining and Dressing                    ':'1A2I',
                          'Nonmetal Minerals Mining and Dressing                    ':'1A2I',
                          'Other Minerals Mining and Dressing                       ':'1A2I',
                          'Logging and Transport of Wood and Bamboo                 ':'1A2J',
                          'Food Processing                                          ':'1A2E',
                          'Food Production                                          ':'1A2E',
                          'Beverage Production':'1A2E',
                          'Tobacco Processing                                       ':'1A2E',
                          'Textile Industry                                         ':'1A2L',
                          'Garments and Other Fiber Products                        ':'1A2L',
                          'Leather, Furs, Down and Related Products                 ':'1A2L',
                          'Timber Processing, Bamboo, Cane, Palm Fiber & Straw Products':'1A2J',
                          'Furniture Manufacturing                                  ':'1A2J',
                          'Papermaking and Paper Products                           ':'1A2D',
                          'Printing and Record Medium Reproduction                  ':'1A2D',
                          'Cultural, Educational and Sports Articles                ':'1A2D',
                          'Petroleum Processing and Coking                          ':'1A1B',
                          'Raw Chemical Materials and Chemical Products             ':'1A2C',
                          'Medical and Pharmaceutical Products                      ':'1A2C',
                          'Chemical Fiber                                           ':'1A2C',
                          'Rubber Products                                          ':'1A2C',
                          'Plastic Products                                         ':'1A2C',
                          'Nonmetal Mineral Products                                ':'1A2F',
                          'Smelting and Pressing of Ferrous Metals                  ':'1A2A',
                          'Smelting and Pressing of Nonferrous Metals               ':'1A2B',
                          'Metal Products                                           ':'1A2A',
                          'Ordinary Machinery                                       ':'1A2H',
                          'Equipment for Special Purposes                           ':'1A2H',
                          'Transportation Equipment                                 ':'1A2G',
                          'Electric Equipment and Machinery                         ':'1A2H',
                          'Electronic and Telecommunications Equipment              ':'1A2H',
                          'Instruments, Meters, Cultural and Office Machinery         ':'1A2H',
                          'Other Manufacturing Industry                             ':'1A2M',
                          'Scrap and waste':'1A2M',
                          'Production and Supply of Electric Power, Steam and Hot Water   ':'1A1A',
                          'Production and Supply of Gas                             ':'1B2B',
                          'Production and Supply of Tap Water                       ':'1A4A', # to be verified
                          'Construction                                             ':'1A2K',
                          'Transportation, Storage, Post and Telecommunication Services    ':'1A3',
                          'Wholesale, Retail Trade and Catering Services            ':'1A4A',
                          'Others                                                   ':'1A5',
                          'Urban':'1A4B', 
                          'Rural':'1A4B'}

china_comb.replace(to_replace=province_names_map, inplace=True)
china_comb.replace(to_replace=sector_names_map_china, inplace=True)

china_proc.replace(to_replace=province_names_map, inplace=True)
china_proc = china_proc.loc[china_proc.ipcc_code=='Nonmetal Mineral Products                                ', :]
china_proc.replace(to_replace={'Nonmetal Mineral Products                                ':"2A"}, inplace=True)
china_proc.rename(columns={"Process":"CO2_emissions"}, inplace=True)

# concatenate combustion and process emissions dataframes

china = pd.concat([china_comb, china_proc])
china = china.loc[china.ipcc_code!="Total Consumption"] # remove total category from dataframe

# sum at the (aggregate) sector level - since some sectors have been assigned the same IPCC code

china = china.groupby(["jurisdiction", "year", "ipcc_code"]).sum()
china = china.reset_index()
china["year"] = china.year.astype(int)

china["supra_jur"] = "China"

# retrieve total province emissions

china_tot = china.groupby(["jurisdiction", "year"]).sum()
china_tot = china_tot.reset_index()

china_tot["Total_GHG_Emissions_Excluding_LUCF_MtCO2e"] = np.nan
china_tot.rename(columns={"CO2_emissions":"Total_CO2_Emissions_Excluding_LUCF_MtCO2e"}, inplace=True)

### II.B Subnational inventory

In [16]:
# Inventory structure
inventory_subnat = wcpd_all.loc[wcpd_all.jurisdiction.isin(subnat_names), ["jurisdiction", "year", "ipcc_code", "iea_code"]]

us_states = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Dc', 'Delaware', 'Florida', 'Georgia_US',
            'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
            'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
            'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
            'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']

can_prov = ['Alberta', 'British Columbia', 'Manitoba', 'New Brunswick', 'Newfoundland and Labrador', 'Northwest Territories',
            'Northwest Territories and Nunavut', 'Nova Scotia', 'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec', 'Saskatchewan', 'Yukon']

chn_prov = ['Anhui Province', 'Beijing Municipality', 'Chongqing Municipality', 'Fujian Province', 'Gansu Province', 'Guangdong Province',
            'Guangxi Zhuang Autonomous Region', 'Guizhou Province', 'Hainan Province', 'Hebei Province', 'Heilongjiang Province', 'Henan Province',
            'Hubei Province', 'Hunan Province', 'Inner Mongolia Autonomous Region', 'Jiangsu Province', 'Jiangxi Province', 'Jilin Province',
            'Liaoning Province', 'Ningxia Hui Autonomous Region', 'Qinghai Province', 'Shaanxi Province', 'Shandong Province', 'Shanghai Municipality',
            'Shanxi Province', 'Sichuan Province', 'Tianjin Municipality', 'Xinjiang Uyghur Autonomous Region', 'Yunnan Province', 'Zhejiang Province',
            "Hong Kong Special Administrative Region", "Tibet Autonomous Region", "Macau Special Administrative Region"]

jpn_pref = ["Tokyo", "Saitama", "Kyoto"]

# we don't have fuel level information for subnational jurisdictions so we drop the Product column and delete duplicate/redundant rows 
inventory_subnat.drop_duplicates(subset=["jurisdiction", "year", "ipcc_code", "iea_code"], inplace=True)
inventory_subnat[["iea_code"]] = inventory_subnat[["iea_code"]].fillna("NA")


# COMBINED data
combined_subnat = pd.concat([us, can, china])
combined_subnat = combined_subnat.merge(ipcc_iea_map, on=["ipcc_code"], how="left")
combined_subnat[["iea_code"]] = combined_subnat[["iea_code"]].fillna("NA")

inventory_subnat.loc[inventory_subnat.jurisdiction.isin(us_states), "supra_jur"] = "United States"
inventory_subnat.loc[inventory_subnat.jurisdiction.isin(can_prov), "supra_jur"] = "Canada"
inventory_subnat.loc[inventory_subnat.jurisdiction.isin(chn_prov), "supra_jur"] = "China"
inventory_subnat.loc[inventory_subnat.jurisdiction.isin(jpn_pref), "supra_jur"] = "Japan"

inventory_subnat = inventory_subnat.merge(combined_subnat, on=["supra_jur", "jurisdiction", "year", "ipcc_code", "iea_code"], how="left")
inventory_subnat = inventory_subnat[['supra_jur', 'jurisdiction', 'year', 'ipcc_code', "iea_code", 'CO2_emissions']]

### II.C Emissions share

In [17]:
em_subnat_total = pd.concat([us_tot, can_tot, china_tot])

inventory_subnat_share = ecp_inv_share.emissions_share(inventory_subnat, em_subnat_total, 
                                                       world_total, national_total, "subnational")
inventory_subnat = inventory_subnat.merge(inventory_subnat_share, on=["supra_jur", "jurisdiction", "year", "ipcc_code", "iea_code"], how="left")

inventory_sect_subnatjur = ecp_inv_share.emissions_share_sectors(inventory_subnat, sectors_wld_total, "subnational")
    

In [72]:
for jur in subnat_list:
    inventory_subnat.loc[inventory_subnat.jurisdiction==jur, :].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/processed/inventory/subnational/inventory_"+subnat_dic[jur]+".csv", index=None)


# Coverage 
## I. Disaggregated coverage dataframes

** Note: National and subnational inventories do not have the same level of disaggregation **

In [18]:
# SHARE OF JURISDICTIONS TOTAL EMISSIONS

coverage_nat = ecp_coverage.coverage(inventory_nat, 2018, 2020, wcpd_all, overlap,
                                     False, "national")
coverage_subnat = ecp_coverage.coverage(inventory_subnat, 2018, 2020, wcpd_all, overlap,
                                        False, "subnational")

coverage_all = pd.concat([coverage_nat, coverage_subnat])
coverage_all = coverage_all.loc[coverage_all["jurisdiction"]!="World", :]

# Coverage figures should be calculated only based on aggregation of the most disaggregated flows, not their higher-level aggregation. 
# Otherwise this might result in double counting. Hence aggregate sectors should be dropped from coverage dataframe.
# It also currently excludes coverage of international aviation ('ABFLOW039') and marine ('ABFLOW040') bunkers 
# as they are currently excluded from national total emissions.
# Drop combustion sectors that are aggregation of lower level sectors and concatenate all coverage dataframes into a single one*

flow_excl = ['1A', '1A1A', '1A1C', '1A2', '1A3'] #'1A1C' is exluded here as ABFLOW011 emissions are attributed twice (to both 1A1B and 1A1C)
coverage_all = coverage_all.loc[~coverage_all.ipcc_code.isin(flow_excl), :]

# SHARE OF SECTORS' GLOBAL TOTAL EMISSIONS

coverage_nat_sect = ecp_coverage.coverage(inventory_sect_natjur, 2018, 2020, wcpd_all, overlap,
                             True, "national")
coverage_subnat_sect = ecp_coverage.coverage(inventory_sect_subnatjur, 2018, 2020, wcpd_all, overlap,
                                True, "subnational")

NameError: name 'overlap' is not defined

## II. Aggregate coverage

- "The sum over all pricing mechanisms" of [emissions_share x coverage_factor] minus the overlapping coverage

We account for the fact that more than one tax scheme or ets scheme can apply to the same emissions. However, covered emissions should be counted only once when covered by one or more scheme. To calculate overlapping coverage at the sector-fuel level, we use the `overlap_` variable in `wcpd_all` dataframe created above.

### II.1 jurisdictions

In [19]:
# Create dataframe to contain aggregate coverage
coverage_agg = coverage_all[["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]]

# TAXES

cov_tax_columns_jurGHG = [x for x in coverage_all.columns if "cov_tax" in x and "jurGHG" in x]
cov_tax_columns_jurCO2 = [x for x in coverage_all.columns if "cov_tax" in x and "jurCO2" in x]
cov_tax_columns_wldGHG = [x for x in coverage_all.columns if "cov_tax" in x and "wldGHG" in x]
cov_tax_columns_wldCO2 = [x for x in coverage_all.columns if "cov_tax" in x and "wldCO2" in x]
cov_tax_columns_supraGHG = [x for x in coverage_all.columns if "cov_tax" in x and "supraGHG" in x]
cov_tax_columns_supraCO2 = [x for x in coverage_all.columns if "cov_tax" in x and "supraCO2" in x]

tax_columns = {"cov_tax_CO2_jurGHG":cov_tax_columns_jurGHG, "cov_tax_CO2_jurCO2":cov_tax_columns_jurCO2, 
               "cov_tax_CO2_wldGHG":cov_tax_columns_wldGHG, "cov_tax_CO2_wldCO2":cov_tax_columns_wldCO2, 
               "cov_tax_CO2_supraGHG":cov_tax_columns_supraGHG, "cov_tax_CO2_supraCO2":cov_tax_columns_supraCO2}

# ETS

cov_ets_columns_jurGHG = [x for x in coverage_all.columns if "cov_ets" in x and "jurGHG" in x]
cov_ets_columns_jurCO2 = [x for x in coverage_all.columns if "cov_ets" in x and "jurCO2" in x]
cov_ets_columns_wldGHG = [x for x in coverage_all.columns if "cov_ets" in x and "wldGHG" in x]
cov_ets_columns_wldCO2 = [x for x in coverage_all.columns if "cov_ets" in x and "wldCO2" in x]
cov_ets_columns_supraGHG = [x for x in coverage_all.columns if "cov_ets" in x and "supraGHG" in x]
cov_ets_columns_supraCO2 = [x for x in coverage_all.columns if "cov_ets" in x and "supraCO2" in x]

ets_columns = {"cov_ets_CO2_jurGHG":cov_ets_columns_jurGHG, "cov_ets_CO2_jurCO2":cov_ets_columns_jurCO2, 
               "cov_ets_CO2_wldGHG": cov_ets_columns_wldGHG, "cov_ets_CO2_wldCO2":cov_ets_columns_wldCO2, 
               "cov_ets_CO2_supraGHG":cov_ets_columns_supraGHG, "cov_ets_CO2_supraCO2":cov_ets_columns_supraCO2}

# ALL INSTRUMENTS

cov_all_columns_jurGHG = [x for x in coverage_all.columns if "cov_" in x and "jurGHG" in x and "overlap" not in x]
cov_all_columns_jurCO2 = [x for x in coverage_all.columns if "cov_" in x and "jurCO2" in x and "overlap" not in x]
cov_all_columns_wldGHG = [x for x in coverage_all.columns if "cov_" in x and "wldGHG" in x and "overlap" not in x]
cov_all_columns_wldCO2 = [x for x in coverage_all.columns if "cov_" in x and "wldCO2" in x and "overlap" not in x]
cov_all_columns_supraGHG = [x for x in coverage_all.columns if "cov_" in x and "supraGHG" in x and "overlap" not in x]
cov_all_columns_supraCO2 = [x for x in coverage_all.columns if "cov_" in x and "supraCO2" in x and "overlap" not in x]

all_columns = {"cov_all_CO2_jurGHG":cov_all_columns_jurGHG, "cov_all_CO2_jurCO2":cov_all_columns_jurCO2, 
               "cov_all_CO2_wldGHG":cov_all_columns_wldGHG, "cov_all_CO2_wldCO2":cov_all_columns_wldCO2, 
               "cov_all_CO2_supraGHG":cov_all_columns_supraGHG, "cov_all_CO2_supraCO2":cov_all_columns_supraCO2}


all_overlap_dic = {'cov_all_CO2_jurGHG':'cov_overlap_CO2_jurGHG', 'cov_all_CO2_jurCO2':'cov_overlap_CO2_jurCO2', 
                   'cov_all_CO2_wldGHG':'cov_overlap_CO2_wldGHG', 'cov_all_CO2_wldCO2':'cov_overlap_CO2_wldCO2', 
                   'cov_all_CO2_supraGHG':'cov_overlap_CO2_supraGHG', 'cov_all_CO2_supraCO2':'cov_overlap_CO2_supraCO2'}

# Calculation of coverage

# An adjustment to the coverage function needs to be made. The function's output needs to include i) overlap across taxes, ii) overlap across ets, 
# iii) overlap across all instruments

# A. Sum across all instruments (columns)

for dic in [tax_columns, ets_columns]: # [all_columns]
    for key in dic.keys():
        # sum across all instrument columns and substract overlaping coverage
        coverage_agg[key] = coverage_all[dic[key]].sum(axis=1) # - coverage_all[all_overlap_dic[key]]

for dic in [all_columns]:
    for key in dic.keys():
        coverage_agg[key] = coverage_all[dic[key]].sum(axis=1) - coverage_all[all_overlap_dic[key]]

# B. Sum across all emission categories (rows)
coverage_agg = coverage_agg.groupby(['jurisdiction','year']).sum()
coverage_agg.reset_index(inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_agg[key] = coverage_all[dic[key]].sum(axis=1) # - coverage_all[all_overlap_dic[key]]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_agg[key] = coverage_all[dic[key]].sum(axis=1) - coverage_all[all_overlap_dic[key]]


In [20]:
# WORLD TOTAL COVERAGE 

cov_world_agg = coverage_agg[["jurisdiction","year", "cov_tax_CO2_wldCO2", "cov_ets_CO2_wldCO2", 
                                      "cov_tax_CO2_wldGHG", "cov_ets_CO2_wldGHG"]]

cov_world_agg.reset_index(inplace=True)
cov_world_agg = cov_world_agg.groupby(['year']).sum()

cov_world_agg["cov_all_CO2_jurGHG"] = cov_world_agg.cov_tax_CO2_wldGHG + cov_world_agg.cov_ets_CO2_wldGHG
cov_world_agg["cov_all_CO2_jurCO2"] = cov_world_agg.cov_tax_CO2_wldCO2 + cov_world_agg.cov_ets_CO2_wldCO2

cov_world_agg["jurisdiction"] = "World"

cov_world_agg.drop("index", axis=1, inplace=True)
cov_world_agg.reset_index(inplace=True)

coverage_agg = pd.concat([coverage_agg, cov_world_agg])

In [21]:
# National-level coverage from subnational schemes

subnat_lists = {"United States":us_states, "Canada":can_prov, "China":chn_prov}

for subnat_list in subnat_lists.keys():
    temp = coverage_agg.loc[coverage_agg.jurisdiction.isin(subnat_lists[subnat_list]), :]
    temp = temp.groupby(["year"]).sum()
    temp.reset_index(inplace=True)
    temp["jurisdiction"] = subnat_list

    temp[["cov_tax_CO2_jurGHG", "cov_tax_CO2_jurCO2", "cov_ets_CO2_jurGHG", "cov_ets_CO2_jurCO2",
          "cov_all_CO2_jurGHG", "cov_all_CO2_jurCO2"]] = np.nan
    
    swap_list = {"cov_tax_CO2_jurGHG":"cov_tax_CO2_supraGHG", "cov_tax_CO2_jurCO2":"cov_tax_CO2_supraCO2", "cov_ets_CO2_jurGHG":"cov_ets_CO2_supraGHG", 
                 "cov_ets_CO2_jurCO2":"cov_ets_CO2_supraCO2", "cov_all_CO2_jurGHG":"cov_all_CO2_supraGHG", "cov_all_CO2_jurCO2":"cov_all_CO2_supraCO2",
                 "cov_tax_CO2_supraGHG":"cov_tax_CO2_jurGHG", "cov_tax_CO2_supraCO2":"cov_tax_CO2_jurCO2", "cov_ets_CO2_supraGHG":"cov_ets_CO2_jurGHG", 
                 "cov_ets_CO2_supraCO2":"cov_ets_CO2_jurCO2", "cov_all_CO2_supraGHG":"cov_all_CO2_jurGHG", "cov_all_CO2_supraCO2":"cov_all_CO2_jurCO2"}
    
    temp.rename(columns=swap_list, inplace=True)

    coverage_agg = coverage_agg.loc[coverage_agg.jurisdiction != subnat_list, :]
    
    coverage_agg = pd.concat([coverage_agg, temp])
    

In [22]:
# NA values for all entries of 'supra' columns of national jurisdictions

all_subnat_list = us_states + can_prov + chn_prov
supra_cols = ["cov_tax_CO2_supraGHG", "cov_tax_CO2_supraCO2", "cov_ets_CO2_supraGHG", 
              "cov_ets_CO2_supraCO2", "cov_all_CO2_supraGHG", "cov_all_CO2_supraCO2"]

coverage_agg.loc[~coverage_agg.jurisdiction.isin(all_subnat_list), supra_cols] = np.nan

In [23]:
coverage_agg_OUT = coverage_agg.fillna("NA")
coverage_agg_OUT.sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/coverage/tot_coverage_jurisdiction.csv", index=None) #total_coverage_2010scope.csv

### II.2 World sectors

In [24]:
coverage_sect = pd.concat([coverage_nat_sect, coverage_subnat_sect])

cov_tax_columns_WldSectCO2 = [x for x in coverage_sect.columns if "cov_tax" in x and "wld_sect" in x]
cov_ets_columns_WldSectCO2 = [x for x in coverage_sect.columns if "cov_ets" in x and "wld_sect" in x]
cov_all_columns_WldSectCO2 = [x for x in coverage_sect.columns if "cov_" in x and "wld_sect" in x]

tax_columns = {"cov_tax_CO2_WldSectCO2":cov_tax_columns_WldSectCO2}
ets_columns = {"cov_ets_CO2_WldSectCO2":cov_ets_columns_WldSectCO2}
all_columns = {"cov_all_CO2_WldSectCO2":cov_all_columns_WldSectCO2}

coverage_sect_agg_schemes = coverage_sect[["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]]

for dic in [tax_columns, ets_columns, all_columns]:
    for key in dic.keys():
        coverage_sect_agg_schemes[key] = coverage_sect[dic[key]].sum(axis=1)

coverage_sect_agg_schemes = coverage_sect_agg_schemes.groupby(['ipcc_code','year']).sum()
coverage_sect_agg_schemes.reset_index(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_sect_agg_schemes[key] = coverage_sect[dic[key]].sum(axis=1)


In [25]:
coverage_sect_agg_schemes.to_csv(path_aux_data+"/data/coverage/tot_coverage_world_sectors.csv", index=None)

# Emissions-weighted Carbon Price (ECP)
Combines: (i) (total) coverage of ETS and associated price, (ii) user-fuel coverage of taxes and associated tax rates


In [None]:
wcpd_usd = ecp_cur_conv.cur_conv(wcpd_all, can, us, china)

## I. ECP from ETS and taxes (CO2 only, time-varying and fixed weights, jurisdiction level)

National and subnational jurisdictions, sectoral level

In [32]:
#Bring together calculated emissions share at sector and sector-fuel level and carbon prices in kusd
#!! need to make sure that `coverage_comb_nat` excludes all aggregate flows!!

id_columns = [x for x in wcpd_usd.columns if bool(re.match(re.compile("ets.+_id"), x))==True or bool(re.match(re.compile("tax.+_id"), x))==True]
price_columns = [x for x in wcpd_usd.columns if bool(re.match(re.compile("ets.+price_kusd"), x))==True or bool(re.match(re.compile("tax.+rate.+kusd"), x))==True]

prices_usd = wcpd_usd[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product']+id_columns+price_columns]

ecp_variables_map = {}

In [33]:

def ecp(coverage_df, jur_level, weight_type, weight_year=None, sectors=bool):
    
    global ecp_variables_map 
    
    if jur_level == "national":
        merge_keys = ["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]
        prices_temp = prices_usd.copy()
        
    if jur_level == "subnational":
        merge_keys = ["jurisdiction", "year", "ipcc_code", "iea_code"]
        prices_temp = prices_usd.loc[prices_usd.Product=="Natural gas", :].copy()
        prices_temp.drop(["Product"], axis=1, inplace=True)
              
    if weight_type=="time_varying":
        temp_df = coverage_df.copy()
        temp_df = temp_df.merge(prices_temp, on=merge_keys, how="left")
        
    elif weight_type=="fixed":
        temp_df = coverage_df.loc[coverage_df.year==weight_year, :]
        temp_df.drop(["year"], axis=1, inplace=True)
        fw_merge_keys = merge_keys.copy()
        fw_merge_keys.remove("year")
        
        temp_df = prices_temp.merge(temp_df, on=fw_merge_keys, how="left")

    ecp_variables_map = {"ecp_ets_jurGHG_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets.+price+."), x))==True or bool(re.match(re.compile("cov_ets.+jurGHG"), x))==True], 
                         "ecp_ets_jurCO2_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets.+price+."), x))==True or bool(re.match(re.compile("cov_ets.+jurCO2"), x))==True], 
                         "ecp_ets_wldGHG_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets.+price+."), x))==True or bool(re.match(re.compile("cov_ets.+wldGHG"), x))==True],
                         "ecp_ets_wldCO2_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets.+price+."), x))==True or bool(re.match(re.compile("cov_ets.+wldCO2"), x))==True],
                         "ecp_tax_jurGHG_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile("cov_tax.+jurGHG"), x))==True], 
                         "ecp_tax_jurCO2_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile("cov_tax.+jurCO2"), x))==True], 
                         "ecp_tax_wldGHG_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile("cov_tax.+wldGHG"), x))==True], 
                         "ecp_tax_wldCO2_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile("cov_tax.+wldCO2"), x))==True]}

    ecp_variables_map_sect = {"ecp_ets_sectCO2_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets.+price+."), x))==True or bool(re.match(re.compile("cov_ets.+_share"), x))==True], 
                              "ecp_tax_sectCO2_kusd":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile("cov_tax.+_share"), x))==True]}
    
    
    if jur_level == "subnational" and sectors == False:
        ecp_variables_map["ecp_ets_supraGHG_kusd"] = [x for x in list(temp_df.columns) if bool(re.match(re.compile("ets.+price+."), x))==True or bool(re.match(re.compile("cov_ets.+supraGHG"), x))==True]
        ecp_variables_map["ecp_ets_supraCO2_kusd"] = [x for x in list(temp_df.columns) if bool(re.match(re.compile("ets.+price+."), x))==True or bool(re.match(re.compile("cov_ets.+supraCO2"), x))==True]
        ecp_variables_map["ecp_tax_supraGHG_kusd"] = [x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile("cov_tax.+supraGHG"), x))==True]
        ecp_variables_map["ecp_tax_supraCO2_kusd"] = [x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile("cov_tax.+supraCO2"), x))==True]

    if sectors == False:
        ecp_mapping = ecp_variables_map
    elif sectors == True:
        ecp_mapping = ecp_variables_map_sect
    
    for key in ecp_mapping.keys():
        temp_df[key] = 0
        list_length = len(ecp_mapping[key])
        
        for i in range(0, list_length, 2):
            temp_df[key] += temp_df[ecp_mapping[key][i]]*temp_df[ecp_mapping[key][i+1]]
        
        temp_df[key] = temp_df[key].astype(float)
    
    temp_df = temp_df[merge_keys+list(ecp_mapping.keys())] 
    
    
    temp_df = temp_df.fillna(0) # CHECK WHY IT "NA" VALUES ARE PRODUCED IN THE FIRST PLACE

    
    if sectors == False:
        temp_df["ecp_all_jurGHG_kusd"] = temp_df["ecp_tax_jurGHG_kusd"]+temp_df["ecp_ets_jurGHG_kusd"]
        temp_df["ecp_all_jurCO2_kusd"] = temp_df["ecp_tax_jurCO2_kusd"]+temp_df["ecp_ets_jurCO2_kusd"]
        temp_df["ecp_all_wldGHG_kusd"] = temp_df["ecp_tax_wldGHG_kusd"]+temp_df["ecp_ets_wldGHG_kusd"]
        temp_df["ecp_all_wldCO2_kusd"] = temp_df["ecp_tax_wldCO2_kusd"]+temp_df["ecp_ets_wldCO2_kusd"]

    elif sectors == True:
        temp_df["ecp_all_sectCO2_kusd"] = temp_df["ecp_tax_sectCO2_kusd"]+temp_df["ecp_ets_sectCO2_kusd"]
        
    if jur_level == "subnational" and sectors == False:
        temp_df["ecp_all_supraGHG_kusd"] = temp_df["ecp_tax_supraGHG_kusd"]+temp_df["ecp_ets_supraGHG_kusd"]
        temp_df["ecp_all_supraCO2_kusd"] = temp_df["ecp_tax_supraCO2_kusd"]+temp_df["ecp_ets_supraCO2_kusd"]
        
    temp_df = temp_df.loc[~temp_df.ipcc_code.isin(flow_excl), :] # exclude aggregate sectors to avoid double counting
    
    return temp_df
    

In [34]:
ecp_tv_nat = ecp(coverage_nat, "national", "time_varying", sectors=False)
ecp_tv_subnat = ecp(coverage_subnat, "subnational", "time_varying", sectors=False)
ecp_tv = pd.concat([ecp_tv_nat, ecp_tv_subnat])

ecp_fixed_nat = ecp(coverage_nat, "national", "fixed", 2015, sectors=False)
ecp_fixed_subnat = ecp(coverage_subnat, "subnational", "fixed", 2015, sectors=False)
ecp_fixed = pd.concat([ecp_fixed_nat, ecp_fixed_subnat])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [35]:
ecp_tv_nat_sect = ecp(coverage_nat_sect, "national", "time_varying", sectors=True)

#ecp_tv_subnat_sect = ecp(coverage_subnat_sect, "subnational", "time_varying")
#ecp_tv_sect = pd.concat([ecp_tv_nat_sect, ecp_tv_subnat_sect])

#ecp_fixed_nat_sect = ecp(coverage_nat_sect, "national", "fixed", 2015)
#ecp_fixed_subnat_sect = ecp(coverage_subnat_sect, "subnational", "fixed", 2015)
#ecp_fixed_sect = pd.concat([ecp_fixed_nat, ecp_fixed_subnat])

IndexError: list index out of range

In [114]:
ecp_tv_nat_sect.groupby(["ipcc_code", "year"]).sum().to_csv(path_aux_data+"/data/ecp/ecp_sectors_wld/world_sectoral_ecp.csv")


In [36]:
def ecp_aggregation(ecp_df):

    global ecp_agg
    
    ecp_agg = ecp_df.groupby(["jurisdiction", "year"]).sum()
    ecp_agg.reset_index(inplace=True)

    #World calculations
    ecp_world_agg = ecp_agg[["jurisdiction","year", 'ecp_ets_wldGHG_kusd', 'ecp_ets_wldCO2_kusd',
                             'ecp_tax_wldGHG_kusd', 'ecp_tax_wldCO2_kusd']]

    ecp_world_agg = ecp_world_agg.groupby(['year']).sum()

    cols_map = {"ecp_tax_wldGHG_kusd":"ecp_tax_jurGHG_kusd", "ecp_tax_wldCO2_kusd":"ecp_tax_jurCO2_kusd",
                "ecp_ets_wldGHG_kusd":"ecp_ets_jurGHG_kusd", "ecp_ets_wldCO2_kusd":"ecp_ets_jurCO2_kusd"}

    ecp_world_agg.rename(columns=cols_map, inplace=True)
    ecp_world_agg["jurisdiction"] = "World"
    ecp_world_agg.reset_index(inplace=True)

    ecp_agg = pd.concat([ecp_agg, ecp_world_agg])

    # all schemes ecp
    ecp_agg["ecp_all_jurGHG_kusd"] = ecp_agg.ecp_tax_jurGHG_kusd + ecp_agg.ecp_ets_jurGHG_kusd
    ecp_agg["ecp_all_jurCO2_kusd"] = ecp_agg.ecp_tax_jurCO2_kusd + ecp_agg.ecp_ets_jurCO2_kusd
    ecp_agg["ecp_all_supraGHG_kusd"] = ecp_agg.ecp_tax_supraGHG_kusd + ecp_agg.ecp_ets_supraGHG_kusd
    ecp_agg["ecp_all_supraCO2_kusd"] = ecp_agg.ecp_tax_supraCO2_kusd + ecp_agg.ecp_ets_supraCO2_kusd

    return ecp_agg

In [37]:
ecp_tv_agg = ecp_aggregation(ecp_tv)
ecp_fixed_agg = ecp_aggregation(ecp_fixed)
            


**National-level ecp from subnational schemes**

In [38]:

subnat_lists = {"United States":us_states, "Canada":can_prov, "China":chn_prov}
ecp_list = {"time_varying":ecp_tv_agg, "fixed_weights":ecp_fixed_agg}

def national_from_subnat(df, list_subnat):
    temp = df.loc[df.jurisdiction.isin(list_subnat), :]
    temp = temp.groupby(["year"]).sum()
    temp.reset_index(inplace=True)
    temp["jurisdiction"] = key

    temp[["ecp_ets_jurGHG_kusd", "ecp_tax_jurGHG_kusd", 
          "ecp_ets_jurCO2_kusd", "ecp_tax_jurCO2_kusd", 
          "ecp_all_jurGHG_kusd", "ecp_all_jurGHG_kusd"]] = np.nan

    swap_list = {"ecp_ets_jurGHG_kusd":"ecp_ets_supraGHG_kusd", "ecp_tax_jurGHG_kusd":"ecp_tax_supraGHG_kusd", 
                 "ecp_ets_jurCO2_kusd":"ecp_ets_supraCO2_kusd", "ecp_tax_jurCO2_kusd":"ecp_tax_supraCO2_kusd", 
                 "ecp_all_jurGHG_kusd":"ecp_all_supraGHG_kusd", "ecp_all_jurCO2_kusd":"ecp_all_supraCO2_kusd",
                 "ecp_ets_supraGHG_kusd":"ecp_ets_jurGHG_kusd", "ecp_tax_supraGHG_kusd":"ecp_tax_jurGHG_kusd", 
                 "ecp_ets_supraCO2_kusd":"ecp_ets_jurCO2_kusd", "ecp_tax_supraCO2_kusd":"ecp_tax_jurCO2_kusd", 
                 "ecp_all_supraGHG_kusd":"ecp_all_jurGHG_kusd", "ecp_all_supraCO2_kusd":"ecp_all_jurCO2_kusd"}

    temp.rename(columns=swap_list, inplace=True)

    df = df.loc[df.jurisdiction != key, :]
    df = pd.concat([df, temp])
        
    return df

for key in subnat_lists.keys():
    ecp_tv_agg = national_from_subnat(ecp_tv_agg, subnat_lists[key])
    ecp_fixed_agg = national_from_subnat(ecp_tv_agg, subnat_lists[key])

In [39]:
# NA values for all entries of 'supra' columns of national jurisdictions
all_subnat_list = us_states + can_prov + chn_prov
supra_cols = ["ecp_ets_supraGHG_kusd", "ecp_tax_supraGHG_kusd", 
              "ecp_ets_supraCO2_kusd", "ecp_tax_supraCO2_kusd", 
              "ecp_all_supraGHG_kusd", "ecp_all_supraCO2_kusd"]

for df in [ecp_tv_agg, ecp_fixed_agg]:
    df.loc[~df.jurisdiction.isin(all_subnat_list), supra_cols] = np.nan

In [40]:
col_sel = ["jurisdiction", "year", 
           'ecp_ets_jurGHG_kusd', 'ecp_tax_jurGHG_kusd', "ecp_all_jurGHG_kusd", 
           'ecp_ets_jurCO2_kusd', 'ecp_tax_jurCO2_kusd', "ecp_all_jurCO2_kusd",
           'ecp_ets_supraGHG_kusd', 'ecp_tax_supraGHG_kusd', "ecp_all_supraGHG_kusd", 
           'ecp_ets_supraCO2_kusd', 'ecp_tax_supraCO2_kusd', "ecp_all_supraCO2_kusd"]

ecp_tv_agg[col_sel].fillna("NA").sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/ecp/ecp_economy/ecp_vw/ecp_tvV.csv", index=None)
ecp_fixed_agg[col_sel].fillna("NA").sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/ecp/ecp_economy/ecp_fw/ecp_fixedV.csv", index=None)

## II. Calculation of ECP from ETS and taxes (CO2 only, constant, jurisdiction-specific weights, jurisdiction level)

In [391]:
#Information needed (need two dataframes): 
#- at sector level: year of first implementation of carbon pricing on any fuel (one with the list of jurisdiction and year of implementation of first scheme)
#- at jurisdiction level: year of first implementation of carbon pricing in any sector (one with the list of jurisdiction-sector entries and year of implementation of first scheme)

first_year = cp_all_subset[['jurisdiction', 'year', 'ipcc_code', 'IEA_CODE', 'Product', 'tax', 'ets']]

first_year.loc[:, "pricing"] = first_year.loc[:, "tax"] + first_year.loc[:, "ets"]
first_year.loc[:, "pricing"] = np.where(first_year.loc[:, "pricing"] > 0, 1.0,0.0)
first_year = first_year.drop(["tax", "ets"], axis=1)
first_year = first_year.loc[first_year.pricing == 1,]
first_year.sort_values(by=["jurisdiction", "year", "ipcc_code", "Product"], ascending=True, inplace=True)

first_year.drop_duplicates(subset=["jurisdiction", "ipcc_code", "Product"], inplace=True)

# jurisdiction-level, recording year prior to first year of pricing mechanism implementation
first_year_jur = first_year.groupby(["jurisdiction", "year"]).sum()
first_year_jur.loc[:, "pricing"] = np.where(first_year_jur.loc[:, "pricing"] > 0, 1.0, 0.0)
first_year_jur.reset_index(inplace=True)

first_year_jur = first_year_jur.drop_duplicates(subset=["jurisdiction"])
first_year_jur["year"] = first_year_jur["year"]-1 # to take the year before first year of implementation
first_year_jur = first_year_jur.drop("pricing", axis=1)

first_year_jur = pd.Series(first_year_jur.year.values,index=first_year_jur.jurisdiction).to_dict()

## adjustment needed for Finland and Poland - their respective schemes started in 1990 so 1989 should be the reference year for
## emissions. However, because GHG/CO2 CAIT series start in 1990, shares series start in 1990
first_year_jur["Finland"] = 1990
first_year_jur["Poland"] = 1990


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [392]:
def ecp_const_intro(share_df, temp_cp, prices):
    
    df_concat = pd.DataFrame()

    for jur in share_df.jurisdiction.unique():
        if jur in first_year_jur.keys():
            weight_year = first_year_jur[jur]
        else:
            weight_year = 2015
    
        share_df_jur = share_df[(share_df["jurisdiction"]==jur)]
        temp_cp_jur = temp_cp.loc[(temp_cp["jurisdiction"]==jur), :]

        x = ecp_const(share_df_jur, weight_year, temp_cp_jur, prices)
        
        if df_concat.empty == True:
            df_concat = x
        else:
            df_concat = pd.concat([df_concat, x])
        
    return df_concat



In [393]:
ecp_const_intro_comb_nat = ecp_const_intro(combustion_nat_share, cp_all_subset, prices_usd)
ecp_const_intro_comb_subnat = ecp_const_intro(combustion_subnat_share, cp_all_subset, prices_usd)
ecp_const_intro_fuind = ecp_const_intro(fuind_share, cp_all_subset, prices_usd)


In [394]:
ecp_aggregation([ecp_const_intro_comb_nat, ecp_const_intro_comb_subnat, ecp_const_intro_fuind])

col_sel = ["jurisdiction", "year", 'ECP_ets_jurGHG_kusd', 'ECP_tax_jurGHG_kusd',
           "ECP_tax_ets_jurGHG_kusd", 'ECP_ets_jurCO2_kusd', 'ECP_tax_jurCO2_kusd', "ECP_tax_ets_jurCO2_kusd"]

ecp_agg.loc[ecp_agg.year<=2018][col_sel].to_csv(path_aux_data+"/ecp_calculation/ecp/ecp_economy/ecp_intro/ecp_intro.csv", index=None)