In [25]:
import pandas as pd
import numpy as np

from pandas import read_csv
from importlib.machinery import SourceFileLoader

# changing file locations 

path_wcpd = '/Users/ejoiner/OneDrive - rff/Documents/RFF Organization/Research Documents/WCPD/WorldCarbonPricingDatabase/_dataset/data' 
path_dependencies = '/Users/ejoiner/OneDrive - rff/Documents/RFF Organization/Research Documents/WCPD/ECP/_code/compilation/_dependencies/dep_ecp'

In [20]:
ecp_general = SourceFileLoader('general', path_dependencies+'/ecp_v3_gen_func.py').load_module()

In [None]:
ecp_cov_fac = SourceFileLoader('coverage_factors', path_dependencies+'ecp_v3_coverageFactors.py').load_module()

In [22]:
ecp_sec_em = SourceFileLoader('inventoryShare', path_dependencies+'/ecp_v3_aggSec.py').load_module()

In [23]:
ecp_sec_natSubnat = SourceFileLoader('sectorNatFromSubnat', path_dependencies+'/ecp_v3_sectorNatFromSubnat.py').load_module()

exec(open("/Users/ejoiner/OneDrive - rff/Documents/RFF Organization/Research Documents/WCPD/WorldCarbonPricingDatabase/_code/_compilation/_dependencies/jurisdictions.py").read())

subnat_lists = {"United States":subnat_usa, "Canada":subnat_can, "China":subnat_chn}

gases = ["CO2"] #CH4, N2O, F-GASES #CO2 only for now

Loading data

In [None]:
for gas in gases:

    # LOAD WCPD DATAFRAMES

    wcpd_ctry = ecp_general.concatenate(path_wcpd+"/"+gas+"/national")
    wcpd_subnat = ecp_general.concatenate(path_wcpd+"/"+gas+"/subnational")
    wcpd_all = pd.concat([wcpd_ctry, wcpd_subnat]).sort_values(by=["jurisdiction", "year"])

    # ADD COLUMN WITH IEA SECTOR CODES
    ipcc_iea_map = pd.read_csv("/Users/ejoiner/OneDrive - rff/Documents/RFF Organization/Research Documents/WCPD/ECP/_raw/_aux_files/ipcc2006_iea_category_codes.csv", 
                usecols=["IPCC_CODE", "IEA_CODE"])
    ipcc_iea_map.columns = ["ipcc_code", "iea_code", "pollutant"]

    wcpd_all = wcpd_all.merge(ipcc_iea_map, on=["ipcc_code", "pollutant"], how="left")

    # ADD COVERAGE FACTORS 

    wcpd_all = ecp_cov_fac.coverageFactors(wcpd_all, gas)

    # LISTS OF JURISDICTION NAMES

    ctry_names = list(wcpd_ctry.jurisdiction.unique())
    subnat_names = list(wcpd_subnat.jurisdiction.unique())

    std_ctry_names = [x.replace(".", "").replace(",", "").replace(" ", "_") for x in ctry_names]
    countries_dic = dict(zip(ctry_names, std_ctry_names))

    std_subnat_names = [x.replace(".", "").replace(",", "").replace(" ", "_") for x in subnat_names]
    subnat_dic = dict(zip(subnat_names, std_subnat_names))

ValueError: Usecols do not match columns, columns expected but not found: ['IPCC_CODE', 'IEA_CODE']

In [None]:
priceSeriesPaths = {"cFlxRate":"/currentPrices/FlexXRate", 
                   "cFixRate":"/currentPrices/FixedXRate", 
                   "kFixRate":"/constantPrices/FixedXRate"}

price_cols = {"cFlxRate":["ets_price_usd", "tax_rate_incl_ex_usd"], 
              "cFixRate":["ets_price_usd", "tax_rate_incl_ex_usd"], 
              "kFixRate":["ets_price_usd_k", "tax_rate_incl_ex_usd_k"]}

ecp_cols = {"cFlxRate":["ecp_ets_usd", "ecp_tax_usd", "ecp_all_usd"],
            "cFixRate":["ecp_ets_usd", "ecp_tax_usd", "ecp_all_usd"],
            "kFixRate":["ecp_ets_usd_k", "ecp_tax_usd_k", "ecp_all_usd_k"]}

IPCC1AList = ["1A", "1B", "1C", 
              "1A1", "1A2", "1A3", "1A4", "1A5", 
              "1A1A", "1A1B", "1A1C", "1A2A", "1A2B", "1A2C",
              "1A2D", "1A2E", "1A2F", "1A2G", "1A2H", "1A2I", 
              "1A2J", "1A2K", "1A2L", "1A2M", 
              "1A3A", "1A3B", "1A3C", "1A3D", "1A3E",
              "1A4A", "1A4B", "1A4C", "1A5A", "1A5B", "1A5C",
              "1A1A1", "1A1A2", "1A1A3", "1A3A1", "1A3A2", "1A3D1", "1A3D2", "1A3E1",
              "1A4C1", "1A4C2", "1A4C3",
              "1A5A", "1A5B", "1A5C"]

# "1A1C" - excluding one of the two IPCC categories associated with IEA flow ABFLOW011

# specify which price series is being calculated
priceSeriesList =  ["cFlxRate", "kFixRate"]

# select jurisdiction level for which calculations are executed [for now, only 'national']
jurGroup = "national"

# list of IPCC categories for which aggregate prices have to be calculated
# for most categories, this would simply involve an aggregation from product to category level; 
# for aggregate categories, it implies an additional aggregation from subcategories
priceCat = {"level_5":["1A1A1", "1A1A2", "1A1A3", "1A3A1", "1A3A2", "1A3D1", 
                       "1A3D2", "1A3E1", "1A4C1", "1A4C2", "1A4C3"],
            "level_4":["1A1A", "1A1B", "1A1C", "1A2A", "1A2B", "1A2C",
                       "1A2D", "1A2E", "1A2F", "1A2G", "1A2H", 
                       "1A2I", "1A2J", "1A2K", "1A2L", "1A2M",
                       "1A3A", "1A3B", "1A3C", "1A3D", "1A4A", 
                       "1A4B", "1A4C", "1A5A", "1A5B", "1A5C",
                       "1B2A", "1B2B",
                       "3C1A", "3C1B", "3C1C", "3C1D"], 
             "level_3":["1A2", "1A5", "1B1", 
                        "2A1", "2A2", "2A3", "2A4", "2H1", "2H2",
                        "3A1", "3A2", 
                        "3B1", "3B2", "3B3", "3B4", "3B5", "3B6",
                        "3C1", "3C2", "3C3", "3C4", "3C5", "3C6", "3C7", "3C8"],
             "level_2":["2A", "2B", "2C", "2D", "2E", "2F", "2G", 
                        "4A", "4B", "4C", "4D", "4E",
                        "5A", "5B"]
}

# list of IPCC categories that are aggregate of lower level categories in the inventory structure
aggCatList = ["1", "1A", "1A1A", "1A2", "1A3",
              "1A3A", "1A3D", "1A3E", "1A4", "1A4C",
              "1A5", "1B", "1B1", "1B1A", "1B1A1",
              "1B1A2", "1B2", "1B2A", "1B2A3", "1B2B",
              "1B2B3", "1C", "1C1", "1C2", "2",
              "2A", "2A4", "2B", "2B8", "2B9", "2C",
              "2D", "2E", "2F", "2G", "2H", "3", 
              "3A", "3B", "3B1", "3B2", "3B3", "3B4",
              "3B5", "3B6", "3B6B", "3C", "3D", "4",
              "4A", "4C", "4D", "5", "5A"]

IPCC1AListSubCat = list(set(IPCC1AList)-set(aggCatList))

In [None]:
dfSecPrice = {}

for gas in ["CO2"]:#gases:

    for priceSeries in priceSeriesList:

        dfSecPrice[gas] = pd.DataFrame() # initialize empty dataframe to store final output
        priceSeriesPath = priceSeriesPaths[priceSeries] # get directory path for the set price series

        # load sector level, emissions-weighted, carbon prices
        cfWeightedP = ecp_sec_em.cfWeightedPrices(gas, priceSeries, priceSeriesPath,
                                                  price_cols, wcpd_all, countries_dic, subnat_dic)
        cfWprices_usd = cfWeightedP[0]
        all_inst_col = cfWeightedP[1]

        # the strategy for the calculation of carbon prices for each category 
        # proceeds from the lowest level of the classification structure;
        # thus the loop needs to proceed from lower to higher category levels
        for level in priceCat.keys():
            for category in priceCat[level]:

                # calling function calculating emissions shares (using emissions inventory) for `category`
                # this function gives the emissions shares / weights with which to aggregate prices at the `category` level
                inventoryShare = ecp_sec_em.inventoryShare(category, jurGroup, gas, level)

                # fill in entries for which no emissions data (and hence no emissions share data) is available 
                # with default weights; default weights values depend on the category
                # for the lowest level categories under energy category 1A and national jurisdictions, 
                # the emissions inventory distinguishes between three types of fuel, 
                # hence the default weight will be 1/3 (simple average across fuel types)
                if category in IPCC1AListSubCat and jurGroup=="national":
                    temp = inventoryShare.merge(cfWprices_usd, on=["jurisdiction", "year", "ipcc_code", "iea_code", "Product"], how='left')
                    temp[gas+"_shareAggSec"].fillna(1/3, inplace=True)

                # for other energy categories under category 1A, the weight depends on the number of 
                # subcategories constituting the category
                elif category in IPCC1AList and jurGroup=="national":
                    temp = inventoryShare.merge(cfWprices_usd, on=["jurisdiction", "year", "ipcc_code", "iea_code", "Product"], how='left')
                    subCatcodes = [x for x in list(temp.ipcc_code.unique()) if (x.startswith(category)) if (len(x)==len(category)+1)]
                        
                    if len(subCatcodes) != 0:
                        weight = 1/len(subCatcodes)
                        temp.loc[(temp.ipcc_code.isin(subCatcodes)) & (temp.CO2_shareAggSec.isnull()), gas+"_shareAggSec"] = weight
                    else:
                        temp.loc[(temp.ipcc_code==category) & (temp.CO2_shareAggSec.isnull()), gas+"_shareAggSec"] = 1

                # 
                else:
                    temp = inventoryShare.merge(cfWprices_usd, on=["jurisdiction", "year", "ipcc_code", "iea_code"], how='left')
                    subCatcodes = [x for x in list(temp.ipcc_code.unique()) if (x.startswith(category)) if (len(x)==len(category)+1)]
                        
                    if len(subCatcodes) != 0:
                        weight = 1/len(subCatcodes)
                        temp.loc[(temp.ipcc_code.isin(subCatcodes)) & (temp.CO2_shareAggSec.isnull()), gas+"_shareAggSec"] = weight
                    else:
                        temp.loc[(temp.ipcc_code==category) & (temp.CO2_shareAggSec.isnull()), gas+"_shareAggSec"] = 1

                # Step 1. calculate weighted price
                #The summation will not work with NA values
                temp[price_cols[priceSeries][0]].fillna(0, inplace=True)
                temp[price_cols[priceSeries][1]].fillna(0, inplace=True)

                # weighted price (emissions or simple)
                temp.loc[:, ecp_cols[priceSeries][0]] = (temp.loc[:, price_cols[priceSeries][0]])*temp.loc[:, gas+"_shareAggSec"]
                temp.loc[:, ecp_cols[priceSeries][1]] = (temp.loc[:, price_cols[priceSeries][1]])*temp.loc[:, gas+"_shareAggSec"]
                temp.loc[:, ecp_cols[priceSeries][2]] = (temp.loc[:, ecp_cols[priceSeries][0]] + temp.loc[:, ecp_cols[priceSeries][1]])

                temp.drop([all_inst_col, price_cols[priceSeries][0], price_cols[priceSeries][1]], 
                          axis=1, inplace=True)

                # to obtain sector-level price, we need to remove rows corresponding to aggregate IPCC category, then sum
                # but we also want to keep (aggregate) category-level emissions, which may be available in original emissions data frame
                # but not at disaggregate level; in which case the sum will not work for emissions
                if level == "level_5":
                    temp_sum = temp.groupby(["jurisdiction", "year", "ipcc_code"]).sum()
                    temp_sum.reset_index(inplace=True)
                else:
                    if len(temp.ipcc_code.unique()) > 1: # handling the case where there are no subcategories
                        temp = temp.loc[temp.ipcc_code!=category]

                    temp_sum = temp.groupby(["jurisdiction", "year"]).sum()
                    temp_sum.reset_index(inplace=True)

                    temp_sum["ipcc_code"] = category

                if dfSecPrice[gas].empty:
                    dfSecPrice[gas] = temp_sum
                else:
                    dfSecPrice[gas] = pd.concat([dfSecPrice[gas], temp_sum])

                # for aggregate sectors, prices need to be taken from the dfSecPrice[gas] dataframe
                # replace rows corresponding to that category in `prices_usd` dataframe with those in `dfSecPrice[gas]` dataframe.
                if category in aggCatList:
                    cfWprices_usd = cfWprices_usd.loc[cfWprices_usd.ipcc_code!=category]

                    columns = ['jurisdiction', 'year', 'ipcc_code']+ecp_cols[priceSeries]

                    dfSecPriceTemp = dfSecPrice[gas].loc[dfSecPrice[gas].ipcc_code==category, columns]
                    dfSecPriceTemp["Product"] = np.nan

                    colMap = dict(zip(ecp_cols[priceSeries], price_cols[priceSeries]+[all_inst_col]))
                    dfSecPriceTemp.rename(columns=colMap, inplace=True)

                    cfWprices_usd = pd.concat([cfWprices_usd, dfSecPriceTemp])

        dfSecPrice[gas].sort_values(by=["jurisdiction", "year", "ipcc_code"], inplace=True)
        dfSecPrice[gas] = dfSecPrice[gas].fillna(value="NA")

        # Adding sector-level ecp from subnational schemes to national sector-level values
        #dfSecPrice[gas] = ecp_sec_natSubnat.secNat_from_secSubnat(cfWprices_usd, dfSecPrice[gas], gas)

        #dfSecPrice[gas].to_csv("/Users/gd/GitHub/ECP/_dataset/ecp/ipcc/ecp_ipcc/"+priceSeriesPath+"/ecp_ipcc_"+gas+".csv", index=None)

Constant (year of introduction), jurisdiction-specific, weights

**Categories 1A (combustion) only**

In [None]:

# sector-level, recording year prior to first year of pricing mechanism implementation

firstYear = wcpd_all[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product', 'tax', 'ets']]

firstYear.loc[:, "pricing"] = firstYear.loc[:, "tax"] + firstYear.loc[:, "ets"]
firstYear.loc[:, "pricing"] = np.where(firstYear.loc[:, "pricing"] > 0, 1.0,0.0)
firstYear = firstYear.drop(["tax", "ets"], axis=1)
firstYear = firstYear.loc[firstYear.pricing == 1,]
firstYear.sort_values(by=["jurisdiction", "year", "ipcc_code", "Product"], ascending=True, inplace=True)

firstYear.drop_duplicates(subset=["jurisdiction", "ipcc_code", "Product"], inplace=True)

firstYear_cat = firstYear.groupby(["jurisdiction", "year", "ipcc_code", "iea_code"]).sum()
firstYear_cat.loc[:, "pricing"] = np.where(firstYear_cat.loc[:, "pricing"] > 0, 1.0, 0.0)
firstYear_cat.reset_index(inplace=True)
firstYear_cat = firstYear_cat.drop_duplicates(subset=["jurisdiction", "iea_code"]) #"IPCC_cat_code" - has to be sorted based on IEA_CODE because there are two IPCC catefories corresponding to IEA ABFLOW011
firstYear_cat.loc[:, "year"] = firstYear_cat.loc[:, "year"]-1 # to take the year before first year of implementation
firstYear_cat = firstYear_cat.drop("pricing", axis=1)

# same adjustment
firstYear_cat.loc[(firstYear_cat.jurisdiction=="Finland") & (firstYear_cat.year==1989), "year"] = 1990
firstYear_cat.loc[(firstYear_cat.jurisdiction=="Poland") & (firstYear_cat.year==1989), "year"] = 1990

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  firstYear.loc[:, "pricing"] = firstYear.loc[:, "tax"] + firstYear.loc[:, "ets"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  firstYear.loc[:, "pricing"] = np.where(firstYear.loc[:, "pricing"] > 0, 1.0,0.0)


In [None]:
# Note: currently, for this function to run, `priceSeries` must be set to "kFixRate" above
inventoryPath = "/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/"
invName = {"national":"nat", "subnational":"subnat"}
inventory = pd.read_csv(inventoryPath+"/inventory_"+invName[jurGroup]+"_"+gas+".csv")

inventory = inventory[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product', gas]]
inventory = inventory.loc[inventory.year<=2018, :]

for yr in range(2019, 2023):
    temp = inventory.loc[inventory.year==2018, :].copy()
    temp["year"].replace(to_replace={2018:yr}, inplace=True)

    inventory = pd.concat([inventory, temp])

inventory = inventory.loc[~inventory.iea_code.isnull()]

aggSecEm = inventory.groupby(["jurisdiction", "year", 'ipcc_code', 'iea_code']).sum()
aggSecEm.reset_index(inplace=True)

share_df = inventory.merge(aggSecEm[["jurisdiction", "year", "iea_code", gas]], 
                                            on=["jurisdiction", "year", "iea_code"], how='left')
share_df[gas+"_shareAggSec"] = share_df[gas+"_x"]/share_df[gas+"_y"]

share_df.drop(["CO2_y"], axis=1, inplace=True)
share_df.rename(columns={gas+"_x":gas}, inplace=True)



In [None]:

def ecp_constIntroCat(share_df, prices):

    df_concat = pd.DataFrame()

    for jur in share_df.jurisdiction.unique():
        # sectors for which a price is in place at one point in the sample
        sectors = firstYear_cat.loc[(firstYear_cat.jurisdiction==jur), :]["iea_code"].unique()

        for sector in share_df.loc[share_df.jurisdiction==jur, "iea_code"].unique():
            if jur in firstYear_cat.jurisdiction.unique() and sector in sectors:
                weight_year = firstYear_cat.loc[(firstYear_cat.jurisdiction==jur) & (firstYear_cat.iea_code==sector)]["year"].item()
            else:
                weight_year = 2015

            temp_df = share_df[(share_df["jurisdiction"]==jur) & (share_df["year"]==weight_year) & (share_df["iea_code"]==sector)]
            temp_df.drop("year", axis=1, inplace=True)

            merge_keys = ["jurisdiction", "ipcc_code", "iea_code", "Product"]
            temp_df = temp_df.merge(prices, on=merge_keys, how="left")

            temp_df[["ets_price_usd_k", "tax_rate_incl_ex_usd_k"]].fillna(0, inplace=True)

            temp_df["ecp_ets_ew_usd_k"] = (temp_df.ets_price_usd_k)*temp_df.CO2_shareAggSec
            temp_df["ecp_tax_ew_usd_k"] = (temp_df.tax_rate_incl_ex_usd_k)*temp_df.CO2_shareAggSec
            temp_df["ecp_all_ew_usd_k"] = (temp_df.ets_price_usd_k.fillna(0) + temp_df.tax_rate_incl_ex_usd_k.fillna(0))*temp_df.CO2_shareAggSec

            temp_df.drop(["ets_price_usd_k", "tax_rate_incl_ex_usd_k", "all_inst_usd_k"], axis=1, inplace=True)

            temp_df_sum = temp_df.groupby(["jurisdiction", "year", "iea_code"]).sum()
            temp_df_sum.reset_index(inplace=True)

            temp_df_sum = temp_df_sum[["jurisdiction", "year", "iea_code", 
                                        "ecp_ets_ew_usd_k", "ecp_tax_ew_usd_k", "ecp_all_ew_usd_k"]]

            if df_concat.empty == True:
                df_concat = temp_df_sum
            else:
                df_concat = pd.concat([df_concat, temp_df_sum])
        
    return df_concat


In [None]:
ecp_ipcc_intro = ecp_constIntroCat(share_df, cfWprices_usd)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop("year", axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df[["ets_price_usd_k", "tax_rate_incl_ex_usd_k"]].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop("year", axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tem

In [None]:
ecp_ipcc_intro.to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/data/ecp/ecp_ipcc/ecp_intro/ecp_ipcc_CO2_intro.csv", index=None)