In [21]:
import csv
import os
import glob
import pandas as pd
import numpy as np

from pandas import read_csv
from importlib.machinery import SourceFileLoader

path_wcpd = '/Users/gd/GitHub/WorldCarbonPricingDatabase/_dataset/data' 
path_dependencies = '/Users/gd/GitHub/ECP/_code/compilation/dependencies'

ecp_general = SourceFileLoader('general', path_dependencies+'/ecp_v3_gen_func.py').load_module()
ecp_cov_fac = SourceFileLoader('coverage_factors', path_dependencies+'/ecp_v3_coverageFactors.py').load_module()

gases = ["CO2"] #CH4, N2O, F-GASES #CO2 only for now

Loading data

In [22]:
for gas in gases:

    # LOAD WCPD DATAFRAMES

    wcpd_ctry = ecp_general.concatenate(path_wcpd+"/"+gas+"/national")
    wcpd_subnat = ecp_general.concatenate(path_wcpd+"/"+gas+"/subnational")
    wcpd_all = pd.concat([wcpd_ctry, wcpd_subnat]).sort_values(by=["jurisdiction", "year"])

    # ADD COLUMN WITH IEA SECTOR CODES
    ipcc_iea_map = pd.read_csv("/Users/gd/GitHub/ECP/_raw/_aux_files/ipcc2006_iea_category_codes.csv", 
                    usecols=["IPCC_CODE", "IEA_CODE"])
    ipcc_iea_map.columns = ["ipcc_code", "iea_code"]

    wcpd_all = wcpd_all.merge(ipcc_iea_map, on=["ipcc_code"], how="left")

    # LISTS OF JURISDICTION NAMES

    ctry_names = list(wcpd_ctry.jurisdiction.unique())
    subnat_names = list(wcpd_subnat.jurisdiction.unique())

    std_ctry_names = [x.replace(".", "").replace(",", "").replace(" ", "_") for x in ctry_names]
    countries_dic = dict(zip(ctry_names, std_ctry_names))

    std_subnat_names = [x.replace(".", "").replace(",", "").replace(" ", "_") for x in subnat_names]
    subnat_dic = dict(zip(subnat_names, std_subnat_names))

    # ADD COVERAGE FACTORS 

    wcpd_all = ecp_cov_fac.coverageFactors(wcpd_all, gas)

In [25]:
combustion_ipcc = ["1A1A1", "1A1A2", "1A1A3", "1A1B", "1A2A", "1A2B", "1A2C",
                   "1A2D", "1A2E", "1A2F", "1A2G", "1A2H", "1A2I", "1A2J", "1A2K", "1A2L",
                   "1A2M", "1A3A1", "1A3A2", "1A3B", "1A3C", "1A3D1", "1A3D2", "1A3E1",
                   "1A4A", "1A4B", "1A4C", "1A4C1", "1A4C2", "1A4C3", "1A5A", "1A5B", "1A5C"]
# "1A1C" - excluding one of the two IPCC categories associated with IEA flow ABFLOW011

priceSeriesPath = {"cFlxRate":"/currentPrices/FlexXRate", 
                    "cFixRate":"/currentPrices/FixedXRate", 
                    "kFixRate":"/constantPrices/FixedXRate"}

price_cols = {"cFlxRate":["ets_price_usd", "tax_rate_incl_ex_usd"], 
              "cFixRate":["ets_price_usd", "tax_rate_incl_ex_usd"], 
              "kFixRate":["ets_price_usd_k", "tax_rate_incl_ex_usd_k"]}



def cfWeightedPrices(gas, priceSeries):

    global prices_usd, prices_usd_comb, all_inst_col

    # PRICES
    prices_usd = ecp_general.concatenate("/Users/gd/GitHub/ECP/_raw/wcpd_usd/"+gas+priceSeriesPath[priceSeries])

    # currently including the price of the main tax or ets scheme; should be revised to account for all schemes
    prices_usd = prices_usd[["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]+price_cols[priceSeries]]

    prices_usd = prices_usd.merge(wcpd_all[["jurisdiction", "year", "ipcc_code", "iea_code", "Product", "tax_cf", "ets_cf"]], 
                                on=["jurisdiction", "year", "ipcc_code", "iea_code", "Product"])

    # calculate total price by summing across all mechanisms columns
    if priceSeries=="kFixRate":
        prices_usd["tax_rate_incl_ex_usd_k"] = prices_usd.tax_rate_incl_ex_usd_k*prices_usd.tax_cf
        prices_usd["ets_price_usd_k"] = prices_usd.ets_price_usd_k*prices_usd.ets_cf

        price_columns = [x for x in prices_usd.columns if (x.endswith("usd_k"))]
        all_inst_col = "all_inst_usd_k"
        prices_usd[all_inst_col] = prices_usd[price_columns].sum(axis=1)

    else:
        prices_usd["tax_rate_incl_ex_usd"] = prices_usd.tax_rate_incl_ex_usd*prices_usd.tax_cf
        prices_usd["ets_price_usd"] = prices_usd.ets_price_usd*prices_usd.ets_cf

        price_columns = [x for x in prices_usd.columns if (x.endswith("usd"))]
        all_inst_col = "all_inst_usd"
        prices_usd[all_inst_col] = prices_usd[price_columns].sum(axis=1)

    prices_usd.drop(["tax_cf", "ets_cf"], axis=1, inplace=True)

    prices_usd  = prices_usd[["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]+price_cols[priceSeries]+[all_inst_col]].sort_values(by=["jurisdiction", "year"])
    prices_usd_comb = prices_usd[prices_usd["ipcc_code"].isin(combustion_ipcc)]

#prices_usd_comb.fillna("NA", inplace=True)

#for jur in countries_dic.keys():
#    prices_usd.loc[prices_usd.jurisdiction==jur, :].to_csv("/Users/gd/GitHub/ECP/_raw/wcpd_weighted_prices_usd/prices_usd_"+gas+"_"+countries_dic[jur]+".csv", index=None)
#for jur in subnat_dic.keys():
#    prices_usd.loc[prices_usd.jurisdiction==jur, :].to_csv("/Users/gd/GitHub/ECP/_raw/wcpd_weighted_prices_usd/prices_usd_"+gas+"_"+subnat_dic[jur]+".csv", index=None)



Calculating sector-level emissions shares and sector-level prices

In [26]:
df_sec_price = {}

priceSeries = "kFixRate"

for gas in ["CO2"]:#gases:
    cfWeightedPrices(gas, priceSeries)

    # EMISSIONS
    inventory_nat = pd.read_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/inventory_nat_"+gas+".csv")
    inventory_nat_comb = inventory_nat[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product', gas]].loc[inventory_nat.ipcc_code.isin(combustion_ipcc), :]

    #recalculating ipcc category level totals (to account for rounding errors)
    inventory_nat_comb_sectot = inventory_nat_comb.groupby(by=["jurisdiction", "year", "iea_code"]).sum()
    inventory_nat_comb_sectot.reset_index(inplace=True)
    inventory_nat_comb_sectot.rename(columns={gas:gas+"_sectot"}, inplace=True)

    inventory_nat_comb = inventory_nat_comb.merge(inventory_nat_comb_sectot, on=["jurisdiction", "year", "iea_code"])
    inventory_nat_comb[gas+"_sharesec"] = inventory_nat_comb[gas]/inventory_nat_comb[gas+"_sectot"]

    inventory_nat_comb.loc[:, gas+"_sharesec"] = inventory_nat_comb.loc[:, gas+"_sharesec"].fillna(0)
    inventory_nat_comb = inventory_nat_comb.drop([gas, gas+"_sectot"], axis=1)

    # extending inventory to years beyond the last year
    for yr in range(2019, 2023):
        temp = inventory_nat_comb.loc[inventory_nat_comb.year==2018, :].copy()
        temp["year"].replace(to_replace={2018:yr}, inplace=True)

        inventory_nat_comb = pd.concat([inventory_nat_comb, temp])

    sec_price_comb = inventory_nat_comb.merge(prices_usd_comb, on=["jurisdiction", "year", "ipcc_code", "iea_code", "Product"], how='left')

    # Time-varying weights
    # Combustion categories
    #The summation will not work with NA values
    sec_price_comb[price_cols[priceSeries][0]].fillna(0, inplace=True)
    sec_price_comb[price_cols[priceSeries][1]].fillna(0, inplace=True)

    ecp_cols = {"cFlxRate":["ecp_ets_usd", "ecp_tax_usd", "ecp_all_usd"],
                "cFixRate":["ecp_ets_usd", "ecp_tax_usd", "ecp_all_usd"],
                "kFixRate":["ecp_ets_usd_k", "ecp_tax_usd_k", "ecp_all_usd_k"]}

    sec_price_comb.loc[:, ecp_cols[priceSeries][0]] = (sec_price_comb.loc[:, price_cols[priceSeries][0]])*sec_price_comb.loc[:, gas+"_sharesec"]
    sec_price_comb.loc[:, ecp_cols[priceSeries][1]] = (sec_price_comb.loc[:, price_cols[priceSeries][1]])*sec_price_comb.loc[:, gas+"_sharesec"]
    sec_price_comb.loc[:, ecp_cols[priceSeries][2]] = (sec_price_comb.loc[:, ecp_cols[priceSeries][0]] + sec_price_comb.loc[:, ecp_cols[priceSeries][1]])

    sec_price_comb = sec_price_comb.drop([all_inst_col, price_cols[priceSeries][0], price_cols[priceSeries][1]], axis=1)

    sec_price_sum = sec_price_comb.groupby(["jurisdiction", "year", "ipcc_code"]).sum()
    sec_price_sum.reset_index(inplace=True)

    iea_codes = prices_usd[["jurisdiction", "year", "ipcc_code", "iea_code"]]
    iea_codes.drop_duplicates(inplace=True)

    sec_price_sum = sec_price_sum.merge(iea_codes, on=["jurisdiction", "year", "ipcc_code"], how='left')

    # Non combustion sectors
    df_sec_price[gas] = prices_usd.loc[~prices_usd.ipcc_code.isin(combustion_ipcc)]
    df_sec_price[gas] = df_sec_price[gas].loc[df_sec_price[gas].jurisdiction.isin(inventory_nat_comb.jurisdiction.unique())]
    df_sec_price[gas].drop(["Product"], axis=1, inplace=True)
    df_sec_price[gas].drop_duplicates(["jurisdiction", "year", "ipcc_code"], inplace=True)

    df_sec_price[gas][price_cols[priceSeries][0]].fillna(0, inplace=True)
    df_sec_price[gas][price_cols[priceSeries][1]].fillna(0, inplace=True)

    df_sec_price_cols = {"cFlxRate":['ets_price_usd', 'tax_rate_incl_ex_usd'],
                         "cFixRate":['ets_price_usd', 'tax_rate_incl_ex_usd'],
                         "kFixRate":['ets_price_usd_k', 'tax_rate_incl_ex_usd_k']}

    df_sec_price[gas].rename(columns={df_sec_price_cols[priceSeries][0]:ecp_cols[priceSeries][0], df_sec_price_cols[priceSeries][1]:ecp_cols[priceSeries][1]},
                        inplace=True)
    df_sec_price[gas].loc[:, ecp_cols[priceSeries][2]] = df_sec_price[gas].loc[:, ecp_cols[priceSeries][0]] + df_sec_price[gas].loc[:, ecp_cols[priceSeries][1]]

    df_sec_price[gas] = pd.concat([df_sec_price[gas], sec_price_sum])
    df_sec_price[gas].sort_values(by=["jurisdiction", "year", "ipcc_code"], inplace=True)

    df_sec_price[gas] = df_sec_price[gas].drop([all_inst_col], axis=1)
    df_sec_price[gas] = df_sec_price[gas].fillna(value="NA")

    df_sec_price[gas].to_csv("/Users/gd/GitHub/ECP/_dataset/price/ecp_sectors/"+priceSeriesPath[priceSeries]+"/ecp_sector_"+gas+".csv", index=None)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iea_codes.drop_duplicates(inplace=True)


Constant, jurisdiction-specific, weights

In [27]:

# sector-level, recording year prior to first year of pricing mechanism implementation

firstYear = wcpd_all[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product', 'tax', 'ets']]

firstYear.loc[:, "pricing"] = firstYear.loc[:, "tax"] + firstYear.loc[:, "ets"]
firstYear.loc[:, "pricing"] = np.where(firstYear.loc[:, "pricing"] > 0, 1.0,0.0)
firstYear = firstYear.drop(["tax", "ets"], axis=1)
firstYear = firstYear.loc[firstYear.pricing == 1,]
firstYear.sort_values(by=["jurisdiction", "year", "ipcc_code", "Product"], ascending=True, inplace=True)

firstYear.drop_duplicates(subset=["jurisdiction", "ipcc_code", "Product"], inplace=True)

firstYear_sector = firstYear.groupby(["jurisdiction", "year", "ipcc_code", "iea_code"]).sum()
firstYear_sector.loc[:, "pricing"] = np.where(firstYear_sector.loc[:, "pricing"] > 0, 1.0, 0.0)
firstYear_sector.reset_index(inplace=True)
firstYear_sector = firstYear_sector.drop_duplicates(subset=["jurisdiction", "iea_code"]) #"IPCC_cat_code" - has to be sorted based on IEA_CODE because there are two IPCC catefories corresponding to IEA ABFLOW011
firstYear_sector.loc[:, "year"] = firstYear_sector.loc[:, "year"]-1 # to take the year before first year of implementation
firstYear_sector = firstYear_sector.drop("pricing", axis=1)

# same adjustment
firstYear_sector.loc[(firstYear_sector.jurisdiction=="Finland") & (firstYear_sector.year==1989), "year"] = 1990
firstYear_sector.loc[(firstYear_sector.jurisdiction=="Poland") & (firstYear_sector.year==1989), "year"] = 1990

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  firstYear.loc[:, "pricing"] = firstYear.loc[:, "tax"] + firstYear.loc[:, "ets"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  firstYear.loc[:, "pricing"] = np.where(firstYear.loc[:, "pricing"] > 0, 1.0,0.0)


In [12]:
# Note: currently, for this function to run, `priceSeries` must be set to "kFixRate" above

def ecp_const_intro_sec(share_df, prices):
    
    df_concat = pd.DataFrame()

    for jur in share_df.jurisdiction.unique():
        sectors = firstYear_sector.loc[(firstYear_sector.jurisdiction==jur), :]["iea_code"].unique()
        
        for sector in share_df.loc[share_df.jurisdiction==jur, "iea_code"].unique():
            if jur in firstYear_sector.jurisdiction.unique() and sector in sectors:
                weight_year = firstYear_sector.loc[(firstYear_sector.jurisdiction==jur) & (firstYear_sector.iea_code==sector)]["year"].item()
            else:
                weight_year = 2015
    
            temp_df = share_df[(share_df["jurisdiction"]==jur) & (share_df["year"]==weight_year) & (share_df["iea_code"]==sector)]
            temp_df.drop("year", axis=1, inplace=True)

            merge_keys = ["jurisdiction", "ipcc_code", "iea_code", "Product"]
            temp_df = temp_df.merge(prices, on=merge_keys, how="left")

            temp_df["ecp_ets_ew_usd_k"] = (temp_df.ets_price_usd_k)*temp_df.CO2_sharesec
            temp_df["ecp_tax_ew_usd_k"] = (temp_df.tax_rate_incl_ex_usd_k)*temp_df.CO2_sharesec
            temp_df["ecp_all_ew_usd_k"] = (temp_df.ets_price_usd_k + temp_df.tax_rate_incl_ex_usd_k)*temp_df.CO2_sharesec

            temp_df.drop(["ets_price_usd_k", "tax_rate_incl_ex_usd_k", "all_inst_usd_k"], axis=1, inplace=True)

            temp_df_sum = temp_df.groupby(["jurisdiction", "year", "iea_code"]).sum()
            temp_df_sum.reset_index(inplace=True)

            temp_df_sum = temp_df_sum[["jurisdiction", "year", "iea_code", "ecp_ets_ew_usd_k", "ecp_tax_ew_usd_k", 
                                       "ecp_all_ew_usd_k"]]

            if df_concat.empty == True:
                df_concat = temp_df_sum
            else:
                df_concat = pd.concat([df_concat, temp_df_sum])
        
    return df_concat



In [13]:
ecp_sector_intro = ecp_const_intro_sec(inventory_nat_comb, wcpd_all, prices_usd_comb)
ecp_sector_intro.to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/data/ecp/ecp_sectors/ecp_intro/ecp_sector_CO2_intro.csv", index=None)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop("year", axis=1, inplace=True)


AttributeError: 'DataFrame' object has no attribute 'ecp_ets_usd_k'