# Emissions-weighted carbon price

# Packages and libraries

In [1]:
path_dependencies = '/Users/gd/GitHub/ECP/_code/compilation/dependencies'
exec(open(path_dependencies+'/pkgs_and_directories.py').read())
exec(open("/Users/gd/GitHub/WorldCarbonPricingDatabase/_code/_compilation/_dependencies/jurisdictions.py").read())

subnat_lists = {"United States":subnat_usa, "Canada":subnat_can, "China":subnat_chn}
all_subnat_list = subnat_usa + subnat_can + subnat_chn

gases = ["CO2"] # "CH4", "N2O", "F-GASES" 


# Institutional design (World Carbon Pricing Database)

In [2]:
wcpd = {}

for gas in gases: 

    # LOAD WCPD DATAFRAMES

    wcpd_ctry = ecp_general.concatenate(path_wcpd+"/"+gas+"/national")
    wcpd_subnat = ecp_general.concatenate(path_wcpd+"/"+gas+"/subnational")
    wcpd_all = pd.concat([wcpd_ctry, wcpd_subnat]).sort_values(by=["jurisdiction", "year"])

    wcpd_all = wcpd_all.drop_duplicates(['jurisdiction', 'year', 'ipcc_code', 'Product']) # duplicates from WCPD need to be corrected

    # ADD COLUMN WITH IEA SECTOR CODES
    ipcc_iea_map = pd.read_csv("/Users/gd/GitHub/ECP/_raw/_aux_files/ipcc2006_iea_category_codes.csv", 
                    usecols=["IPCC_CODE", "IEA_CODE"])
    ipcc_iea_map.columns = ["ipcc_code", "iea_code"]

    wcpd_all = wcpd_all.merge(ipcc_iea_map, on=["ipcc_code"], how="left")

    # LISTS OF JURISDICTION NAMES

    ctry_names = list(wcpd_ctry.jurisdiction.unique())
    subnat_names = list(wcpd_subnat.jurisdiction.unique())

    std_ctry_names = [x.replace(".", "").replace(",", "").replace(" ", "_") for x in ctry_names]
    countries_dic = dict(zip(ctry_names, std_ctry_names))

    std_subnat_names = [x.replace(".", "").replace(",", "").replace(" ", "_") for x in subnat_names]
    subnat_dic = dict(zip(subnat_names, std_subnat_names))

    if len(wcpd_all[wcpd_all.duplicated(['jurisdiction', 'year', 'ipcc_code', 'Product'], keep=False)] != 0):
        print("The dataset contains duplicates!")

    # ADD COVERAGE FACTORS 

    wcpd_all = ecp_cov_fac.coverageFactors(wcpd_all, gas)

    # MECHANISM OVERLAP 
    overlap = pd.read_csv("/Users/gd/GitHub/WorldCarbonPricingDatabase/_raw/overlap/overlap_mechanisms_"+gas+".csv")

    wcpd_all = ecp_overlap.overlap(wcpd_all, overlap)

    wcpd[gas] = wcpd_all


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inst_df_ids.loc[:, "overlap_"+i[0]+"_"+i[1]] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inst_df_ids.loc[:, "overlap_"+i[0]+"_"+i[1]+"_ids"] = inst_df_ids.loc[:, scheme_columns[i[0]]] + inst_df_ids.loc[:, scheme_columns[i[1]]]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inst_df_ids.loc[:,

# Emissions
## I. National jurisdictions 
### I.A Total GHG emissions (EDGAR)

In [3]:
# Global Warming Potential values
ipcc_gwp = pd.read_csv("/Users/gd/GitHub/ECP/_raw/ghg_inventory/gwp_list.csv")
ipcc_gwp_list = dict(zip(ipcc_gwp.edgar_label, ipcc_gwp.ar5_gwp_100y))

# DATAFRAME WITH TOTAL "+gas+" AND OTHER GHG EMISSIONS

edgar_path = "/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/raw/national/EDGAR"

# list of file names containting CO2, CH4, N20 data
file_names = os.listdir("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/raw/national/EDGAR")
for file_name in ["v60_GHG_fgases_1990_2018", ".DS_Store", "v60_CO2_org_short-cycle_C_1970_2018.xls"]:
    file_names.remove(file_name)

# list of file names containing F-gases data
file_names_fgases = os.listdir("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/raw/national/EDGAR/v60_GHG_fgases_1990_2018")

# dictionary associating gas names with corresponding file name
gases_dic = {""+gas+"":file_names[0], "N2O":file_names[1], "CH4":file_names[2]}

# concordance between EDGAR and World Bank country names
edgar_wb_map = pd.read_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/aux_files/edgar_wb_ctry_name_map.csv")
edgar_wb_map = edgar_wb_map.loc[~edgar_wb_map.ctry_name_wb.isnull()]

edgar_wb_map = dict(zip(list(edgar_wb_map['ctry_name_edgar'].values), list(edgar_wb_map['ctry_name_wb'].values)))

# Creation of dataframe with gases figures
df_gases_tot = pd.DataFrame()
df_fgases_tot = pd.DataFrame()

for gas in gases_dic.keys():
    df_gas = pd.read_excel(path_ghg+"/national/EDGAR/"+gases_dic[gas], skiprows=9,
                           sheet_name="TOTALS BY COUNTRY")

    df_gas = df_gas.loc[df_gas.fossil_bio=="fossil"] # keep only fossil emissions - is this assumption correct?

    df_gas.drop(["IPCC_annex", "C_group_IM24_sh", "Country_code_A3", "fossil_bio"], axis=1, inplace=True)
    df_gas.rename(columns={"Name":"jurisdiction"}, inplace=True)

    df_gas = df_gas.melt(id_vars=["jurisdiction"])

    df_gas.rename(columns={"variable":"year", "value":gas}, inplace=True)

    df_gas["year"] = df_gas["year"].apply(lambda x: x[2:])
    df_gas["year"] = df_gas["year"].astype(int)

    df_gas['jurisdiction'].replace(to_replace=edgar_wb_map, inplace=True)

    # convert to CO2 equivalent
    df_gas[gas] =  df_gas[gas]*ipcc_gwp_list[gas]

    if df_gases_tot.empty == True:
        df_gases_tot = df_gas
    else:
        df_gases_tot = df_gases_tot.merge(df_gas, on=["jurisdiction", "year"], how="outer")

# Aggregate files for f-gases
for i in range(0,len(file_names_fgases)):
    df_fgas = pd.read_excel(path_ghg+"/national/EDGAR/v60_GHG_fgases_1990_2018/"+file_names_fgases[i], skiprows=9,
                           sheet_name="TOTALS BY COUNTRY")
    df_fgas = df_fgas.loc[df_fgas.fossil_bio=="fossil"] # keep only fossil emissions

    df_fgas.drop(["IPCC_annex", "C_group_IM24_sh", "Country_code_A3", "fossil_bio"], axis=1, inplace=True)
    df_fgas.rename(columns={"Name":"jurisdiction"}, inplace=True)

    df_fgas = df_fgas.melt(id_vars=["jurisdiction"])

    df_fgas.rename(columns={"variable":"year", "value":file_names_fgases[i][:-15]}, inplace=True)

    df_fgas["year"] = df_fgas["year"].apply(lambda x: x[2:])
    df_fgas["year"] = df_fgas["year"].astype(int)

    df_fgas['jurisdiction'].replace(to_replace=edgar_wb_map, inplace=True)

    # convert to CO2 equivalent
    df_fgas[file_names_fgases[i][:-15]] =  df_fgas[file_names_fgases[i][:-15]]*ipcc_gwp_list[file_names_fgases[i][:-15]]

    if df_fgases_tot.empty == True:
        df_fgases_tot = df_fgas
    else:
        df_fgases_tot = df_fgases_tot.merge(df_fgas, on=["jurisdiction", "year"], how="outer")

# Sum of all f-gases
df_fgases_tot.fillna(0, inplace=True)
df_fgases_tot["F-GASES"] = df_fgases_tot.drop(["jurisdiction", "year"], axis=1).sum(axis=1)
df_fgases_tot = df_fgases_tot[["jurisdiction", "year", "F-GASES"]] # keep only aggregate F-GASES value and merge keys

df_gases_tot = df_gases_tot.merge(df_fgases_tot, on=["jurisdiction", "year"], how="outer")

df_gases_tot["all_GHG"] = 0

for gas in gases_dic.keys():
    df_gases_tot["all_GHG"] = df_gases_tot["all_GHG"]+df_gases_tot[gas]

df_gases_tot.to_csv('/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/ghg_national_total.csv',index=None)

In [4]:
# CREATE DATAFRAME WITH TOTAL EMISSIONS WORLD

df_gases_tot_world = df_gases_tot.groupby(by=["year"]).sum()
df_gases_tot_world.reset_index(inplace=True)
df_gases_tot_world.to_csv('/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/ghg_world_total.csv',index=None)

### I.B National GHG Inventory (kt and % of totals)

In [5]:
inventory_nat_co2 = ecp_inv_nat.inventory_co2(wcpd_all, ipcc_iea_map, ctry_names, edgar_wb_map)
inventory_nat_ch4 = ecp_inv_nat.inventory_non_co2(wcpd_all, ctry_names, "CH4", edgar_wb_map, ipcc_gwp_list)
inventory_nat_n2o = ecp_inv_nat.inventory_non_co2(wcpd_all, ctry_names, "N2O", edgar_wb_map, ipcc_gwp_list)
inventory_nat_fgases = ecp_inv_nat.inventory_non_co2(wcpd_all, ctry_names, "F-GASES", edgar_wb_map, ipcc_gwp_list)

inventories = {"CO2":inventory_nat_co2, "CH4":inventory_nat_ch4, "N2O":inventory_nat_n2o, "F-GASES":inventory_nat_fgases}
inventories_wldSect = {}

for gas in gases:
    inventory_share = ecp_inv_share.emissions_share(inventories[gas], df_gases_tot, df_gases_tot_world, gas)

    if gas == "CO2":
        merge_keys = ["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]
        columns = ["jurisdiction", "year", "ipcc_code", "iea_code", "Product", gas]
    else:
        merge_keys = ["jurisdiction", "year", "ipcc_code"]
        columns = ["jurisdiction", "year", "ipcc_code", gas]

    inventories[gas] = pd.merge(inventories[gas], inventory_share, on=merge_keys, how="left")

    inventories[gas].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/inventory_nat_"+gas+".csv", index=None)

    # Shares of total world sector emissions
    sectors_wld_total = inventories[gas][columns].groupby(["ipcc_code", "year"]).sum()
    sectors_wld_total.reset_index(inplace=True)

    inventories_wldSect[gas] = ecp_inv_share.emissions_share_sectors(inventories[gas], sectors_wld_total, gas, "national")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ippu_fug_nat["jurisdiction"] = ippu_fug_nat["jurisdiction"].replace(to_replace=edgar_wb_map)


In [6]:
for gas in gases:
    for ctry in countries_dic.keys():
        inventories[gas].loc[inventories[gas].jurisdiction==ctry, :].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/national/"+gas+"/inventory_"+gas+"_"+countries_dic[ctry]+".csv", index=None)


## II. Subnational jurisdictions
### II.A Total GHG emissions

In [7]:
df_gases_tot_subnat = ecp_inv_subnat.subnat_total()
df_gases_tot_subnat.to_csv('/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/ghg_subnat_total.csv',index=None)

### II.B Subnational inventory (kt and % totals)

In [8]:
inventories_subnat = {}
inventories_subnat_wldSect = {}

for gas in gases:

    inventory_subnat = ecp_inv_subnat.inventory_subnat(wcpd[gas], subnat_names, ipcc_iea_map, gas)

    inventory_subnat_share = ecp_inv_share.emissions_share(inventory_subnat, 
                                                           df_gases_tot_subnat, df_gases_tot_world, gas, df_gases_tot, "subnational")

    merge_keys = ["supra_jur", "jurisdiction", "year", "ipcc_code", "iea_code"]
    columns = ["supra_jur", "jurisdiction", "year", "ipcc_code", gas]

    inventories_subnat[gas] = pd.merge(inventory_subnat, inventory_subnat_share, on=merge_keys, how="left")
    inventories_subnat[gas].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/inventory_subnat_"+gas+".csv", index=None)

    # Shares of total world sector emissions
    sectors_wld_total = inventories[gas][["jurisdiction", "year", "ipcc_code", "iea_code", gas]].groupby(["ipcc_code", "year"]).sum()
    sectors_wld_total.reset_index(inplace=True)

    inventories_subnat_wldSect[gas] = ecp_inv_share.emissions_share_sectors(inventories_subnat[gas], sectors_wld_total, gas, "subnational")


In [9]:
for gas in gases:
    for jur in subnat_dic.keys():
        inventories_subnat[gas].loc[inventories_subnat[gas].jurisdiction==jur, :].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/subnational/"+gas+"/inventory_"+gas+"_"+subnat_dic[jur]+".csv", index=None)


# Coverage 
## I. Disaggregated coverage dataframes

** Note: National and subnational inventories do not have the same level of disaggregation **

In [10]:
coverage_nat = {}
coverage_subnat = {}
coverage = {}
coverage_sect = {}
coverage_nat_sect = {}
coverage_subnat_sect = {}

# SHARE OF JURISDICTIONS TOTAL EMISSIONS
for gas in gases:

    coverage_nat[gas] = ecp_coverage.coverage(inventories[gas], 2018, 2022, wcpd[gas], gas,
                                        False, "national")
    coverage_subnat[gas] = ecp_coverage.coverage(inventories_subnat[gas], 2018, 2022, wcpd[gas], gas,
                                            False, "subnational")

    coverage_all = pd.concat([coverage_nat[gas], coverage_subnat[gas]])
    coverage_all = coverage_all.loc[coverage_all["jurisdiction"]!="World", :]

    # Coverage figures should be calculated only based on aggregation of the most disaggregated flows, not their higher-level aggregation. 
    # Otherwise this might result in double counting. Hence aggregate sectors should be dropped from coverage dataframe.
    # It also currently excludes coverage of international aviation ('ABFLOW039') and marine ('ABFLOW040') bunkers 
    # as they are currently excluded from national total emissions.
    # Drop combustion sectors that are aggregation of lower level sectors and concatenate all coverage dataframes into a single one*

    flow_excl = ['1A', '1A1A', '1A1C', '1A2', '1A3'] #'1A1C' is exluded here as ABFLOW011 emissions are attributed twice (to both 1A1B and 1A1C)
    coverage_all = coverage_all.loc[~coverage_all.ipcc_code.isin(flow_excl), :]

    coverage[gas] = coverage_all


    # SHARE OF SECTORS' GLOBAL TOTAL EMISSIONS

    coverage_nat_sect[gas] = ecp_coverage.coverage(inventories_wldSect[gas], 2018, 2022, wcpd[gas], gas,
                                True, "national")
    coverage_subnat_sect[gas] = ecp_coverage.coverage(inventories_subnat_wldSect[gas], 2018, 2022, wcpd[gas], gas,
                                    True, "subnational")

    coverage_sect_all = pd.concat([coverage_nat_sect[gas], coverage_subnat_sect[gas]])
    coverage_sect_all = coverage_sect_all.loc[coverage_sect_all["jurisdiction"]!="World", :]

    coverage_sect[gas] = coverage_sect_all

## II. Aggregate coverage

- "The sum over all pricing mechanisms" of [emissions_share x coverage_factor] minus the overlapping coverage

We account for the fact that more than one tax scheme or ets scheme can apply to the same emissions. However, covered emissions should be counted only once when covered by one or more scheme. To calculate overlapping coverage at the sector-fuel level, we use the `overlap_` variable in `wcpd_all` dataframe created above.

### II.1 jurisdictions

In [11]:
agg_cov = {}

for gas in gases:
    # Create dataframe to contain aggregate coverage
    coverage_agg = coverage_all[["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]]

    # TAXES

    cov_tax_columns_jurGHG = [x for x in coverage_all.columns if "cov_tax" in x and "jurGHG" in x]
    cov_tax_columns_jurGas = [x for x in coverage_all.columns if "cov_tax" in x and "jur"+gas in x]
    cov_tax_columns_wldGHG = [x for x in coverage_all.columns if "cov_tax" in x and "wldGHG" in x]
    cov_tax_columns_wldGas = [x for x in coverage_all.columns if "cov_tax" in x and "wld"+gas in x]
    cov_tax_columns_supraGHG = [x for x in coverage_all.columns if "cov_tax" in x and "supraGHG" in x]
    cov_tax_columns_supraGas = [x for x in coverage_all.columns if "cov_tax" in x and "supra"+gas in x]

    tax_columns = {"cov_tax_"+gas+"_jurGHG":cov_tax_columns_jurGHG, "cov_tax_"+gas+"_jur"+gas:cov_tax_columns_jurGas, 
                "cov_tax_"+gas+"_wldGHG":cov_tax_columns_wldGHG, "cov_tax_"+gas+"_wld"+gas:cov_tax_columns_wldGas, 
                "cov_tax_"+gas+"_supraGHG":cov_tax_columns_supraGHG, "cov_tax_"+gas+"_supra"+gas:cov_tax_columns_supraGas}

    # ETS

    cov_ets_columns_jurGHG = [x for x in coverage_all.columns if "cov_ets" in x and "jurGHG" in x]
    cov_ets_columns_jurGas = [x for x in coverage_all.columns if "cov_ets" in x and "jur"+gas in x]
    cov_ets_columns_wldGHG = [x for x in coverage_all.columns if "cov_ets" in x and "wldGHG" in x]
    cov_ets_columns_wldGas = [x for x in coverage_all.columns if "cov_ets" in x and "wld"+gas in x]
    cov_ets_columns_supraGHG = [x for x in coverage_all.columns if "cov_ets" in x and "supraGHG" in x]
    cov_ets_columns_supraGas = [x for x in coverage_all.columns if "cov_ets" in x and "supra"+gas in x]

    ets_columns = {"cov_ets_"+gas+"_jurGHG":cov_ets_columns_jurGHG, "cov_ets_"+gas+"_jur"+gas:cov_ets_columns_jurGas, 
                "cov_ets_"+gas+"_wldGHG": cov_ets_columns_wldGHG, "cov_ets_"+gas+"_wld"+gas:cov_ets_columns_wldGas, 
                "cov_ets_"+gas+"_supraGHG":cov_ets_columns_supraGHG, "cov_ets_"+gas+"_supra"+gas:cov_ets_columns_supraGas}

    # ALL INSTRUMENTS

    cov_all_columns_jurGHG = [x for x in coverage_all.columns if "cov_" in x and "jurGHG" in x and "overlap" not in x]
    cov_all_columns_jurGas = [x for x in coverage_all.columns if "cov_" in x and "jur"+gas in x and "overlap" not in x]
    cov_all_columns_wldGHG = [x for x in coverage_all.columns if "cov_" in x and "wldGHG" in x and "overlap" not in x]
    cov_all_columns_wldGas = [x for x in coverage_all.columns if "cov_" in x and "wld"+gas in x and "overlap" not in x]
    cov_all_columns_supraGHG = [x for x in coverage_all.columns if "cov_" in x and "supraGHG" in x and "overlap" not in x]
    cov_all_columns_supraGas = [x for x in coverage_all.columns if "cov_" in x and "supra"+gas in x and "overlap" not in x]

    all_columns = {"cov_all_"+gas+"_jurGHG":cov_all_columns_jurGHG, "cov_all_"+gas+"_jur"+gas:cov_all_columns_jurGas, 
                "cov_all_"+gas+"_wldGHG":cov_all_columns_wldGHG, "cov_all_"+gas+"_wld"+gas:cov_all_columns_wldGas, 
                "cov_all_"+gas+"_supraGHG":cov_all_columns_supraGHG, "cov_all_"+gas+"_supra"+gas:cov_all_columns_supraGas}


    all_overlap_dic = {"cov_all_"+gas+"_jurGHG":"cov_overlap_"+gas+"_jurGHG", "cov_all_"+gas+"_jur"+gas:"cov_overlap_"+gas+"_jur"+gas, 
                    "cov_all_"+gas+"_wldGHG":"cov_overlap_"+gas+"_wldGHG", "cov_all_"+gas+"_wld"+gas:"cov_overlap_"+gas+"_wld"+gas, 
                    "cov_all_"+gas+"_supraGHG":"cov_overlap_"+gas+"_supraGHG", "cov_all_"+gas+"_supra"+gas:"cov_overlap_"+gas+"_supra"+gas}

    # Calculation of coverage

    # An adjustment to the coverage function needs to be made. The function's output needs to include i) overlap across taxes, ii) overlap across ets, 
    # iii) overlap across all instruments

    # A. Sum across all instruments (columns)

    for dic in [tax_columns, ets_columns]: # [all_columns]
        for key in dic.keys():
            # sum across all instrument columns and substract overlaping coverage
            coverage_agg[key] = coverage_all[dic[key]].sum(axis=1) # - coverage_all[all_overlap_dic[key]]

    for dic in [all_columns]:
        for key in dic.keys():
            coverage_agg[key] = coverage_all[dic[key]].sum(axis=1) - coverage_all[all_overlap_dic[key]]

    # B. Sum across all emission categories (rows)
    coverage_agg = coverage_agg.groupby(['jurisdiction','year']).sum()
    coverage_agg.reset_index(inplace=True)

    agg_cov[gas] = coverage_agg

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_agg[key] = coverage_all[dic[key]].sum(axis=1) # - coverage_all[all_overlap_dic[key]]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_agg[key] = coverage_all[dic[key]].sum(axis=1) # - coverage_all[all_overlap_dic[key]]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_agg[key]

In [12]:
# WORLD TOTAL COVERAGE 

for gas in gases:
    cov_world_agg = agg_cov[gas][["jurisdiction","year", "cov_tax_"+gas+"_wld"+gas, "cov_ets_"+gas+"_wld"+gas, 
                                        "cov_tax_"+gas+"_wldGHG", "cov_ets_"+gas+"_wldGHG"]]

    cov_world_agg.reset_index(inplace=True)
    cov_world_agg = cov_world_agg.groupby(['year']).sum()

    cov_world_agg["cov_all_"+gas+"_jurGHG"] = cov_world_agg["cov_tax_"+gas+"_wldGHG"] + cov_world_agg["cov_ets_"+gas+"_wldGHG"]
    cov_world_agg["cov_all_"+gas+"_jur"+gas] = cov_world_agg["cov_tax_"+gas+"_wld"+gas] + cov_world_agg["cov_ets_"+gas+"_wld"+gas]
    
    cov_world_agg["cov_all_"+gas+"_wldGHG"] = cov_world_agg["cov_all_"+gas+"_jurGHG"]
    cov_world_agg["cov_all_"+gas+"_wld"+gas] = cov_world_agg["cov_all_"+gas+"_jur"+gas]

    # addind values in 'jur' columns for the "World" jurisdiction
    cov_world_agg["cov_tax_"+gas+"_jurGHG"] = cov_world_agg["cov_tax_"+gas+"_wldGHG"]
    cov_world_agg["cov_ets_"+gas+"_jurGHG"] = cov_world_agg["cov_ets_"+gas+"_wldGHG"]
    cov_world_agg["cov_tax_"+gas+"_jur"+gas] = cov_world_agg["cov_tax_"+gas+"_wld"+gas]
    cov_world_agg["cov_ets_"+gas+"_jur"+gas] = cov_world_agg["cov_ets_"+gas+"_wld"+gas]

#    cov_world_agg["cov_tax_"+gas+"_wldGHG"] = "NA"
#    cov_world_agg["cov_ets_"+gas+"_wldGHG"] = "NA"
#    cov_world_agg["cov_tax_"+gas+"_wld"+gas] = "NA"
#    cov_world_agg["cov_ets_"+gas+"_wld"+gas] = "NA"

    cov_world_agg["jurisdiction"] = "World"

    cov_world_agg.drop("index", axis=1, inplace=True)
    cov_world_agg.reset_index(inplace=True)

    agg_cov[gas] = pd.concat([agg_cov[gas], cov_world_agg])


In [13]:
# National-level coverage from subnational schemes

for gas in gases:

      for subnat_list in subnat_lists.keys():
            temp = agg_cov[gas].loc[agg_cov[gas].jurisdiction.isin(subnat_lists[subnat_list]), :]
            temp = temp.groupby(["year"]).sum()
            temp.reset_index(inplace=True)
            temp["jurisdiction"] = subnat_list+"_sub" # indicating it is the country-level coverage from subnational mechanisms

            temp[["cov_tax_"+gas+"_jurGHG", "cov_tax_"+gas+"_jur"+gas, "cov_ets_"+gas+"_jurGHG", "cov_ets_"+gas+"_jur"+gas,
                  "cov_all_"+gas+"_jurGHG", "cov_all_"+gas+"_jur"+gas]] = np.nan
            
            swap_list = {"cov_tax_"+gas+"_jurGHG":"cov_tax_"+gas+"_supraGHG", "cov_tax_"+gas+"_jur"+gas:"cov_tax_"+gas+"_supra"+gas, "cov_ets_"+gas+"_jurGHG":"cov_ets_"+gas+"_supraGHG", 
                        "cov_ets_"+gas+"_jur"+gas:"cov_ets_"+gas+"_supra"+gas, "cov_all_"+gas+"_jurGHG":"cov_all_"+gas+"_supraGHG", "cov_all_"+gas+"_jur"+gas:"cov_all_"+gas+"_supra"+gas,
                        "cov_tax_"+gas+"_supraGHG":"cov_tax_"+gas+"_jurGHG", "cov_tax_"+gas+"_supra"+gas:"cov_tax_"+gas+"_jur"+gas, "cov_ets_"+gas+"_supraGHG":"cov_ets_"+gas+"_jurGHG", 
                        "cov_ets_"+gas+"_supra"+gas:"cov_ets_"+gas+"_jur"+gas, "cov_all_"+gas+"_supraGHG":"cov_all_"+gas+"_jurGHG", "cov_all_"+gas+"_supra"+gas:"cov_all_"+gas+"_jur"+gas}
            
            temp.rename(columns=swap_list, inplace=True)
            
            temp_nat = agg_cov[gas].loc[agg_cov[gas].jurisdiction == subnat_list, :]

            temp_nat_subnat = pd.concat([temp_nat, temp])
            temp_nat_subnat = temp_nat_subnat.groupby(["year"]).sum() # summing country-level coverage from country-level and subnational mechanisms
            temp_nat_subnat.reset_index(inplace=True)

            temp_nat_subnat["jurisdiction"] = subnat_list

            agg_cov[gas] = agg_cov[gas].loc[agg_cov[gas].jurisdiction != subnat_list, :]
            
            agg_cov[gas] = pd.concat([agg_cov[gas], temp_nat_subnat])
    

In [14]:
# NA values for all entries of 'supra' columns of national jurisdictions

all_subnat_list = subnat_usa + subnat_can + subnat_chn

for gas in gases:

    supra_cols = ["cov_tax_"+gas+"_supraGHG", "cov_tax_"+gas+"_supra"+gas, "cov_ets_"+gas+"_supraGHG", 
                "cov_ets_"+gas+"_supra"+gas, "cov_all_"+gas+"_supraGHG", "cov_all_"+gas+"_supra"+gas]

    agg_cov[gas].loc[~agg_cov[gas].jurisdiction.isin(all_subnat_list), supra_cols] = np.nan

    coverage_agg_OUT = agg_cov[gas].fillna("NA")
    coverage_agg_OUT.sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/coverage/tot_coverage_jurisdiction_"+gas+".csv", index=None)

### II.2 World sectors

In [15]:
coverage_WldSect = {}

for gas in gases:
    
    coverage_sect[gas]

    cov_tax_columns_WldSectGas = [x for x in coverage_sect[gas].columns if "cov_tax" in x and "wld_sect" in x]
    cov_ets_columns_WldSectGas = [x for x in coverage_sect[gas].columns if "cov_ets" in x and "wld_sect" in x]
    cov_all_columns_WldSectGas = [x for x in coverage_sect[gas].columns if "cov_" in x and "wld_sect" in x]

    tax_columns = {"cov_tax_"+gas+"_WldSect"+gas:cov_tax_columns_WldSectGas}
    ets_columns = {"cov_ets_"+gas+"_WldSect"+gas:cov_ets_columns_WldSectGas}
    all_columns = {"cov_all_"+gas+"_WldSect"+gas:cov_all_columns_WldSectGas}

    coverage_sect_agg_schemes = coverage_sect[gas][["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]]

    for dic in [tax_columns, ets_columns, all_columns]:
        for key in dic.keys():
            coverage_sect_agg_schemes[key] = coverage_sect[gas][dic[key]].sum(axis=1)

    coverage_WldSect[gas] = coverage_sect_agg_schemes.groupby(['ipcc_code','year']).sum()
    coverage_WldSect[gas].reset_index(inplace=True)

    coverage_WldSect[gas].to_csv(path_aux_data+"/data/coverage/tot_coverage_world_sectors_"+gas+".csv", index=None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_sect_agg_schemes[key] = coverage_sect[gas][dic[key]].sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_sect_agg_schemes[key] = coverage_sect[gas][dic[key]].sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_sect_agg_schemes[key] = coverage_sect[gas][dic[k

# Emissions-weighted Carbon Price (ECP)
Combines: (i) (total) coverage of ETS and associated price, (ii) user-fuel coverage of taxes and associated tax rates


In [17]:
prices_usd = {}

for gas in gases:
    # simply execute function to create cFlxRate series
    ecp_cur_conv.cur_conv(wcpd[gas], gas, subnat_can, subnat_usa, subnat_chn, False, None)

    wcpd_usd = ecp_cur_conv.cur_conv(wcpd[gas], gas, subnat_can, subnat_usa, subnat_chn, True, 2021)

    #Bring together calculated emissions share at sector and sector-fuel level and carbon prices in constant USD

    id_columns = [x for x in wcpd_usd.columns if bool(re.match(re.compile("ets.+id"), x))==True or bool(re.match(re.compile("tax.+id"), x))==True]
    price_columns = [x for x in wcpd_usd.columns if bool(re.match(re.compile("ets.+price_usd_k"), x))==True or bool(re.match(re.compile("tax.+rate.+usd_k"), x))==True]

    prices_usd[gas] = wcpd_usd[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product']+price_columns]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdp_dfl_base_yr.rename(columns={"gdp_dfl":"gdp_dfl_by"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdp_dfl_base_yr.drop(["year"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdp_dfl_base_yr.rename(columns={"gdp_dfl":"gdp_dfl_by"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

## I. ECP from ETS and taxes (time-varying and fixed weights, jurisdiction level)

National and subnational jurisdictions, sectoral level

In [37]:
ecp_variables_map = {}

ecp_tv = {}
ecp_fixed = {}

for gas in gases:
    ecp_tv_nat = ecp_wav.ecp(coverage_nat[gas], prices_usd[gas], "national", gas, flow_excl, "time_varying", sectors=False)
    ecp_tv_subnat = ecp_wav.ecp(coverage_subnat[gas], prices_usd[gas], "subnational", gas, flow_excl, "time_varying", sectors=False)
    
    ecp_tv[gas] = pd.concat([ecp_tv_nat, ecp_tv_subnat])

    ecp_fixed_nat = ecp_wav.ecp(coverage_nat[gas], prices_usd[gas], "national", gas, flow_excl, "fixed", 2015, sectors=False)
    ecp_fixed_subnat = ecp_wav.ecp(coverage_subnat[gas], prices_usd[gas], "subnational", gas, flow_excl, "fixed", 2015, sectors=False)
    
    ecp_fixed[gas] = pd.concat([ecp_fixed_nat, ecp_fixed_subnat])



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)


In [16]:
ecp_tv_sect = {}
ecp_fixed_sect = {}

for gas in gases:
    ecp_tv_nat_sect = ecp_wav.ecp(coverage_nat_sect[gas], prices_usd[gas], "national", gas, flow_excl, "time_varying", sectors=True)
    ecp_tv_subnat_sect = ecp_wav.ecp(coverage_subnat_sect[gas], prices_usd[gas], "subnational", gas, flow_excl, "time_varying", sectors=True)
    
    ecp_tv_sect[gas] = pd.concat([ecp_tv_nat_sect, ecp_tv_subnat_sect])
    ecp_tv_nat_sect.groupby(["ipcc_code", "year"]).sum().to_csv(path_aux_data+"/data/ecp/ecp_sectors_wld/world_sectoral_ecp_"+gas+".csv")

    ecp_fixed_nat_sect = ecp_wav.ecp(coverage_nat_sect[gas], prices_usd[gas], "national", gas, flow_excl, "fixed", 2015, sectors=True)
    ecp_fixed_subnat_sect = ecp_wav.ecp(coverage_subnat_sect[gas], prices_usd[gas], "subnational", gas, flow_excl, "fixed", 2015, sectors=True)
    
    ecp_fixed_sect[gas] = pd.concat([ecp_fixed_nat, ecp_fixed_subnat])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)


In [33]:
ecp_tv_agg = {}
ecp_fixed_agg = {}

for gas in gases: 
    ecp_tv_agg[gas] = ecp_wav.ecp_aggregation(ecp_tv[gas], gas)
    ecp_fixed_agg[gas] = ecp_wav.ecp_aggregation(ecp_fixed[gas], gas)

    # National-level ecp from subnational schemes        
    ecp_list = {"time_varying":ecp_tv_agg[gas], "fixed_weights":ecp_fixed_agg[gas]}

    for key in subnat_lists.keys():
        ecp_tv_agg[gas] = ecp_wav.national_from_subnat(ecp_tv_agg[gas], subnat_lists[key], key, gas)
        ecp_fixed_agg[gas] = ecp_wav.national_from_subnat(ecp_fixed_agg[gas], subnat_lists[key], key, gas)

    # NA values for all entries of 'supra' columns of national jurisdictions

    supra_cols = ["ecp_ets_supraGHG_usd_k", "ecp_tax_supraGHG_usd_k", 
                  "ecp_ets_supra"+gas+"_usd_k", "ecp_tax_supra"+gas+"_usd_k", 
                  "ecp_all_supraGHG_usd_k", "ecp_all_supra"+gas+"_usd_k"]

    for df in [ecp_tv_agg[gas], ecp_fixed_agg[gas]]:
        df.loc[~df.jurisdiction.isin(all_subnat_list), supra_cols] = np.nan

In [38]:
for gas in gases:
    col_sel = ["jurisdiction", "year", 
                "ecp_ets_jurGHG_usd_k", "ecp_tax_jurGHG_usd_k", "ecp_all_jurGHG_usd_k", 
                "ecp_ets_jur"+gas+"_usd_k", "ecp_tax_jur"+gas+"_usd_k", "ecp_all_jur"+gas+"_usd_k",
                "ecp_ets_supraGHG_usd_k", "ecp_tax_supraGHG_usd_k", "ecp_all_supraGHG_usd_k", 
                "ecp_ets_supra"+gas+"_usd_k", "ecp_tax_supra"+gas+"_usd_k", "ecp_all_supra"+gas+"_usd_k"]

    ecp_tv_agg[gas][col_sel].fillna("NA").sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/ecp/ecp_economy/ecp_vw/ecp_tv_"+gas+".csv", index=None)
    ecp_fixed_agg[gas][col_sel].fillna("NA").sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/ecp/ecp_economy/ecp_fw/ecp_fixed_"+gas+".csv", index=None)

## II. Calculation of ECP from ETS and taxes (CO2 only, constant, jurisdiction-specific weights, jurisdiction level)

In [19]:
#Information needed (need two dataframes): 
#- at sector level: year of first implementation of carbon pricing on any fuel (one with the list of jurisdiction and year of implementation of first scheme)
#- at jurisdiction level: year of first implementation of carbon pricing in any sector (one with the list of jurisdiction-sector entries and year of implementation of first scheme)

firstYear = wcpd_all[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product', 'tax', 'ets']]

firstYear.loc[:, "pricing"] = firstYear.loc[:, "tax"] + firstYear.loc[:, "ets"]
firstYear.loc[:, "pricing"] = np.where(firstYear.loc[:, "pricing"] > 0, 1.0,0.0)
firstYear = firstYear.drop(["tax", "ets"], axis=1)
firstYear = firstYear.loc[firstYear.pricing == 1,]
firstYear.sort_values(by=["jurisdiction", "year", "ipcc_code", "Product"], ascending=True, inplace=True)

firstYear.drop_duplicates(subset=["jurisdiction", "ipcc_code", "Product"], inplace=True)

# jurisdiction-level, recording year prior to first year of pricing mechanism implementation
firstYear_jur = firstYear.groupby(["jurisdiction", "year"]).sum()
firstYear_jur.loc[:, "pricing"] = np.where(firstYear_jur.loc[:, "pricing"] > 0, 1.0, 0.0)
firstYear_jur.reset_index(inplace=True)

firstYear_jur = firstYear_jur.drop_duplicates(subset=["jurisdiction"])
firstYear_jur["year"] = firstYear_jur["year"]-1 # to take the year before first year of implementation
firstYear_jur = firstYear_jur.drop("pricing", axis=1)

firstYear_jur = pd.Series(firstYear_jur.year.values,index=firstYear_jur.jurisdiction).to_dict()

## adjustment needed for Finland and Poland - their respective schemes started in 1990 so 1989 should be the reference year for
## emissions. However, because GHG/CO2 CAIT series start in 1990, shares series start in 1990
firstYear_jur["Finland"] = 1990
firstYear_jur["Poland"] = 1990


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  firstYear.loc[:, "pricing"] = firstYear.loc[:, "tax"] + firstYear.loc[:, "ets"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  firstYear.loc[:, "pricing"] = np.where(firstYear.loc[:, "pricing"] > 0, 1.0,0.0)


In [49]:
def ecp_constIntro():
    
    df_concat = pd.DataFrame()
    gas = "CO2"

    for jur in ["Austria"]:#share_df.jurisdiction.unique():
        if jur in firstYear_jur.keys():
            weight_year = firstYear_jur[jur]
        else:
            weight_year = 2015

        temp_cp_jur = prices_usd[gas].loc[(prices_usd[gas]["jurisdiction"]==jur), :]

        share_df_jur = coverage_nat[gas][(coverage_nat[gas]["jurisdiction"]==jur)]
        x = ecp_wav.ecp(share_df_jur, temp_cp_jur, "national", gas, flow_excl, "fixed", weight_year, sectors=False)    

        try:
            share_df_jurSubnat = coverage_subnat[gas][(coverage_nat[gas]["jurisdiction"].isin(subnat_lists[jur]))]
            temp_cp_jurSubnat = prices_usd[gas].loc[(prices_usd[gas]["jurisdiction"].isin(subnat_lists[jur])), :]
            x_subnat = ecp_wav.ecp(share_df_jurSubnat, temp_cp_jurSubnat, "subnational", gas, flow_excl, "fixed", weight_year, sectors=False)

            x = pd.concat([x, x_subnat])
        except:
            columns = ['ecp_ets_supraGHG_usd_k', 'ecp_ets_supraCO2_usd_k',
                    'ecp_tax_supraGHG_usd_k', 'ecp_tax_supraCO2_usd_k',
                    'ecp_all_supraGHG_usd_k', 'ecp_all_supraCO2_usd_k']
            
            for col in columns:
                x[col] = np.nan

        y = ecp_wav.ecp_aggregation(x, gas)

        # National-level ecp from subnational schemes        
        for key in subnat_lists.keys():
            y = ecp_wav.national_from_subnat(y, subnat_lists[key], key, gas)

        # NA values for all entries of 'supra' columns of national jurisdictions

        supra_cols = ["ecp_ets_supraGHG_usd_k", "ecp_tax_supraGHG_usd_k", 
                    "ecp_ets_supra"+gas+"_usd_k", "ecp_tax_supra"+gas+"_usd_k", 
                    "ecp_all_supraGHG_usd_k", "ecp_all_supra"+gas+"_usd_k"]

        y.loc[~y.jurisdiction.isin(all_subnat_list), supra_cols] = np.nan

        if df_concat.empty == True:
            df_concat = y
        else:
            df_concat = pd.concat([df_concat, y])
            
        return df_concat



In [52]:
ecp_intro = {}

for gas in gases:
    ecp_intro[gas] = ecp_constIntro()

    col_sel = ["jurisdiction", "year", "ecp_ets_jurGHG_usd_k", "ecp_tax_jurGHG_usd_k",
               "ecp_all_jurGHG_usd_k", "ecp_ets_jur"+gas+"_usd_k", "ecp_tax_jur"+gas+"_usd_k", "ecp_all_jur"+gas+"_usd_k"]

    ecp_intro[gas].loc[ecp_intro[gas].year<=2018][col_sel].to_csv(path_aux_data+"/data/ecp/ecp_economy/ecp_intro/ecp_intro_"+gas+".csv", index=None)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)
