# Emissions-weighted carbon price

# Packages and libraries

In [1]:
from datetime import date
today = date.today()
d1 = today.strftime("%b-%d-%Y")

path_dependencies = '/Users/gd/GitHub/ECP/_code/compilation/_dependencies/dep_ecp'
exec(open(path_dependencies+'/pkgs_and_directories.py').read())
exec(open("/Users/gd/GitHub/WorldCarbonPricingDatabase/_code/_compilation/_dependencies/jurisdictions.py").read())

subnat_lists = {"United States":jurisdictions["subnationals"]["United States"], "Canada":jurisdictions["subnationals"]["Canada"], "China":jurisdictions["subnationals"]["China"]}
all_subnat_list = jurisdictions["subnationals"]["United States"]+jurisdictions["subnationals"]["Canada"]+jurisdictions["subnationals"]["China"]

gases = ["CO2"] # "CH4", "N2O", "FGASES" 

lastInvYear = {"national":2022, "subnat":2018} 
lastDbYear = 2024


# Institutional design (World Carbon Pricing Database)

In [2]:
wcpd = {}

# Load IPCC to IEA mapping once
ipcc_iea_map = pd.read_csv(
    "/Users/gd/GitHub/ECP/_raw/_aux_files/ipcc2006_iea_category_codes.csv",
    usecols=["ipcc_code", "FLOW"]
).rename(columns={"FLOW": "iea_code"})

for gas in gases:
    # Load WCPD data
    wcpd_ctry = ecp_general.concatenate(f"{path_wcpd}/{gas}/national")
    wcpd_subnat = ecp_general.concatenate(f"{path_wcpd}/{gas}/subnational")
    wcpd_all = pd.concat([wcpd_ctry, wcpd_subnat]).sort_values(by=["jurisdiction", "year"])

    # Clean and deduplicate
    wcpd_all["Product"] = wcpd_all["Product"].fillna('NA')
    wcpd_all = wcpd_all.drop_duplicates(subset=["jurisdiction", "year", "ipcc_code", "Product"])

    # Add IEA sector codes
    wcpd_all = wcpd_all.merge(ipcc_iea_map, on="ipcc_code", how="left")
    wcpd_all["iea_code"] = wcpd_all["iea_code"].fillna('NA')

    # Standardize jurisdiction names
    def standardize(names):
        return {name: name.replace(".", "").replace(",", "").replace(" ", "_") for name in names}

    ctry_names = wcpd_ctry["jurisdiction"].unique()
    subnat_names = wcpd_subnat["jurisdiction"].unique()
    countries_dic = standardize(ctry_names)
    subnat_dic = standardize(subnat_names)

    # Check for duplicates
    if wcpd_all.duplicated(["jurisdiction", "year", "ipcc_code", "Product"]).any():
        print(f"The dataset for {gas} contains duplicates!")

    # Add coverage factors
    wcpd_all = ecp_cov_fac.coverageFactors(wcpd_all, gas)

    # Handle mechanism overlaps
    overlap = pd.read_csv(f"/Users/gd/GitHub/WorldCarbonPricingDatabase/_raw/overlap/overlap_mechanisms_{gas}.csv")
    wcpd_all = ecp_overlap.overlap(wcpd_all, overlap)

    # Save
    wcpd[gas] = wcpd_all


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inst_df_ids.loc[:, "overlap_"+i[0]+"_"+i[1]] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inst_df_ids.loc[:, "overlap_"+i[0]+"_"+i[1]+"_ids"] = inst_df_ids.loc[:, scheme_columns[i[0]]] + inst_df_ids.loc[:, scheme_columns[i[1]]]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inst_df_ids.loc[:,

# Emissions
## I. National jurisdictions 
### I.A Total GHG emissions (EDGAR)

In [3]:
# Global Warming Potential values
ipcc_gwp = pd.read_csv("/Users/gd/GitHub/ECP/_raw/ghg_inventory/gwp_list.csv")
ipcc_gwp_list = dict(zip(ipcc_gwp.edgar_label, ipcc_gwp.ar5_gwp_100y))

In [4]:
EDGAR_URL = "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/datasets/EDGAR_2024_GHG/"

dataURLs = {"CH4":EDGAR_URL+"EDGAR_CH4_1970_2023.zip",
            "CO2":EDGAR_URL+"IEA_EDGAR_CO2_1970_2023.zip",
            "FGASES":EDGAR_URL+"EDGAR_F-gases_1990_2023.zip",
            "N2O":EDGAR_URL+"EDGAR_N2O_1970_2023.zip"}

fileNames = {"CH4":'EDGAR_CH4_1970_2023.xlsx',
            "CO2":"IEA_EDGAR_CO2_1970_2023.xlsx",
            "FGASES":"EDGAR_F-gases_1990_2023.xlsx",
            "N2O":"EDGAR_N2O_1970_2023.xlsx"}

sheetNames = {"CH4":"IPCC 2006",
              "CO2":"IPCC 2006",
              "FGASES":"IPCC 2006",
              "N2O":"IPCC 2006"}

edgar_ghg = {}

for gas in dataURLs.keys():
    print(gas)
    resp = requests.get(dataURLs[gas], verify=False).content

    ## Open zip folder
    myzip = ZipFile(BytesIO(resp))
    #info = myzip.infolist()
    #print(info)
    
    download = myzip.open(fileNames[gas])

    df = pd.read_excel(download, header=0,
                    sheet_name=sheetNames[gas], skiprows=[x for x in range(0,9)])
    
    edgar_ghg[gas] = df

CH4




CO2




FGASES




N2O




In [5]:
# concordance between EDGAR and World Bank country names
edgar_wb_map = pd.read_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/aux_files/edgar_wb_ctry_name_map.csv")
edgar_wb_map = edgar_wb_map.loc[~edgar_wb_map.ctry_name_wb.isnull()]

edgar_wb_map = dict(zip(list(edgar_wb_map['ctry_name_edgar'].values), list(edgar_wb_map['ctry_name_wb'].values)))

def process_gas_dataframe(gas, df, gwp_dict, edgar_wb_map):
    df = df.copy()

    # Filter to fossil emissions
    df = df[df["fossil_bio"] == "fossil"]

    # Drop and rename columns
    df = df.drop(columns=["IPCC_annex", "C_group_IM24_sh", "Country_code_A3", "fossil_bio"])
    df.rename(columns={"Name": "jurisdiction"}, inplace=True)

    # Aggregate and reshape
    df = df.groupby(["jurisdiction", "ipcc_code_2006_for_standard_report", "Substance"]).sum().reset_index()
    df = df.melt(
        id_vars=["jurisdiction", "ipcc_code_2006_for_standard_report", "Substance"],
        var_name="year",
        value_name=gas
    )

    # Clean and map
    df["year"] = df["year"].str[2:].astype(int)
    df["jurisdiction"] = df["jurisdiction"].replace(edgar_wb_map)

    # Apply GWP
    if gas in ["CO2", "CH4", "N2O"]:
        df[gas] = df[gas] * gwp_dict[gas]
        df.drop(columns=["Substance"], inplace=True)
    else:
        df_gwp = pd.DataFrame({"Substance": gwp_dict.keys(), "gwp": gwp_dict.values()})
        df = df.merge(df_gwp, on="Substance", how="left")
        df[gas] = df[gas] * df["gwp"]
        df.drop(columns=["gwp"], inplace=True)
        df = df.groupby(["jurisdiction", "year", "ipcc_code_2006_for_standard_report"]).sum().reset_index()

    # Final cleanup
    df.rename(columns={"ipcc_code_2006_for_standard_report": "ipcc_code"}, inplace=True)

    df["ipcc_code"] = df["ipcc_code"].apply(lambda x: x.replace('.', '').upper())
    df["ipcc_code"] = df["ipcc_code"].apply(lambda x: x.replace('_NORES', '').upper())

    return df


# --- Main processing ---
df_gases = pd.DataFrame()

for gas, df in edgar_ghg.items():
    df_gas = process_gas_dataframe(gas, df, ipcc_gwp_list, edgar_wb_map)

    if df_gases.empty:
        df_gases = df_gas
    else:
        df_gases = df_gases.merge(df_gas, on=["jurisdiction", "year", "ipcc_code"], how="outer")

# Compute total GHGs
df_gases["all_GHG"] = df_gases[list(edgar_ghg.keys())].sum(axis=1)

df_gases_jurAgg = df_gases.groupby(["jurisdiction", "year"]).sum().reset_index()

df_gases_jurAgg = df_gases_jurAgg.loc[df_gases_jurAgg["year"]<=lastInvYear["national"]]

df_gases_jurAgg.to_csv('/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/ghg_national_total.csv',index=None)

In [6]:
# Country names
iea_wb_map = {'Australi':'Australia', 
            'Bosniaherz':'Bosnia and Herzegovina',
            'Brunei':'Brunei Darussalam', 
            'Congo':'Congo, Rep.', 
            'Congorep':'Congo, Dem. Rep.',
            'Costarica':'Costa Rica',
            'Coteivoire':"Cote d'Ivoire", 
            'Czech':'Czech Republic',
            'Dominicanr':'Dominican Republic',
            'Egypt':'Egypt, Arab Rep.', 
            'Elsalvador':'El Salvador',
            'Eqguinea':'Equatorial Guinea',
            'Eswatini':'Lesotho', 
            'Hongkong':'Hong Kong, SAR', 
            'Iran':'Iran, Islamic, Rep.', 
            'Korea':'Korea, Rep.', 
            'Koreadpr':'Korea, Dem. Rep.', 
            'Kyrgyzstan':'Kyrgyz Republic', 
            'Lao':'Lao PDR', 
            'Luxembou':'Luxembourg',
            'Nethland':'Netherlands',
            'Northmaced':'North Macedonia',
            'Nz':'New Zealand',
            'Philippine':'Philippines',
            'Russia':'Russian Federation',
            'Saudiarabi':'Saudi Arabia', 
            'Slovakia':'Slovak Republic',
            'Southafric':'South Africa',
            'Srilanka':'Sri Lanka', 
            'Ssudan':'South Sudan', 
            'Switland':'Switzerland', 
            'Syria':'Syrian Arab Republic', 
            'Turkmenist':'Turkmenistan',
            'Uae':'United Arab Emirates',
            'Uk':'United Kingdom',
            'Usa':'United States',
            'Venezuela':'Venezuela, RB',
            'Yemen':'Yemen, Rep.'}

In [7]:
# CREATE DATAFRAME WITH TOTAL EMISSIONS WORLD

df_gases_tot_world = df_gases_jurAgg.groupby(by=["year"]).sum()
df_gases_tot_world.reset_index(inplace=True)
df_gases_tot_world.to_csv('/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/ghg_world_total.csv',index=None)

### I.B National GHG Inventory (kt and % of totals)

In [8]:
# Load inventories
inventories = {
    "CO2": ecp_inv_nat.inventory_co2(wcpd_all, ctry_names, iea_wb_map, df_gases, edgar_wb_map),
    "CH4": ecp_inv_nat.inventory_non_co2("CH4", df_gases),
    "N2O": ecp_inv_nat.inventory_non_co2("N2O", df_gases),
    "FGASES": ecp_inv_nat.inventory_non_co2("FGASES", df_gases)
}

inventories_wldSect = {}

for gas in ['CO2']:#, inventory in inventories.items()[0]:
    inventory = inventories[gas]

    inventory = inventory[(inventory["year"]<=lastInvYear["national"])]

    # Calculate shares
    inventory_share = ecp_inv_share.emissions_share(inventory, df_gases_jurAgg, df_gases_tot_world, gas)

    # Define merge keys and columns
    if gas == "CO2":
        merge_keys = ["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]
        value_columns = ["jurisdiction", "year", "ipcc_code", "iea_code", "Product", gas]
    else:
        merge_keys = ["jurisdiction", "year", "ipcc_code"]
        value_columns = ["jurisdiction", "year", "ipcc_code", gas]

    # Merge shares into inventory
    inventory = inventory.merge(inventory_share, on=merge_keys, how="left")
    inventories[gas] = inventory

    # Save to CSV
    out_path = f"/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/inventory_nat_{gas}.csv"
    inventory.to_csv(out_path, index=False)

    # Calculate world sector shares
    sectors_wld_total = inventory[value_columns].groupby(["ipcc_code", "year"], as_index=False).sum()
    inventories_wldSect[gas] = ecp_inv_share.emissions_share_wld_sectors(inventory, sectors_wld_total, gas, "national")


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ippu_fug_nat["jurisdiction"].replace(edgar_wb_map, inplace=True)


In [9]:
for gas in gases:
    for ctry in countries_dic.keys():
        inventories[gas].loc[inventories[gas].jurisdiction==ctry, :].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/national/"+gas+"/inventory_"+gas+"_"+countries_dic[ctry]+".csv", index=None)

KeyboardInterrupt: 

## II. Subnational jurisdictions
### II.A Total GHG emissions

In [10]:
can = ecp_inv_subnat.load_canada_data("/Users/gd/Library/CloudStorage/OneDrive-rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/raw")
chn = ecp_inv_subnat.load_china_data("/Users/gd/Library/CloudStorage/OneDrive-rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/raw")
usa = ecp_inv_subnat.load_usa_data("/Users/gd/Library/CloudStorage/OneDrive-rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/raw")

inventory_subnat = pd.concat([can, chn, usa])
inventory_subnat = pd.merge(inventory_subnat, ipcc_iea_map, on=['ipcc_code'], how='left')

df_gases_tot_subnat = ecp_inv_subnat.generate_subnat_total(can, chn, usa)
df_gases_tot_subnat.to_csv('/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/ghg_subnat_total.csv',index=None)

  df = pd.read_csv(f"{path}/subnational/Canada/harmonized_data/ECCC/GHG_IPCC_Can_Prov_Terr_2021.csv")
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=["Rollup", "CategoryID", "CH4", "N2O", "Unit"], errors="ignore", inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={


### II.B Subnational inventory (kt and % totals)

In [11]:
inventories_subnat = {}
inventories_subnat_wldSect = {}
inventories_subnat_ctrySect = {}

for gas in gases:

    inventory_subnat_gas = inventory_subnat[["supra_jur", "jurisdiction", "year", "ipcc_code", "iea_code", gas]]

    inventory_subnat_share = ecp_inv_share.emissions_share(inventory_subnat_gas, 
                                                           df_gases_tot_subnat, df_gases_tot_world, gas, df_gases_jurAgg, "subnational")

    merge_keys = ["supra_jur", "jurisdiction", "year", "ipcc_code", "iea_code"]
    columns = ["supra_jur", "jurisdiction", "year", "ipcc_code", gas]

    inventories_subnat[gas] = pd.merge(inventory_subnat_gas, inventory_subnat_share, on=merge_keys, how="left")
    inventories_subnat[gas].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/inventory_subnat_"+gas+"_test.csv", index=None)

    # Shares of total world sector emissions
    sectors_wld_total = inventories[gas][["jurisdiction", "year", "ipcc_code", "iea_code", gas]].groupby(["ipcc_code", "year"]).sum()
    sectors_wld_total.reset_index(inplace=True)

    inventories_subnat_wldSect[gas] = ecp_inv_share.emissions_share_wld_sectors(inventories_subnat[gas], sectors_wld_total, gas, "subnational")

    # Shares of total country-sector emissions
    sectors_ctry_total = inventories[gas][["jurisdiction", "year", "ipcc_code", "iea_code", gas]].groupby(["jurisdiction", "year", "ipcc_code"]).sum()
    sectors_ctry_total.reset_index(inplace=True)

    inventories_subnat_ctrySect[gas] = ecp_inv_share.emissions_share_ctry_sectors(inventories_subnat[gas], sectors_ctry_total, gas)


In [51]:
for gas in gases:
    inventories_subnat_ctrySect[gas].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/subnational/"+gas+"/sector_level/inventory_"+gas+".csv", index=None)

    for jur in subnat_dic.keys():
        inventories_subnat[gas].loc[inventories_subnat[gas].jurisdiction==jur, :].to_csv("/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/subnational/"+gas+"/inventory_"+gas+"_"+subnat_dic[jur]+".csv", index=None)
        

TimeoutError: [Errno 60] Operation timed out: '/Users/gd/OneDrive - rff/Documents/Research/projects/ecp/ecp_dataset/source_data/ghg_inventory/processed/subnational/CO2/inventory_CO2_Gunma.csv'

# Coverage 
## I. Disaggregated coverage dataframes

** Note: National and subnational inventories do not have the same level of disaggregation **

In [12]:
coverage_nat = {}
coverage_subnat = {}
coverage = {}
coverage_sect = {}
coverage_nat_sect = {}
coverage_subnat_sect = {}

# Canada inventory paths need updating to bring subnat start year to 2021

# SHARE OF JURISDICTIONS TOTAL EMISSIONS
for gas in gases:

    coverage_nat[gas] = ecp_coverage.coverage(inventories[gas], lastInvYear["national"], lastDbYear, wcpd[gas], gas,
                                              False, "national")
    coverage_subnat[gas] = ecp_coverage.coverage(inventories_subnat[gas], lastInvYear["subnat"], lastDbYear, wcpd[gas], gas,
                                                 False, "subnational")

    coverage_all = pd.concat([coverage_nat[gas], coverage_subnat[gas]], ignore_index=True)
    coverage_all = coverage_all.loc[coverage_all["jurisdiction"]!="World", :]

    # Coverage figures should be calculated only based on aggregation of the most disaggregated flows, not their higher-level aggregation. 
    # Otherwise this might result in double counting. Hence aggregate sectors should be dropped from coverage dataframe.
    # It also currently excludes coverage of international aviation ('ABFLOW039') and marine ('ABFLOW040') bunkers 
    # as they are currently excluded from national total emissions.
    # Drop combustion sectors that are aggregation of lower level sectors and concatenate all coverage dataframes into a single one*

    flow_excl = ['1A', '1A1A', '1A1C', '1A2', '1A3'] #'1A1C' is exluded here as ABFLOW011 emissions are attributed twice (to both 1A1B and 1A1C)
    coverage_all = coverage_all.loc[~coverage_all.ipcc_code.isin(flow_excl), :]

    coverage[gas] = coverage_all


    # SHARE OF SECTORS' GLOBAL TOTAL EMISSIONS

    coverage_nat_sect[gas] = ecp_coverage.coverage(inventories_wldSect[gas], lastInvYear["national"], lastDbYear, wcpd[gas], gas,
                                True, "national")
    coverage_subnat_sect[gas] = ecp_coverage.coverage(inventories_subnat_wldSect[gas], lastInvYear["subnat"], lastDbYear, wcpd[gas], gas,
                                    True, "subnational")

    coverage_sect_all = pd.concat([coverage_nat_sect[gas], coverage_subnat_sect[gas]])
    coverage_sect_all = coverage_sect_all.loc[coverage_sect_all["jurisdiction"]!="World", :]

    coverage_sect[gas] = coverage_sect_all

## II. Aggregate coverage

- "The sum over all pricing mechanisms" of [emissions_share x coverage_factor] minus the overlapping coverage

We account for the fact that more than one tax scheme or ets scheme can apply to the same emissions. However, covered emissions should be counted only once when covered by one or more scheme. To calculate overlapping coverage at the sector-fuel level, we use the `overlap_` variable in `wcpd_all` dataframe created above.

### II.1 jurisdictions

In [13]:
agg_cov = {}

for gas in gases:
    # Initialize output DataFrame
    coverage_agg = coverage_all[["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]].copy()

    # Define coverage scopes
    scopes = ["jurGHG", f"jur{gas}", "wldGHG", f"wld{gas}", "supraGHG", f"supra{gas}"]

    # Utility function to get relevant columns
    def get_cov_columns(prefix, scopes, exclude_overlap=True, rename_prefix=None):
        result = {}
        for scope in scopes:
            matching = [
                col for col in coverage_all.columns
                if prefix in col and scope in col and (not exclude_overlap or "overlap" not in col)
            ]
            if rename_prefix:
                key = f"{rename_prefix}_{gas}_{scope}"
            else:
                key = f"{prefix}_{gas}_{scope}"
            result[key] = matching
        return result

    # Extract columns by instrument
    tax_columns = get_cov_columns("cov_tax", scopes)
    ets_columns = get_cov_columns("cov_ets", scopes)
    all_columns = get_cov_columns("cov_", scopes, exclude_overlap=True, rename_prefix="cov_all")

    # Overlap columns
    all_overlap_dic = {
        f"cov_all_{gas}_{scope}": f"cov_overlap_{gas}_{scope}" for scope in scopes
    }

    # A. Sum across tax and ETS instruments
    for dic in [tax_columns, ets_columns]:
        for new_col, cols in dic.items():
            coverage_agg[new_col] = coverage_all[cols].sum(axis=1)

    # B. Sum across all instruments and subtract overlaps
    for new_col, cols in all_columns.items():
        coverage_agg[new_col] = coverage_all[cols].sum(axis=1)
        overlap_col = all_overlap_dic.get(new_col)
        if overlap_col in coverage_all.columns:
            coverage_agg[new_col] -= coverage_all[overlap_col]

    # C. Aggregate across emission categories (rows)
    agg = coverage_agg.groupby(["jurisdiction", "year"], as_index=False).sum(numeric_only=True)

    # Store in dictionary
    agg_cov[gas] = agg


In [14]:
# WORLD TOTAL COVERAGE 

for gas in gases:
    cov_world_agg = agg_cov[gas][["jurisdiction","year", "cov_tax_"+gas+"_wld"+gas, "cov_ets_"+gas+"_wld"+gas, 
                                        "cov_tax_"+gas+"_wldGHG", "cov_ets_"+gas+"_wldGHG"]]

    cov_world_agg.reset_index(inplace=True)
    cov_world_agg = cov_world_agg.groupby(['year']).sum()

    cov_world_agg["cov_all_"+gas+"_jurGHG"] = cov_world_agg["cov_tax_"+gas+"_wldGHG"] + cov_world_agg["cov_ets_"+gas+"_wldGHG"]
    cov_world_agg["cov_all_"+gas+"_jur"+gas] = cov_world_agg["cov_tax_"+gas+"_wld"+gas] + cov_world_agg["cov_ets_"+gas+"_wld"+gas]
    
    cov_world_agg["cov_all_"+gas+"_wldGHG"] = cov_world_agg["cov_all_"+gas+"_jurGHG"]
    cov_world_agg["cov_all_"+gas+"_wld"+gas] = cov_world_agg["cov_all_"+gas+"_jur"+gas]

    # addind values in 'jur' columns for the "World" jurisdiction
    cov_world_agg["cov_tax_"+gas+"_jurGHG"] = cov_world_agg["cov_tax_"+gas+"_wldGHG"]
    cov_world_agg["cov_ets_"+gas+"_jurGHG"] = cov_world_agg["cov_ets_"+gas+"_wldGHG"]
    cov_world_agg["cov_tax_"+gas+"_jur"+gas] = cov_world_agg["cov_tax_"+gas+"_wld"+gas]
    cov_world_agg["cov_ets_"+gas+"_jur"+gas] = cov_world_agg["cov_ets_"+gas+"_wld"+gas]

#    cov_world_agg["cov_tax_"+gas+"_wldGHG"] = "NA"
#    cov_world_agg["cov_ets_"+gas+"_wldGHG"] = "NA"
#    cov_world_agg["cov_tax_"+gas+"_wld"+gas] = "NA"
#    cov_world_agg["cov_ets_"+gas+"_wld"+gas] = "NA"

    cov_world_agg["jurisdiction"] = "World"

    cov_world_agg.drop("index", axis=1, inplace=True)
    cov_world_agg.reset_index(inplace=True)

    agg_cov[gas] = pd.concat([agg_cov[gas], cov_world_agg])


In [15]:
# National-level coverage from subnational schemes

for gas in gases:

      for subnat_list in subnat_lists.keys():
            temp = agg_cov[gas].loc[agg_cov[gas].jurisdiction.isin(subnat_lists[subnat_list]), :]
            temp = temp.groupby(["year"]).sum()
            temp.reset_index(inplace=True)
            temp["jurisdiction"] = subnat_list+"_sub" # indicating it is the country-level coverage from subnational mechanisms

            temp[["cov_tax_"+gas+"_jurGHG", "cov_tax_"+gas+"_jur"+gas, "cov_ets_"+gas+"_jurGHG", "cov_ets_"+gas+"_jur"+gas,
                  "cov_all_"+gas+"_jurGHG", "cov_all_"+gas+"_jur"+gas]] = np.nan
            
            swap_list = {"cov_tax_"+gas+"_jurGHG":"cov_tax_"+gas+"_supraGHG", "cov_tax_"+gas+"_jur"+gas:"cov_tax_"+gas+"_supra"+gas, "cov_ets_"+gas+"_jurGHG":"cov_ets_"+gas+"_supraGHG", 
                        "cov_ets_"+gas+"_jur"+gas:"cov_ets_"+gas+"_supra"+gas, "cov_all_"+gas+"_jurGHG":"cov_all_"+gas+"_supraGHG", "cov_all_"+gas+"_jur"+gas:"cov_all_"+gas+"_supra"+gas,
                        "cov_tax_"+gas+"_supraGHG":"cov_tax_"+gas+"_jurGHG", "cov_tax_"+gas+"_supra"+gas:"cov_tax_"+gas+"_jur"+gas, "cov_ets_"+gas+"_supraGHG":"cov_ets_"+gas+"_jurGHG", 
                        "cov_ets_"+gas+"_supra"+gas:"cov_ets_"+gas+"_jur"+gas, "cov_all_"+gas+"_supraGHG":"cov_all_"+gas+"_jurGHG", "cov_all_"+gas+"_supra"+gas:"cov_all_"+gas+"_jur"+gas}
            
            temp.rename(columns=swap_list, inplace=True)
            
            temp_nat = agg_cov[gas].loc[agg_cov[gas].jurisdiction == subnat_list, :]

            temp_nat_subnat = pd.concat([temp_nat, temp])
            temp_nat_subnat = temp_nat_subnat.groupby(["year"]).sum() # summing country-level coverage from country-level and subnational mechanisms
            temp_nat_subnat.reset_index(inplace=True)

            temp_nat_subnat["jurisdiction"] = subnat_list

            agg_cov[gas] = agg_cov[gas].loc[agg_cov[gas].jurisdiction != subnat_list, :]
            
            agg_cov[gas] = pd.concat([agg_cov[gas], temp_nat_subnat])
    

In [16]:
# NA values for all entries of 'supra' columns of national jurisdictions

for gas in gases:

    supra_cols = ["cov_tax_"+gas+"_supraGHG", "cov_tax_"+gas+"_supra"+gas, "cov_ets_"+gas+"_supraGHG", 
                "cov_ets_"+gas+"_supra"+gas, "cov_all_"+gas+"_supraGHG", "cov_all_"+gas+"_supra"+gas]

    agg_cov[gas].loc[~agg_cov[gas].jurisdiction.isin(all_subnat_list), supra_cols] = np.nan

    coverage_agg_OUT = agg_cov[gas].fillna("NA")
    coverage_agg_OUT.sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/coverage/jurisdictions/tot_coverage_jurisdiction_"+gas+"_"+d1+".csv", index=None)

### II.2 World sectors

In [17]:
coverage_WldSect = {}

for gas in gases:
    
    coverage_sect[gas]

    cov_tax_columns_WldSectGas = [x for x in coverage_sect[gas].columns if "cov_tax" in x and "wld_sect" in x]
    cov_ets_columns_WldSectGas = [x for x in coverage_sect[gas].columns if "cov_ets" in x and "wld_sect" in x]
    cov_all_columns_WldSectGas = [x for x in coverage_sect[gas].columns if "cov_" in x and "wld_sect" in x]

    tax_columns = {"cov_tax_"+gas+"_WldSect"+gas:cov_tax_columns_WldSectGas}
    ets_columns = {"cov_ets_"+gas+"_WldSect"+gas:cov_ets_columns_WldSectGas}
    all_columns = {"cov_all_"+gas+"_WldSect"+gas:cov_all_columns_WldSectGas}

    coverage_sect_agg_schemes = coverage_sect[gas][["jurisdiction", "year", "ipcc_code", "iea_code", "Product"]]

    for dic in [tax_columns, ets_columns, all_columns]:
        for key in dic.keys():
            coverage_sect_agg_schemes[key] = coverage_sect[gas][dic[key]].sum(axis=1)

    coverage_WldSect[gas] = coverage_sect_agg_schemes.groupby(['ipcc_code','year']).sum()
    coverage_WldSect[gas].reset_index(inplace=True)

    coverage_WldSect[gas].to_csv(path_aux_data+"/data/coverage/world_sectors/tot_coverage_world_sectors_"+gas+"_"+d1+".csv", index=None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_sect_agg_schemes[key] = coverage_sect[gas][dic[key]].sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_sect_agg_schemes[key] = coverage_sect[gas][dic[key]].sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coverage_sect_agg_schemes[key] = coverage_sect[gas][dic[k

# Emissions-weighted Carbon Price (ECP)
Combines: (i) (total) coverage of ETS and associated price, (ii) user-fuel coverage of taxes and associated tax rates


In [18]:
prices_usd = {}

for gas in gases:
    # simply execute function to create cFlxRate series
    ecp_cur_conv.cur_conv(wcpd[gas], gas, subnat_lists["Canada"], subnat_lists["United States"], subnat_lists["China"], False, None)

    wcpd_usd = ecp_cur_conv.cur_conv(wcpd[gas], gas, subnat_lists["Canada"], subnat_lists["United States"], subnat_lists["China"], True, 2021)

    #Bring together calculated emissions share at sector and sector-fuel level and carbon prices in constant USD

    id_columns = [x for x in wcpd_usd.columns if bool(re.match(re.compile("ets.+id"), x))==True or bool(re.match(re.compile("tax.+id"), x))==True]
    price_columns = [x for x in wcpd_usd.columns if bool(re.match(re.compile("ets.+price_usd_k"), x))==True or bool(re.match(re.compile("tax.+rate.+usd_k"), x))==True]

    prices_usd[gas] = wcpd_usd[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product']+price_columns]

    prices_usd['CO2']['Product'].fillna('NA', inplace=True)
    prices_usd['CO2']['iea_code'].fillna('NA', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices_usd['CO2']['Product'].fillna('NA', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices_usd['CO2']['iea_code'].fillna('NA', inplace=True)


## I. ECP from ETS and taxes (time-varying and fixed weights, jurisdiction level)

National and subnational jurisdictions, sectoral level

In [19]:
ecp_variables_map = {}

ecp_tv = {}
ecp_fixed = {}

for gas in gases:
    ecp_tv_nat = ecp_wav.ecp(coverage_nat[gas], prices_usd[gas], "national", gas, flow_excl, "time_varying", sectors=False)
    ecp_tv_subnat = ecp_wav.ecp(coverage_subnat[gas], prices_usd[gas], "subnational", gas, flow_excl, "time_varying", sectors=False)
    
    ecp_tv[gas] = pd.concat([ecp_tv_nat, ecp_tv_subnat])

    ecp_fixed_nat = ecp_wav.ecp(coverage_nat[gas], prices_usd[gas], "national", gas, flow_excl, "fixed", 2015, sectors=False)
    ecp_fixed_subnat = ecp_wav.ecp(coverage_subnat[gas], prices_usd[gas], "subnational", gas, flow_excl, "fixed", 2015, sectors=False)
    
    ecp_fixed[gas] = pd.concat([ecp_fixed_nat, ecp_fixed_subnat])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)


In [20]:
ecp_tv_sect = {}
ecp_fixed_sect = {}

for gas in gases:
    ecp_tv_nat_sect = ecp_wav.ecp(coverage_nat_sect[gas], prices_usd[gas], "national", gas, flow_excl, "time_varying", sectors=True)
    ecp_tv_subnat_sect = ecp_wav.ecp(coverage_subnat_sect[gas], prices_usd[gas], "subnational", gas, flow_excl, "time_varying", sectors=True)
    
    ecp_tv_sect[gas] = pd.concat([ecp_tv_nat_sect, ecp_tv_subnat_sect])
    ecp_tv_nat_sect.groupby(["ipcc_code", "year"]).sum().to_csv(path_aux_data+"/data/ecp/ecp_sectors_wld/world_sectoral_ecp_"+gas+".csv")

    ecp_fixed_nat_sect = ecp_wav.ecp(coverage_nat_sect[gas], prices_usd[gas], "national", gas, flow_excl, "fixed", 2015, sectors=True)
    ecp_fixed_subnat_sect = ecp_wav.ecp(coverage_subnat_sect[gas], prices_usd[gas], "subnational", gas, flow_excl, "fixed", 2015, sectors=True)
    
    ecp_fixed_sect[gas] = pd.concat([ecp_fixed_nat, ecp_fixed_subnat])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df.drop(["year"], axis=1, inplace=True)


In [21]:
ecp_tv_agg = {}
ecp_fixed_agg = {}

for gas in gases: 
    ecp_tv_agg[gas] = ecp_wav.ecp_aggregation(ecp_tv[gas], gas)
    ecp_fixed_agg[gas] = ecp_wav.ecp_aggregation(ecp_fixed[gas], gas)

    # National-level ecp from subnational schemes        

    for key in subnat_lists.keys():
        ecp_tv_agg[gas] = ecp_wav.national_from_subnat(ecp_tv_agg[gas], subnat_lists[key], key, gas)
        ecp_fixed_agg[gas] = ecp_wav.national_from_subnat(ecp_fixed_agg[gas], subnat_lists[key], key, gas)

    # NA values for all entries of 'supra' columns of national jurisdictions

    supra_cols = ["ecp_ets_supraGHG_usd_k", "ecp_tax_supraGHG_usd_k", 
                  "ecp_ets_supra"+gas+"_usd_k", "ecp_tax_supra"+gas+"_usd_k", 
                  "ecp_all_supraGHG_usd_k", "ecp_all_supra"+gas+"_usd_k"]

    for df in [ecp_tv_agg[gas], ecp_fixed_agg[gas]]:
        df.loc[~df.jurisdiction.isin(all_subnat_list), supra_cols] = np.nan

In [22]:
for gas in gases:
    col_sel = ["jurisdiction", "year", 
                "ecp_ets_jurGHG_usd_k", "ecp_tax_jurGHG_usd_k", "ecp_all_jurGHG_usd_k", 
                "ecp_ets_jur"+gas+"_usd_k", "ecp_tax_jur"+gas+"_usd_k", "ecp_all_jur"+gas+"_usd_k",
                "ecp_ets_supraGHG_usd_k", "ecp_tax_supraGHG_usd_k", "ecp_all_supraGHG_usd_k", 
                "ecp_ets_supra"+gas+"_usd_k", "ecp_tax_supra"+gas+"_usd_k", "ecp_all_supra"+gas+"_usd_k"]

    ecp_tv_agg[gas][col_sel].fillna("NA").sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/ecp/ecp_economy/ecp_vw/ecp_tv_"+gas+"_"+d1+".csv", index=None)
    ecp_fixed_agg[gas][col_sel].fillna("NA").sort_values(by=["jurisdiction", "year"]).to_csv(path_aux_data+"/data/ecp/ecp_economy/ecp_fw/ecp_fixed_"+gas+"_"+d1+".csv", index=None)

## II. Calculation of ECP from ETS and taxes (CO2 only, constant, jurisdiction-specific weights, jurisdiction level)

In [19]:
#Information needed (need two dataframes): 
#- at sector level: year of first implementation of carbon pricing on any fuel (one with the list of jurisdiction and year of implementation of first scheme)
#- at jurisdiction level: year of first implementation of carbon pricing in any sector (one with the list of jurisdiction-sector entries and year of implementation of first scheme)

firstYear = wcpd_all[['jurisdiction', 'year', 'ipcc_code', 'iea_code', 'Product', 'tax', 'ets']]

firstYear.loc[:, "pricing"] = firstYear.loc[:, "tax"] + firstYear.loc[:, "ets"]
firstYear.loc[:, "pricing"] = np.where(firstYear.loc[:, "pricing"] > 0, 1.0,0.0)
firstYear = firstYear.drop(["tax", "ets"], axis=1)
firstYear = firstYear.loc[firstYear.pricing == 1,]
firstYear.sort_values(by=["jurisdiction", "year", "ipcc_code", "Product"], ascending=True, inplace=True)

firstYear.drop_duplicates(subset=["jurisdiction", "ipcc_code", "Product"], inplace=True)

# jurisdiction-level, recording year prior to first year of pricing mechanism implementation
firstYear_jur = firstYear.groupby(["jurisdiction", "year"]).sum()
firstYear_jur.loc[:, "pricing"] = np.where(firstYear_jur.loc[:, "pricing"] > 0, 1.0, 0.0)
firstYear_jur.reset_index(inplace=True)

firstYear_jur = firstYear_jur.drop_duplicates(subset=["jurisdiction"])
firstYear_jur["year"] = firstYear_jur["year"]-1 # to take the year before first year of implementation
firstYear_jur = firstYear_jur.drop("pricing", axis=1)

firstYear_jur = pd.Series(firstYear_jur.year.values,index=firstYear_jur.jurisdiction).to_dict()

## adjustment needed for Finland and Poland - their respective schemes started in 1990 so 1989 should be the reference year for
## emissions. However, because GHG/CO2 CAIT series start in 1990, shares series start in 1990
firstYear_jur["Finland"] = 1990
firstYear_jur["Poland"] = 1990


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  firstYear.loc[:, "pricing"] = firstYear.loc[:, "tax"] + firstYear.loc[:, "ets"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  firstYear.loc[:, "pricing"] = np.where(firstYear.loc[:, "pricing"] > 0, 1.0,0.0)


In [114]:
# this function does not aggregate subnational prices to national level 

def ecp_constIntro():
        
    df_concat = pd.DataFrame()
    gas = "CO2"

    for jur in inventories[gas].jurisdiction.unique():

        if jur in firstYear_jur.keys():
            weight_year = firstYear_jur[jur]
        else:
            weight_year = 2015

        temp_cp_jur = prices_usd[gas].loc[(prices_usd[gas]["jurisdiction"]==jur), :]
        share_df_jur = inventories[gas][(inventories[gas]["jurisdiction"]==jur) & (inventories[gas]["year"]==weight_year)]
        share_df_jur.drop(["year"], axis=1, inplace=True)
        
        # merging on `prices_temp` keys in this case because this is the dataframe with all years
        temp_df = share_df_jur.merge(temp_cp_jur, on=["jurisdiction", "ipcc_code", "iea_code", "Product"], how="right")

        ecp_variables_map = {"ecp_ets_jurGHG_usd_k":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets_+price+."), x))==True or bool(re.match(re.compile(gas+"_+jurGHG"), x))==True], 
                            "ecp_ets_jur"+gas+"_usd_k":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets_+price+."), x))==True or bool(re.match(re.compile(gas+"_+jur"+gas), x))==True], 
                            "ecp_ets_wldGHG_usd_k":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets_+price+."), x))==True or bool(re.match(re.compile(gas+"_+wldGHG"), x))==True],
                            "ecp_ets_wld"+gas+"_usd_k":[x for x in list(temp_df.columns) if bool(re.match(re.compile("ets_+price+."), x))==True or bool(re.match(re.compile(gas+"_+wld"+gas), x))==True],
                            "ecp_tax_jurGHG_usd_k":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile(gas+"_+jurGHG"), x))==True], 
                            "ecp_tax_jur"+gas+"_usd_k":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile(gas+"_+jur"+gas), x))==True], 
                            "ecp_tax_wldGHG_usd_k":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile(gas+"_+wldGHG"), x))==True], 
                            "ecp_tax_wld"+gas+"_usd_k":[x for x in list(temp_df.columns) if bool(re.match(re.compile("tax.+rate+."), x))==True or bool(re.match(re.compile(gas+"_+wld"+gas), x))==True]}

        for key in ecp_variables_map.keys():
            temp_df[key] = 0
            length = int(len(ecp_variables_map[key])/2)
            
            for i in range(0, length):
                cols = ecp_variables_map[key]
                cols.sort()
                
                temp_df[key] = temp_df[cols[i]]*temp_df[cols[i+length]] #+ #nan values need to be replaced with 0 otherwise the sum won't work
            
            temp_df[key] = temp_df[key].astype(float)
        
        temp_df = temp_df[["jurisdiction", "ipcc_code", "iea_code", "Product", "year"]+list(ecp_variables_map.keys())] 

        temp_df = temp_df.fillna(0) # CHECK WHY "NA" VALUES ARE PRODUCED IN THE FIRST PLACE

        temp_df["ecp_all_jurGHG_usd_k"] = temp_df["ecp_tax_jurGHG_usd_k"]+temp_df["ecp_ets_jurGHG_usd_k"]
        temp_df["ecp_all_jur"+gas+"_usd_k"] = temp_df["ecp_tax_jur"+gas+"_usd_k"]+temp_df["ecp_ets_jur"+gas+"_usd_k"]
        temp_df["ecp_all_wldGHG_usd_k"] = temp_df["ecp_tax_wldGHG_usd_k"]+temp_df["ecp_ets_wldGHG_usd_k"]
        temp_df["ecp_all_wld"+gas+"_usd_k"] = temp_df["ecp_tax_wld"+gas+"_usd_k"]+temp_df["ecp_ets_wld"+gas+"_usd_k"]

        for col in ["ecp_tax_supraGHG_usd_k", "ecp_tax_supraCO2_usd_k", "ecp_ets_supraGHG_usd_k", "ecp_ets_supraCO2_usd_k", 
                    "ecp_all_supraGHG_usd_k", "ecp_all_supraCO2_usd_k"]:
            temp_df[col] = np.nan

        y = ecp_wav.ecp_aggregation(temp_df, gas)

        y = y.loc[y.jurisdiction!="World"]

        y = y.groupby(["jurisdiction", "year"]).sum()
        y.reset_index(inplace=True)

        if df_concat.empty == True:
            df_concat = y
        else:
            df_concat = pd.concat([df_concat, y])

    return df_concat



In [115]:
ecp_intro = {}

for gas in gases:
    ecp_intro[gas] = ecp_constIntro()

    col_sel = ["jurisdiction", "year", "ecp_ets_jurGHG_usd_k", "ecp_tax_jurGHG_usd_k",
               "ecp_all_jurGHG_usd_k", "ecp_ets_jur"+gas+"_usd_k", "ecp_tax_jur"+gas+"_usd_k", "ecp_all_jur"+gas+"_usd_k"]

    ecp_intro[gas].loc[ecp_intro[gas].year<=2020][col_sel].to_csv(path_aux_data+"/data/ecp/ecp_economy/ecp_intro/ecp_intro_"+gas+".csv", index=None)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  share_df_jur.drop(["year"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  share_df_jur.drop(["year"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  share_df_jur.drop(["year"], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  share_df_jur.drop(