# Compute coal mines emissions

## Load new GEM coal dataframe

In [2]:
import numpy as np
import re

from carbon_bombs.io.gem import load_coal_mine_gem_database


def _year_match(x):
    match = re.search(r'\b\d{4}\b', str(x))

    if match:
        return int(match.group())

    return 3000

def _add_rank_columns(df_gem):    
    
    df_gem["year"] = df_gem.Start_year.apply(
        lambda x: _year_match(x)
    )
    df_gem["rank"] = df_gem.groupby("Project Name").year.rank(method="first", ascending=True)



def load_gem_database():
        
    df_gem = load_coal_mine_gem_database()


    # map columns
    GEM_cols_mapping = {
        "GEM Mine ID": "GEM_ID",
        "Mine Name": "Project Name",
        "Country / Area": "Country",
        "Latitude": "Latitude",
        "Longitude": "Longitude",
        "Parent Company": "Parent_Company",
        "Status": "Project_status",
        "Project Type": 'project_type',
        "Project Phase": "Project_Phase",
        "Opening Year": "Start_year",
        "Total Reserves (Proven and Probable, Mt)": "Reserves",
        "Total Resource (Inferred, Indicated, Measured)": "Resources",
        "Coal Type": "Coal_Type",
        "Coal Grade": "Coal_Grade"
    }

    # keep only wanted columns
    df_gem = df_gem.loc[:, GEM_cols_mapping.keys()]
    df_gem = df_gem.rename(columns=GEM_cols_mapping)

    # format reserves and resources columns
    df_gem["Reserves"] = df_gem["Reserves"].replace('-', np.nan) 
    df_gem["Resources"] = df_gem["Resources"].replace('-', np.nan) 

    # Retrieve reserves (to estimate emission) based on the paper
    # "Coal reserves are defined in this dataset as “recoverable reserves”[...] "
    # " When recoverable reserve figures were unavailable, we collected data on coal resources and indicated those in the dataset in Reserve category."
    df_gem["Reserves (Million tons)"] = np.where(df_gem["Reserves"].isna(), df_gem["Resources"], df_gem["Reserves"])
    
    df_gem["Reserve category name"] = np.where(
        ~df_gem["Reserves"].isna(), "Recoverable", np.where(
             ~df_gem["Resources"].isna(), "Resource", None
        )
    )

    # Consider 'Bituminous (Met)' type
    df_gem["Coal_Type"] = np.where(
        (df_gem["Coal_Type"] == 'Bituminous') & (df_gem["Coal_Grade"].str.contains("Met")),
        'Bituminous (Met)',
        df_gem["Coal_Type"]
    )

    # here the dataframe has multiple rows for the same Mine since we can have some expension
    
    # KEEP 1 ROW for each mine, use the one with the first year
    # special cases e.g. Angus Place Coal Mine --> no opening year for both bt Expansion took as first
    _add_rank_columns(df_gem)

    # Coal emission factors based on the paper
    coal_type_factor = {
        'Bituminous': 0.00244068,
        'Bituminous (Met)': 0.002668,
        'Subbituminous': 0.00181629,
        'Lignite': 0.0012019,
        'Anthracite': 0.00262461,
        # 'Bituminous and Subbituminous', <-- not found (TODO LATER)
        # 'Anthracite&Bituminous' <-- not found (TODO LATER)
    }

    # See what to do for these cases
    # 'Anthracite&Bituminous', 'Subbituminous / Lignite', 'Bituminous and Subbituminous'
    df_gem["Coal_Type"] = df_gem["Coal_Type"].replace(
        {
            'Anthracite&Bituminous': 'Anthracite',
            'Subbituminous / Lignite': 'Subbituminous',
            'Bituminous and Subbituminous': 'Bituminous',
        }
    )

    
    # Compute emissions
    df_gem["Emissions factor"] = df_gem["Coal_Type"].apply(lambda x: (coal_type_factor[x] if x in coal_type_factor else None))
    df_gem["Emissions"] = df_gem["Reserves (Million tons)"] * df_gem["Emissions factor"] * 1

    # keep only one row per mine
    df_final = df_gem.loc[df_gem["rank"] == 1]

    assert len(df_final) == df_gem["Project Name"].nunique()
    assert set(df_final["Project Name"]) == set(df_gem["Project Name"])

    # ========== TEMP FOR ANALYSIS ========== #
    # Add some details
    df_gem["detail"] = (
        "[" +
        df_gem["project_type"].fillna("Current") + " - " 
        + df_gem["Coal_Type"].fillna("") + " - " 
        + df_gem["Emissions"].fillna(0).round(3).astype(str)
        + "]"
    )

    # Compute total emissions
    total_em_df = df_gem.groupby("Project Name").agg(
        total_emissions=("Emissions", "sum"),
        details=("detail", "sum"),
    )

    # merge to add total emissions to the final df
    df_final = df_final.merge(
        total_em_df,
        on="Project Name"
    )

    # Final post process
    df_final["total_emissions"] = df_final["total_emissions"].astype(float).round(3)
    df_final["year"] = df_final["year"].replace(3000, None)

    # Sanity check to be sure that we only have one row per mine
    assert len(df_final) == df_gem["Project Name"].nunique()
    assert set(df_final["Project Name"]) == set(df_gem["Project Name"])

    return df_final


  df_gem["Reserves"] = df_gem["Reserves"].replace('-', np.nan)
  df_gem["Resources"] = df_gem["Resources"].replace('-', np.nan)


In [4]:
df_final =  load_gem_database()
df_final.head()

  df_gem["Reserves"] = df_gem["Reserves"].replace('-', np.nan)
  df_gem["Resources"] = df_gem["Resources"].replace('-', np.nan)


Unnamed: 0,GEM_ID,Project Name,Country,Latitude,Longitude,Parent_Company,Project_status,project_type,Project_Phase,Start_year,...,Coal_Type,Coal_Grade,Reserves (Million tons),Reserve category name,year,rank,Emissions factor,Emissions,total_emissions,details
0,M0001,Shabashak Coal Mine,Afghanistan,35.863255,67.596674,North Coal Enterprise (NCE),Operating,,,1967.0,...,Bituminous (Met),Met,,,1967.0,1.0,0.002668,,0.0,[Current - Bituminous (Met) - 0.0]
1,M0002,Airly Coal Mine,Australia,-33.113071,150.014937,Banpu,Operating,,,2009.0,...,Bituminous,Thermal,21.0,Recoverable,2009.0,1.0,0.002441,0.051254,0.051,[Current - Bituminous - 0.051]
2,M0004,Angus Place Coal Mine,Australia,-33.349157,150.199019,Banpu,Proposed,Expansion,,,...,Subbituminous,Thermal,54.0,Recoverable,,1.0,0.001816,0.09808,0.194,[Expansion - Subbituminous - 0.098][Current - ...
3,M0005,Appin Coal Mine,Australia,-34.211194,150.792736,Golden Energy and Resources Pte Ltd (GEAR); M ...,Operating,,,1962.0,...,Bituminous (Met),Met,113.0,Recoverable,1962.0,1.0,0.002668,0.301484,0.301,[Current - Bituminous (Met) - 0.301]
4,M0007,Ashton Coal Mine,Australia,-32.468942,151.078493,Yankuang Energy,Operating,,,2004.0,...,Bituminous (Met),Met,18.0,Recoverable,2004.0,1.0,0.002668,0.048024,0.048,[Current - Bituminous (Met) - 0.048][Expansion...


In [52]:
df_final.to_csv("all_coal_mines_gem.csv", index=False, sep=";")

## Load Paper dataframe and format it for comparison

In [6]:
import pandas as pd
from carbon_bombs.conf import FPATH_SRC_KHUNE_PAPER


df_paper = pd.read_excel(
    FPATH_SRC_KHUNE_PAPER, sheet_name="Coal", engine="openpyxl", skipfooter=3
)

cols_paper = {
    'Project Name': 'Project Name',
    'Country': 'Country',
    'Potential emissions (GtCO2)': 'total_emissions',
    'Status': 'Project_status',
    'Reserves (Million tons)': 'Reserves (Million tons)',
    'Reserve category name': 'Reserve category name',
    'Emissions factor': 'Emissions factor',
    'Coal type': 'Coal_Type',
    "Source": "Source_paper"
    # 'New': 'New'
}

df_paper = df_paper[cols_paper.keys()]
df_paper = df_paper.rename(columns=cols_paper)

manual_matching = {
    "Afşin-Elbistan Coal Mine": "Afşin-Elbistan Coal Mines",
    # "Alpha North Coal Mine": "Alpha North Coal Mine", # --> not found, cancelled? (Alpha West Coal Project, Alpha Coal Project)
    # "BIB Coal Mine": "BIB Coal Mine", # --> not found
    "Bernice-Cygnus Coal Mine": "Berenice-Cygnus Coal Mine",
    "Bogatyr Coal Mine": "Bogatyr Coal Mine (Kazakhstan)",
    # "Borly Coal Mines": "Borly Coal Mines", # --> Kuu-Check_Coal_Mine?
    "Changcheng No.3 Coal Mine": "Changcheng No. 3 Coal Mine",
    "Dananhu No. 7 Coal Mine": "Dananhu No. 7 Mine",
    "Dananhu No.1 Coal Mine": "Dananhu No. 1 Coal Mine",
    "Dananhu West No.2 Coal Mine": "Dananhu No. 2 Surface Mine",
    "Hongshaquan No.1 Coal Mine": "Hongshaquan No. 1 Surface Mine",
    "Huangling No.2 Coal Mine": "Huangling No. 2 Coal Mine",
    "Jiangjun Gebi No.2 Coal Mine": "Jiangjun Gebi No. 2 Coal Mine",
    "Kaniha Coal Mine": "Gopalji Kaniha Coal Mine",
    # "Listvianskaya Coal Mine": "Listvianskaya Coal Mine", # --> not found
    # "Maritsa Coal Mines": "Maritsa Coal Mines", # --> https://www.gem.wiki/Maritsa_Coal_Mines
    "Project Motheo": "Morupule Coal Mine",
    "Shahaiji No.1 Coal Mine": "Shajihai No.1 Coal Mine",
    "Shajihai No.2 Coal Mine": "Xinjiang Shajihai Coal Mine",
    "Shubarkol Coal Mine": "Shubarkol Premium Coal Mine", # to check 
    "Sihe Coal Mine": "East Well of Sihe Coal Mine", # to check 
    "Talike District No. 2 Coal Mine": "Talike No. 2 Coal Mine",
    "Thar Coal Mine": "Thar Block II Coal Mine", # to check
    "Ulug-Khem Project": "Ulug-Khem Coal Mine",
    "West Macedonia Lignite Centre (WMLC)": "West Macedonia Lignite Centre",
    "Wilton and Fairhill Coal Projects": "Wilton Coal Mine",
    "Xinwen Ili No.1 Coal Mine": "Xinwen IlI No.1 Coal Mine",
    "Yallourn": "Yallourn Coal Mine",
    "Yangchangwan No.1 Well Coal Mine": "Yangchangwan Coal Mine",
    "Yangquan No.1 Coal Mine": "Yangquan No. 1 Coal Mine",
    "Yimin Surface Coal Mine": "Yimin Surface Mine",

    # not in GEM
    "Banhardih": "Banhardih Coal Mine",
    # "Bankui": "Bankui",
    # "Elga Coal Mine": "Elga Coal Mine",
    "Fording River": "Fording River Operations",
    "Gare Pelma Sector II": "Gare Palma II Coal Mine",
    "Hamilton County Mine No.1": "Hamilton Mine No. 1",
    # "Inaglinskaya-2 Mine": "Inaglinskaya-2 Mine",
    "Kerandari BC": "Kerendari Coal Mine",
    # "Kolubara Mine Complex": "Kolubara Mine Complex",
    "Mandakini B": "Mandakini–B Coal Mine",
    "Saharpur Jamarpani": "Saharpur-Jamarpani Coal Mine",
    # "Sengwe Colliery": "Sengwe Colliery",

}

df_paper["Project Name"] = df_paper["Project Name"].replace(
    manual_matching
)
df_paper.head()

Unnamed: 0,Project Name,Country,total_emissions,Project_status,Reserves (Million tons),Reserve category name,Emissions factor,Coal_Type,Source_paper
0,Ensham Coal Mine,Australia,1.314994,Operating,724.0,Recoverable,0.001816,Subbituminous,Global Energy Monitor. (2021). Global Coal Min...
1,Hunter Valley North Coal Mine,Australia,1.734018,Operating,650.0,Resource,0.002668,Bituminous (Met),Global Energy Monitor. (2021). Global Coal Min...
2,Byerwen Coal Mine,Australia,1.840727,Operating,690.0,Recoverable,0.002668,Bituminous (Met),Global Energy Monitor. (2021). Global Coal Min...
3,Mount Pleasant Coal Mine,Australia,1.120272,Operating,459.0,Recoverable,0.002441,Bituminous,Global Energy Monitor. (2021). Global Coal Min...
4,Hunter Valley South Coal Mine,Australia,1.173797,Operating,440.0,Resource,0.002668,Bituminous (Met),https://datagovau.s3.amazonaws.com/bioregional...


## Comparison

In [48]:

df_final_merge = df_final.merge(
    df_paper,
    on="Project Name",
    how="outer",
    suffixes=('_gem', '_paper')
)

df_final_merge["In Paper"] = ~df_final_merge["Country_paper"].isnull()
df_final_merge["In GEM"] = ~df_final_merge["GEM_ID"].isnull()


In [54]:
df_final_merge.loc[df_final_merge["In Paper"]][[
    'Project Name',
    'In GEM',
    'In Paper',
    'Country_gem',
    'Country_paper',
    # 'Latitude',
    # 'Longitude',
    # 'Parent_Company',
    # 'Start_year',
    'year',
    'Project_status_gem',
    'Project_status_paper',
    # 'project_type',
    # 'Project_Phase',
    # 'Reserves',
    # 'Resources',
    'Coal_Type_gem',
    'Coal_Type_paper',
    # 'Coal_Grade',
    'Reserves (Million tons)_gem',
    'Reserves (Million tons)_paper',
    # 'Reserve category name',
    # 'rank',
    # 'Emissions factor',
    # 'Emissions',
    'total_emissions_gem',
    'total_emissions_paper',
    'details',
    'Source_paper',
]].to_csv("coal_mines_comparison.csv", sep=";", index=False)

In [27]:
gem_cb = (df_final_merge["total_emissions_gem"] >= 1)
gem_all = (~df_final_merge["GEM_ID"].isna())
paper_cb = (df_final_merge["In Paper"])

both = (gem_cb & paper_cb)
only_paper = (~gem_cb & paper_cb)
only_gem = (gem_cb & ~paper_cb)

both_ = (gem_all & paper_cb)
only_paper_ = (~gem_all & paper_cb)
only_gem_ = (gem_all & ~paper_cb)

print('- Number of mines in GEM:', gem_all.sum())
print('- Number of carbon bombs in GEM:', gem_cb.sum())
print('- Number of carbon bombs in Paper:', paper_cb.sum())
print()
print("Only considering GEM project with >1Gt:")
print(f"- In both: {both.sum()}")
print(f"- Only in GEM: {only_gem.sum()}")
print(f"- Only in paper: {only_paper.sum()}")
print()
print("Considering all GEM projects:")
print(f"- In both: {both_.sum()}")
print(f"- Only in GEM: {only_gem_.sum()}")
print(f"- Only in paper: {only_paper_.sum()}")

print()
# print(pd.crosstab(gem_cb, paper_cb))


- Number of mines in GEM: 5214
- Number of carbon bombs in GEM: 359
- Number of carbon bombs in Paper: 230

Only considering GEM project with >1Gt:
- In both: 173
- Only in GEM: 186
- Only in paper: 57

Considering all GEM projects:
- In both: 220
- Only in GEM: 4994
- Only in paper: 10

