In [13]:
import pandas as pd
import pycountry

In [14]:
# Load the new GEM plume dataset. The Python parser engine is chosen for its
# tolerance of awkward CSVs; commas inside numbers are read as thousands
# separators and double-quoted fields are honoured.
plumes = pd.read_csv(
    r'C:\Users\rebek\Ember\plume_perc\Plumes_V2.csv',
    engine="python",
    quotechar='"',
    thousands=",",
)

# Keep only the plume rows whose infrastructure type is a coal mine.
plumes = plumes.loc[plumes['Type of Infrastructure'] == 'coal mine']

# Narrow to the nearby GEM infrastructure name and the emissions column.
mines_with_plumes = plumes[
    ['GEM Infrastructure Name (Nearby)', 'Emissions (kg/hr)']
]

mines_with_plumes

Unnamed: 0,GEM Infrastructure Name (Nearby),Emissions (kg/hr)
1192,Hopedale Coal Mine,
1225,Cumberland Coal Mine,840.051700
1228,Cumberland Coal Mine,1681.024000
1229,Cumberland Coal Mine,2084.132000
1230,Cumberland Coal Mine,602.308000
...,...,...
3469,,1207.714316
3470,Shanxi Dongda Coal Mine,
3471,Lu'an Licun Coal Mine,
3472,,


In [15]:
# Load the Metabase export of gem_mines_raw. Same parsing options as the plume
# file: Python engine, commas as thousands separators, double-quote quoting.
mines = pd.read_csv(
    r'C:\Users\rebek\Ember\Data Tool\cmm-data-tool\query_result_2025-12-01T11_49_18.284701141Z.csv',
    engine="python",
    quotechar='"',
    thousands=",",
)

# Flag rows whose coal type is one of the two lignite categories ...
mines["IS_LIGNITE"] = mines["COAL_TYPE"].isin(
    ["Lignite", "Subbituminous / Lignite"]
)

# ... and keep only the rows that are not flagged.
mines = mines[~mines["IS_LIGNITE"]]

In [19]:
# Per-country share of mine production that comes from mines whose name
# appears in the plume table (`mines_with_plumes`).
mines = mines.copy()

# Drop missing names before matching. `isin` treats NaN as equal to NaN, so
# plume rows without a nearby-mine name would otherwise mark every mine with a
# missing MINE_NAME as having a plume and inflate the covered production.
plume_mine_names = (
    mines_with_plumes['GEM Infrastructure Name (Nearby)'].dropna().unique()
)
mines["has_plume"] = mines["MINE_NAME"].isin(plume_mine_names)

# Non-numeric production values become NaN and are dropped together with rows
# that have no country, so they contribute to neither total.
mines["PRODUCTION__MTPA"] = pd.to_numeric(
    mines["PRODUCTION__MTPA"], errors="coerce"
)

mines = mines.dropna(subset=["COUNTRY", "PRODUCTION__MTPA"])

# Total production per country across all remaining mines.
total_production = (
    mines
    .groupby("COUNTRY", as_index=False)["PRODUCTION__MTPA"]
    .sum()
    .rename(columns={"PRODUCTION__MTPA": "total_production_mtpa"})
)

# Production per country restricted to mines matched to a plume.
plume_production = (
    mines[mines["has_plume"]]
    .groupby("COUNTRY", as_index=False)["PRODUCTION__MTPA"]
    .sum()
    .rename(columns={"PRODUCTION__MTPA": "plume_production_mtpa"})
)

# Left merge keeps every country; countries with no matched mine get 0.
coverage = (
    total_production
    .merge(plume_production, on="COUNTRY", how="left")
    .fillna({"plume_production_mtpa": 0})
)

coverage["percent_covered"] = (
    coverage["plume_production_mtpa"]
    / coverage["total_production_mtpa"]
    * 100
)

# Keep only countries with some covered production, ordered from lowest to
# highest coverage, and expose the result under its presentation label.
coverage_label = "% hard coal production with plume".upper()
coverage = (
    coverage[coverage["percent_covered"] > 0]
    .sort_values("percent_covered", ascending=True)
    [["COUNTRY", "percent_covered"]]
    .rename(columns={"percent_covered": coverage_label})
)

coverage

Unnamed: 0,COUNTRY,% HARD COAL PRODUCTION WITH PLUME
17,India,0.161113
41,South Africa,4.513139
49,United States,9.131778
10,China,16.563147
2,Australia,17.925028
39,Russia,22.890653
25,Mexico,25.787402
21,Kazakhstan,37.149933
9,Canada,42.139454
47,Ukraine,46.126984


In [17]:
# Write the coverage table to the current working directory, omitting the
# DataFrame index from the file.
coverage.to_csv(path_or_buf="plume_percentage.csv", index=False)