In [4]:
import pandas as pd
import pycountry

In [5]:
#GEM GCMT to get number of coal mines per country

mines = pd.read_csv(
    r'C:\Users\rebek\Ember\Data Tool\cmm-data-tool\query_result_2025-12-01T11_49_18.284701141Z.csv', # Reads in data from gem_mines_raw from metabase
    thousands=",",       # interpret commas as thousand separators inside numbers
    quotechar='"',       # respect quoted fields
    engine="python"      # more flexible parser for tricky CSVs
)

# flag lignite mines
mines["IS_LIGNITE"] = mines["COAL_TYPE"].isin(
    ["Lignite", "Subbituminous / Lignite"]
)

# aggregate per country
country_counts = (
    mines
    .groupby("COUNTRY")
    .agg(
        NUM_COAL_MINES=("COAL_TYPE", "size"),
        NUM_LIGNITE_MINES=("IS_LIGNITE", "sum")
    )
    .reset_index()
)

country_counts

Unnamed: 0,COUNTRY,NUM_COAL_MINES,NUM_LIGNITE_MINES
0,Afghanistan,4,0
1,Argentina,1,0
2,Australia,207,1
3,Bangladesh,4,0
4,Bhutan,3,0
...,...,...,...
62,Uzbekistan,7,3
63,Venezuela,3,0
64,Vietnam,26,0
65,Zambia,2,0


In [6]:
plumes = pd.read_csv(
    r'C:\Users\rebek\Ember\plume_perc\Plumes_V2.csv', # New GEM plume dataset
    thousands=",",       # interpret commas as thousand separators inside numbers
    quotechar='"',       # respect quoted fields
    engine="python"      # more flexible parser for tricky CSVs
)

result = (
    plumes.dropna(subset=['GEM Infrastructure Name (Nearby)'])   # remove rows with no infrastructure name
      .groupby('Country/Area')['GEM Infrastructure Name (Nearby)']
      .nunique()
      .reset_index(name='NUM_INFRA_WITH_AT_LEAST_1_PLUME')
)

result

Unnamed: 0,Country/Area,NUM_INFRA_WITH_AT_LEAST_1_PLUME
0,Algeria,1
1,Australia,20
2,Botswana,1
3,Canada,3
4,China,137
5,Colombia,3
6,India,4
7,Iran,1
8,Iraq,2
9,Kazakhstan,6


In [7]:
#Merging data to see % of mines with attributed plume per country

# rename column to match other df
result = result.rename(columns={"Country/Area": "COUNTRY"})

# merge on country
merged = country_counts.merge(result, on="COUNTRY", how="left")

# fill countries with no plumes detected
merged["NUM_INFRA_WITH_AT_LEAST_1_PLUME"] = merged["NUM_INFRA_WITH_AT_LEAST_1_PLUME"].fillna(0)

# compute percentage
merged["PERCENT_COAL_MINES_WITH_PLUME"] = (
    merged["NUM_INFRA_WITH_AT_LEAST_1_PLUME"] / merged["NUM_COAL_MINES"] * 100
)

# compute percentage of lignite mines per country
merged["PERCENT_LIGNITE_MINES"] = (
    merged["NUM_LIGNITE_MINES"] / merged["NUM_COAL_MINES"] * 100
)

merged

Unnamed: 0,COUNTRY,NUM_COAL_MINES,NUM_LIGNITE_MINES,NUM_INFRA_WITH_AT_LEAST_1_PLUME,PERCENT_COAL_MINES_WITH_PLUME,PERCENT_LIGNITE_MINES
0,Afghanistan,4,0,0.0,0.000000,0.000000
1,Argentina,1,0,0.0,0.000000,0.000000
2,Australia,207,1,20.0,9.661836,0.483092
3,Bangladesh,4,0,0.0,0.000000,0.000000
4,Bhutan,3,0,0.0,0.000000,0.000000
...,...,...,...,...,...,...
62,Uzbekistan,7,3,1.0,14.285714,42.857143
63,Venezuela,3,0,0.0,0.000000,0.000000
64,Vietnam,26,0,0.0,0.000000,0.000000
65,Zambia,2,0,0.0,0.000000,0.000000


In [10]:
#Remove rows woth 0% for visualisation

filtered = merged[merged["PERCENT_COAL_MINES_WITH_PLUME"] > 0]

# rename the percentage column
filtered = filtered.rename(columns={
    "PERCENT_COAL_MINES_WITH_PLUME": "Coal mines with at least one plume attributed",
    "PERCENT_LIGNITE_MINES": "Lignite coal mines where methane won't be detected"
})

# create new column: mines with no satellite detection
filtered["Coal mines with no attributed plumes"] = (
    100 - filtered["Coal mines with at least one plume attributed"]
)

filtered

Unnamed: 0,COUNTRY,NUM_COAL_MINES,NUM_LIGNITE_MINES,NUM_INFRA_WITH_AT_LEAST_1_PLUME,Coal mines with at least one plume attributed,Lignite coal mines where methane won't be detected,Coal mines with no attributed plumes
2,Australia,207,1,20.0,9.661836,0.483092,90.338164
6,Botswana,4,0,1.0,25.0,0.0,75.0
10,Canada,45,1,3.0,6.666667,2.222222,93.333333
11,China,2334,76,137.0,5.869751,3.256213,94.130249
12,Colombia,14,0,3.0,21.428571,0.0,78.571429
21,India,546,78,4.0,0.732601,14.285714,99.267399
23,Iran,6,0,1.0,16.666667,0.0,83.333333
25,Kazakhstan,27,3,6.0,22.222222,11.111111,77.777778
32,Mexico,5,0,1.0,20.0,0.0,80.0
35,Mozambique,10,0,1.0,10.0,0.0,90.0


In [11]:
filtered.to_csv("plume_percentage.csv", index=False)