In [57]:
import pandas as pd

In [58]:
# Load the plume CSV export. The python engine is used for its more tolerant
# parsing; commas inside numeric fields are treated as thousands separators
# and double quotes delimit quoted fields.
plumes = pd.read_csv(
    filepath_or_buffer=r'C:\Users\rebek\Ember\plume_perc\Plumes.csv',
    engine="python",
    quotechar='"',
    thousands=",",
)

In [59]:
# Load the emissions CSV export with the same parser settings as the plume
# file: tolerant python engine, "," as thousands separator, '"' as quote char.
emissions = pd.read_csv(
    filepath_or_buffer=r'C:\Users\rebek\Ember\plume_perc\Emissions.csv',
    engine="python",
    quotechar='"',
    thousands=",",
)

In [60]:
# ----------------------------------------------------
# Prepare plume data
# ----------------------------------------------------
# Coerce to numeric / datetime; values that cannot be parsed become NaN / NaT
# and are removed by the dropna below.
plumes["Emissions (kg/hr)"] = pd.to_numeric(plumes["Emissions (kg/hr)"], errors="coerce")
plumes["Observation Date"] = pd.to_datetime(plumes["Observation Date"], errors="coerce")
plumes["YEAR"] = plumes["Observation Date"].dt.year

# Drop rows with missing essential data
plumes = plumes.dropna(subset=["Country/Area", "Emissions (kg/hr)", "YEAR"])

# ------------------- FILTER -------------------------
# Keep only coal mines and exclude 2025
plumes = plumes[
    (plumes["Type of Infrastructure"] == "coal mine") & (plumes["YEAR"] != 2025)
]

# ----------------------------------------------------
# Compute average plume kg/hr per country (all years)
# ----------------------------------------------------
# Standardize USA naming BEFORE grouping. Renaming after the aggregation would
# leave two rows with the same key (each with its own mean) whenever the raw
# data contains both spellings, which in turn duplicates rows in a later merge
# on "Country/Area". The rename is applied on a temporary copy via assign, so
# the "Country/Area" values stored in `plumes` itself are left untouched.
plume_avg_country = (
    plumes.assign(**{
        "Country/Area": plumes["Country/Area"].replace({
            "United States": "United States of America"
        })
    })
    .groupby("Country/Area")["Emissions (kg/hr)"]
    .mean()
    .reset_index()
    .rename(columns={"Emissions (kg/hr)": "Average_Plumekg_per_hr"})
)

In [61]:
# ----------------------------------------------------
# Prepare national emissions
# ----------------------------------------------------
# Hours in a non-leap year, used to turn an annual total into an hourly rate.
HOURS_PER_YEAR = 24 * 365

# Coerce the kt column to numeric (unparseable -> NaN), drop incomplete rows,
# then convert kt/year → kg/hr (1 kt = 1e6 kg).
emissions = (
    emissions
    .assign(**{
        "METHANE EMISSIONS (KT)": pd.to_numeric(
            emissions["METHANE EMISSIONS (KT)"], errors="coerce"
        )
    })
    .dropna(subset=["COUNTRY", "YEAR", "METHANE EMISSIONS (KT)"])
    .assign(
        National_kg_per_hr=lambda frame: frame["METHANE EMISSIONS (KT)"] * 1e6 / HOURS_PER_YEAR
    )
)

# Mean national kg/hr per country over every year present in the file,
# keyed by "Country/Area" so it lines up with the plume table.
national_avg_country = (
    emissions.groupby("COUNTRY")
    .agg(Average_National_kg_per_hr=("National_kg_per_hr", "mean"))
    .reset_index()
    .rename(columns={"COUNTRY": "Country/Area"})
)

In [62]:
# ----------------------------------------------------
# Join per-country plume averages to national averages
# ----------------------------------------------------
# Left join: every plume country is kept; a country with no national record
# gets NaN in the national column (and therefore NaN in the percentage).
merged_avg = pd.merge(
    plume_avg_country,
    national_avg_country,
    how="left",
    on="Country/Area",
)

# ----------------------------------------------------
# Average plume rate as a percentage of the average national rate
# ----------------------------------------------------
plume_rate = merged_avg["Average_Plumekg_per_hr"]
national_rate = merged_avg["Average_National_kg_per_hr"]
merged_avg["Average_Percent_Detected"] = plume_rate.div(national_rate).mul(100)

In [63]:
# Write the per-country table to the working directory, omitting the row index.
merged_avg.to_csv(path_or_buf="plume_percentage.csv", index=False)