In [14]:
import pandas as pd
import numpy as np

In [15]:
df = pd.read_csv(
    r'C:\Users\rebek\Downloads\query_result_2025-10-28T11_08_05.248061868Z.csv', # Reads in data from all emissions metabase file
    thousands=",",       # interpret commas as thousand separators inside numbers
    quotechar='"',       # respect quoted fields
    engine="python"      # more flexible parser for tricky CSVs
)

In [28]:
# Select and rename relevant columns
df_clean = df[["YEAR", "EMISSIONS_CH4_KT", "SOURCE_ALL"]].rename(
    columns={
        "YEAR": "year",
        "EMISSIONS_CH4_KT": "emissions",
        "SOURCE_ALL": "source"
    }
)

# Drop rows without emissions data
df_clean = df_clean.dropna(subset=["emissions"])

df_clean = df_clean[df_clean['source'] != 'EIA-'] #REmove EIA- and EIA-GEM data that we don't want to show on data tool
df_clean = df_clean[df_clean['source'] != 'EIA-GEM']

# Option 1: Show global totals per year
global_by_year = df_clean.groupby("year", as_index=False)["emissions"].sum()

# Option 2: Global totals per year and source
global_by_year_source = (
    df_clean.groupby(["year", "source"], as_index=False)["emissions"].sum()
)

# Aggregate years < 2024
older = (
    global_by_year_source[global_by_year_source["year"] < 2024]
    .groupby("year", as_index=False)["emissions"]
    .sum()
    .assign(source="UNFCCC")
)

# keep >= 2024 untouched
newer = global_by_year_source[global_by_year_source["year"] >= 2024]

# Combine them back together
global_by_year_source_combine = (
    pd.concat([older, newer], ignore_index=True)
        .sort_values("year")
)

# Save results
global_by_year.to_csv("global_emissions_by_year.csv", index=False)
global_by_year_source_combine.to_csv("global_emissions_by_year_source.csv", index=False)

In [24]:
global_by_year_source

Unnamed: 0,year,emissions,source
0,1985,63.96,UNFCCC
1,1986,17.10,UNFCCC
2,1988,1025.58,UNFCCC
3,1989,368.22,UNFCCC
4,1990,21376.97,UNFCCC
...,...,...,...
220,2027,1530.72,IEA-IEA
290,2027,77.24,IEA-UNFCCC
223,2027,68.48,IEA-IEA
211,2027,1170.00,IEA-GEM


In [17]:
# Select and rename relevant columns
df_clean = df[["YEAR", "EMISSIONS_CH4_KT", "SOURCE_ALL"]].rename(
    columns={
        "YEAR": "year",
        "EMISSIONS_CH4_KT": "emissions",
        "SOURCE_ALL": "source"
    }
)

# Drop rows without emissions data
df_clean = df_clean.dropna(subset=["emissions"])

# Global totals per year and source
global_by_year_source = (
    df_clean.groupby(["year", "source"], as_index=False)["emissions"].sum()
)

global_by_year_source

Unnamed: 0,year,source,emissions
0,1985,UNFCCC,63.96
1,1986,UNFCCC,17.10
2,1988,UNFCCC,1025.58
3,1989,UNFCCC,368.22
4,1990,UNFCCC,21376.97
...,...,...,...
144,2026,IEA-IEA,34266.89
145,2026,IEA-UNFCCC,34675.47
146,2027,IEA-GEM,53988.56
147,2027,IEA-IEA,34109.41


In [18]:
# Save results
global_by_year_source.to_csv("global_emissions_by_year_source.csv", index=False)