In [19]:
import pandas as pd
# https://ec.europa.eu/eurostat/databrowser/product/view/env_air_gge
# Sectors that contain all other data nested in them
categories = [
    "Energy",
    "Industrial processes and product use",
    "Agriculture",
    "Waste management",
    "Land use, land use change, and forestry (LULUCF)",
    "Other sectors",
]

# Source column name -> short column name used by the rest of this notebook
column_map = {
    "src_crf": "sector",
    "geo": "country",
    "TIME_PERIOD": "year",
    "OBS_VALUE": "value",
}

# Values are expressed in million tonnes of CO2 equivalent
df = pd.read_csv("data/co2-eq-by-sector(EU27).csv")

# Rename, then keep only the renamed columns (in the order of the mapping)
df = df.rename(columns=column_map)[list(column_map.values())]

In [None]:
# Restrict the frame to the sectors listed in `categories`
in_categories = df['sector'].isin(categories)
df = df[in_categories]

# Narrow down to a single slice of the data:
#   - the EU-27 aggregate only
#   - 2022, the latest data available at the time
#   - four sectors; two of the categories are deliberately left out:
#       * LULUCF, as it offers negative emission values (it does not pollute)
#       * "Other sectors", as they represent a very residual value
#         (irrelevant to the purpose)
reported_sectors = [
    "Energy",
    "Industrial processes and product use",
    "Agriculture",
    "Waste management",
]
is_eu27 = df['country'] == "European Union - 27 countries (from 2020)"
is_2022 = df['year'] == 2022
is_reported = df['sector'].isin(reported_sectors)
df = df[is_eu27 & is_2022 & is_reported]

# Country and year are constant after the filter, so drop them, then add each
# sector's share of the summed value as a percentage; everything is rounded
# to two decimals.
df22 = df.drop(columns=["country", "year"])
df22 = df22.assign(percentage=df22['value'] / df22['value'].sum() * 100).round(2)

In [21]:
# Build the text of a `const sectors = [...]` declaration to copy-paste into the
# .js file. Each row of df22 becomes one indented entry, written as the repr of
# the row's dict.
entries = [f"    {record.to_dict()},\n" for _, record in df22.iterrows()]
output = "const sectors = [\n" + "".join(entries) + "]"
print(output)

const sectors = [
    {'sector': 'Energy', 'value': 2603.85, 'percentage': 77.24},
    {'sector': 'Industrial processes and product use', 'value': 291.84, 'percentage': 8.66},
    {'sector': 'Agriculture', 'value': 365.72, 'percentage': 10.85},
    {'sector': 'Waste management', 'value': 109.71, 'percentage': 3.25},
]
