# 1.1 Process the PRIMAP data product

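This notebook downloads the PRIMAP-hist v2.6 dataset from the [PRIMAP](https://zenodo.org/records/13752654) data repository on Zenodo, keeps the third-party (HISTTP) Kyoto GHG series for category `M.0.EL` from 1850 onwards, converts the values from Gg to Gt, and writes the result to `../data/processed/`.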

In [1]:
import os
from pathlib import Path

import pandas as pd

In [2]:
# Read the PRIMAP-hist CSV straight from Zenodo over HTTP (no local copy is kept).
data_url = "https://zenodo.org/records/13752654/files/Guetschow_et_al_2024a-PRIMAP-hist_v2.6_final_13-Sep-2024.csv"
df_raw = pd.read_csv(filepath_or_buffer=data_url)

In [3]:
# Absolute location of the folder that receives the processed outputs.
# The folder is created together with any missing parents; an already
# existing folder is left as it is.
processed_dir = Path(os.path.abspath("../data/processed/"))
os.makedirs(processed_dir, exist_ok=True)

In [4]:
# Preview the first five rows of the raw table to check its layout.
df_raw.head(n=5)

Unnamed: 0,source,scenario (PRIMAP-hist),provenance,area (ISO3),entity,unit,category (IPCC2006_PRIMAP),1750,1751,1752,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,PRIMAP-hist_v2.6_final,HISTCR,derived,ABW,CH4,CH4 * gigagram / a,1,0.00564,0.00567,0.00571,...,0.12,0.114,0.119,0.119,0.124,0.125,0.122,0.124,0.125,0.124
1,PRIMAP-hist_v2.6_final,HISTCR,derived,ABW,CH4,CH4 * gigagram / a,1.A,0.00564,0.00567,0.00571,...,0.0513,0.049,0.0515,0.0501,0.0543,0.0549,0.0545,0.0535,0.0548,0.0543
2,PRIMAP-hist_v2.6_final,HISTCR,derived,ABW,CH4,CH4 * gigagram / a,1.B,0.0,0.0,0.0,...,0.0688,0.0654,0.0675,0.0688,0.0699,0.07,0.0673,0.0704,0.0704,0.07
3,PRIMAP-hist_v2.6_final,HISTCR,derived,ABW,CH4,CH4 * gigagram / a,1.B.1,0.0,0.0,0.0,...,0.0688,0.0654,0.0675,0.0688,0.0699,0.07,0.0673,0.0704,0.0704,0.07
4,PRIMAP-hist_v2.6_final,HISTCR,derived,ABW,CH4,CH4 * gigagram / a,1.B.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Select the rows of interest with ONE combined boolean mask. Every comparison
# is evaluated on `df_raw` and applied to `df_raw`, so the selection does not
# depend on pandas re-aligning a mask against an already-filtered frame.
is_selected = (
    # NOTE(review): "M.0.EL" is presumably the national total excluding LULUCF
    # -- confirm against the PRIMAP-hist category documentation.
    (df_raw["category (IPCC2006_PRIMAP)"] == "M.0.EL")
    & (df_raw["entity"] == "KYOTOGHG (AR6GWP100)")
    # HISTTP is third party and HISTCR is country reported
    & (df_raw["scenario (PRIMAP-hist)"] == "HISTTP")
)

df_tmp = (
    df_raw
    .loc[is_selected]
    # these columns are constant after filtering (or not needed further on)
    .drop(columns=["source", "scenario (PRIMAP-hist)", "provenance", "category (IPCC2006_PRIMAP)", "unit", "entity"])
    .rename(columns={"area (ISO3)": "iso3"})
    .sort_values(by="iso3")
)

# Fail loudly if the filter matched nothing (e.g. a label changed in a new
# data release) instead of silently producing an empty table.
if df_tmp.empty:
    raise ValueError(
        "No rows match category 'M.0.EL', entity 'KYOTOGHG (AR6GWP100)' "
        "and scenario 'HISTTP' in the raw PRIMAP table."
    )

# ensure all column names are strings so that `str.isdigit` can be used below
df_tmp.columns = df_tmp.columns.astype(str)

# Reshape from wide (one column per year) to long (one row per iso3/year),
# keeping only the year columns from `start_year` onwards.
start_year = 1850
id_vars = [col for col in df_tmp.columns if not col.isdigit()]
value_vars = [col for col in df_tmp.columns if col.isdigit() and int(col) >= start_year]

df_final = (
    df_tmp
    # `melt` keeps only id_vars + value_vars, so earlier year columns are dropped here
    .melt(id_vars=id_vars, value_vars=value_vars, var_name="year", value_name="emissions_gg")
    # "year" holds the former column labels (strings); for four-digit years the
    # lexicographic order used here equals the chronological order
    .sort_values(by=["iso3", "year"])
    # 1 Gt = 1,000,000 Gg; vectorised division instead of a per-element apply
    .assign(emissions_gt=lambda x: x["emissions_gg"] / 1_000_000)
    .drop(columns=["emissions_gg"])
)

In [6]:
# Preview the first five rows of the long-format result.
df_final.head(n=5)

Unnamed: 0,iso3,year,emissions_gt
0,ABW,1850,6.51e-07
215,ABW,1851,6.58e-07
430,ABW,1852,6.66e-07
645,ABW,1853,6.78e-07
860,ABW,1854,6.9e-07


In [7]:
# Write the long-format table to the processed-data folder, without the
# DataFrame index column.
output_path = processed_dir / "primap-histtp-ghg-without-lulucf-1850-2023.csv"
df_final.to_csv(output_path, index=False)