# Notebook to download RFF-SP emissions for CO2, CH4 and N2O

Note: this downloads a 1.7 GB file into the cache, but we only use the RFF emissions, which are around 240 MB in total. If you want to save disk space, at the expense of downloading the file anew every time you run the notebook, uncomment the code in the cell under "Delete the zipfile from cache" below.

In [None]:
import os
import pathlib
import zipfile

import matplotlib.pyplot as pl
import numpy as np
import pooch
import pandas as pd
from tqdm.auto import tqdm

In [None]:
# Fetch the complete RFF-SP archive through pooch; the result `zf` is handed to
# zipfile.ZipFile further down. known_hash=None means the download is not
# checked against a checksum.
rffsp_archive_url = "https://zenodo.org/record/5898729/files/RFFSPs-Final.zip"
zf = pooch.retrieve(rffsp_archive_url, known_hash=None, progressbar=True)

In [None]:
# Ensure the extraction target directory exists; a pre-existing directory is fine.
pathlib.Path('../data_input').mkdir(parents=True, exist_ok=True)

In [None]:
# Pull only the archive members whose name contains 'emissions/' out of the zip
# and place them under ../data_input; everything else in the archive is skipped.
# we don't want to commit these, so merge with notebook 100
with zipfile.ZipFile(zf, mode='r') as z:
    emissions_members = [name for name in z.namelist() if 'emissions/' in name]
    # One extractall call instead of a list comprehension run only for its side effects.
    z.extractall(path=os.path.join('..', 'data_input'), members=emissions_members)

## Delete the zipfile from cache

In [None]:
# for f in pooch.os_cache('pooch').glob('*RFFSPs-Final.zip'):
#     os.remove(f)

## Process the RFF files into a format easier for FaIR to deal with

We also want to attach the RCMIP/CMIP6 historical emissions on to this, so while we're at it, we'll download the SSP emissions from RCMIP.

I believe that RFF used SSP2-4.5 between 2015 and 2020. **TODO** ask Marcus or re-read the Rennert paper

In [None]:
# Load the three extracted RFF-SP tables (CO2, CH4, N2O), in that order.
rff_csv_paths = (
    '../data_input/emissions/rffsp_co2_emissions.csv',
    '../data_input/emissions/rffsp_ch4_emissions.csv',
    '../data_input/emissions/rffsp_n2o_emissions.csv',
)
df_co2, df_ch4, df_n2o = map(pd.read_csv, rff_csv_paths)

In [None]:
# Download (or reuse from the pooch cache) the RCMIP annual-mean emissions CSV.
# The MD5 checksum is passed as the known hash for the file.
rcmip_emissions_url = "https://zenodo.org/record/4589756/files/rcmip-emissions-annual-means-v5-1-0.csv"
ssp_emissions = pooch.retrieve(
    rcmip_emissions_url,
    known_hash="md5:4044106f55ca65b094670e7577eaf9b3",
)

In [None]:
# Read the downloaded RCMIP emissions file into a DataFrame.
df_ssp = pd.read_csv(filepath_or_buffer=ssp_emissions)

In [None]:
def _world_ssp245_series(variable):
    """Return the World / ssp245 row for `variable` over columns '1750'..'2020'.

    Gaps between reported years are filled by interpolating along the year
    columns, and the result is squeezed down from the selected frame's values.
    """
    row_selector = (
        (df_ssp['Region'] == 'World')
        & (df_ssp['Scenario'] == 'ssp245')
        & (df_ssp['Variable'] == variable)
    )
    year_columns = df_ssp.loc[row_selector, '1750':'2020']
    return year_columns.interpolate(axis=1).values.squeeze()


co2_hist = _world_ssp245_series('Emissions|CO2')
ch4_hist = _world_ssp245_series('Emissions|CH4')
n2o_hist = _world_ssp245_series('Emissions|N2O')

In [None]:
# Display the RFF CO2 table for inspection; the 'sample' and 'value' columns are
# the ones used when the per-sample emissions files are written below.
df_co2

In [None]:
# Create the output directory (and any missing parents); no error if it already exists.
pathlib.Path('../data_processed/emissions_files').mkdir(parents=True, exist_ok=True)

In [None]:
# Write one emissions CSV per RFF-SP sample: rows 1750-2019 come from the
# SSP2-4.5 series (co2_hist / ch4_hist / n2o_hist), rows 2020-2300 from the RFF
# tables. Columns are CO2, CH4, N2O.
#
# would be slightly better to load in default molwts from fair, and much better to use fair's inbuilt
# unit converter
molwt_co2 = 44.009
molwt_c   = 12.011
molwt_n2o = 44.013
molwt_n2  = 28.014
# Factor 1/1000, applied below to the historical CO2 and N2O series.
# NOTE(review): presumably Mt -> Gt for CO2 and kt -> Mt for N2O, which would
# match the GtCO2 / MtN2O plot labels -- confirm against the RCMIP units.
mt_to_gt  = 0.001

n_samples = 10000                  # sample ids 1..10000 are processed
output_years = range(1750, 2301)   # 551 annual rows per output file
n_hist = 270                       # rows 1750-2019 are filled from the *_hist arrays

# The historical rows are identical for every sample, so build them once.
# Each *_hist array ends at 2020; that last value is dropped because the RFF
# data fill the rows from 2020 onwards.
historical = np.column_stack([
    co2_hist[:-1] * mt_to_gt,
    ch4_hist[:-1],
    n2o_hist[:-1] * mt_to_gt,
])

# Group each RFF table by sample once. Looking up a group is then cheap, instead
# of scanning the whole table with a boolean mask on every iteration. Rows inside
# a group keep the order they have in the source table.
co2_by_sample = df_co2.groupby('sample')['value']
ch4_by_sample = df_ch4.groupby('sample')['value']
n2o_by_sample = df_n2o.groupby('sample')['value']

for sample in tqdm(range(1, n_samples + 1)):
    co2 = co2_by_sample.get_group(sample).values
    ch4 = ch4_by_sample.get_group(sample).values
    n2o = n2o_by_sample.get_group(sample).values

    emissions = np.zeros((len(output_years), 3))
    emissions[:n_hist] = historical
    # Scale by molecular-weight ratios: carbon mass -> CO2 mass, N2 mass -> N2O mass.
    emissions[n_hist:, 0] = co2 * molwt_co2 / molwt_c
    emissions[n_hist:, 1] = ch4
    emissions[n_hist:, 2] = n2o * molwt_n2o / molwt_n2

    df_out = pd.DataFrame(emissions, columns=['CO2', 'CH4', 'N2O'], index=output_years)
    df_out.to_csv('../data_processed/emissions_files/emissions%05d.csv' % sample)

In [None]:
# Quick visual check of `emissions` as left by the final iteration of the loop above.
plot_years = np.arange(1750, 2301)
series_labels = ('CO2 (GtCO2)', 'CH4 (MtCH4)', 'N2O (MtN2O)')
for column, series_label in enumerate(series_labels):
    pl.plot(plot_years, emissions[:, column], label=series_label)
pl.legend()

## Remove intermediate RFF datafiles

Reclaim 240 MB

In [None]:
# Delete the three extracted RFF CSVs, then the directory that held them.
for rff_csv in (
    '../data_input/emissions/rffsp_co2_emissions.csv',
    '../data_input/emissions/rffsp_ch4_emissions.csv',
    '../data_input/emissions/rffsp_n2o_emissions.csv',
):
    os.remove(rff_csv)
# os.rmdir removes only the emissions directory. os.removedirs would go on to
# delete ../data_input (and further parents) whenever they end up empty.
os.rmdir('../data_input/emissions')