# Make concentrations time series

- use AR6
- interpolate
- fill
- document!

TODO: fill in the gases that are in AR6 but not in the Meinshausen et al. dataset (a few CFCs and HCFCs, plus i-C6F14). Since they are not included in the SSP projections, this may be unnecessary for now.

In [None]:
import numpy as np
import pandas as pd
import pooch

In [None]:
# Load the AR6 long-lived greenhouse gas concentration history. The first CSV
# column becomes the (year) index and the first 22 lines of the file are skipped.
df_conc = pd.read_csv(
    '../data/ar6_ghg_concentrations/LLGHG_history_AR6_v9_for_archive.csv',
    skiprows=22,
    index_col=0,
)

# Add empty (NaN) rows for 1751-1849 with a single reindex instead of enlarging
# the frame one row at a time. The index union keeps every existing year and
# comes back sorted, so no separate sort step is needed.
# NOTE(review): assumes the file has no rows for 1751-1849 (it is expected to
# jump from 1750 straight to 1850) and that year labels are unique -- confirm.
df_conc = df_conc.reindex(df_conc.index.union(pd.RangeIndex(1751, 1850)))
df_conc.index.name = None

In [None]:
# Fetch the RCMIP (Meinshausen et al.) annual-mean concentrations. They provide
# the pathway shape used to fill 1751-1849 for CO2, CH4 and N2O.
rcmip_concentration_url = (
    "doi:10.5281/zenodo.4589756/"
    "rcmip-concentrations-annual-means-v5-1-0.csv"
)
# pooch returns a local path to the file and checks it against the MD5 hash.
rcmip_concentration_file = pooch.retrieve(
    url=rcmip_concentration_url,
    known_hash="md5:0d82c3c3cdd4dd632b2bb9449a5c315f",
)
df_conc_cmip6 = pd.read_csv(rcmip_concentration_file)

In [None]:
# Fill 1751-1849 for the three major gases: take the shape of the CMIP6
# historical pathway over 1750-1850 and rescale it so that it starts at the
# AR6 1750 value and ends at the AR6 1850 value.
for gas in ['CO2', 'CH4', 'N2O']:
    is_gas = df_conc_cmip6['Variable'] == f"Atmospheric Concentrations|{gas}"
    is_world = df_conc_cmip6['Region'] == 'World'
    is_historical = df_conc_cmip6['Scenario'] == 'historical'
    # Year columns '1750'..'1850' of the matching row, squeezed to a 1-D array
    # (assumes exactly one row matches the three filters).
    cmip6 = df_conc_cmip6.loc[
        is_gas & is_world & is_historical, '1750':'1850'
    ].to_numpy().squeeze()

    # Fraction of the 1750 -> 1850 change reached each year: 0 at 1750, 1 at 1850.
    fraction = (cmip6 - cmip6[0]) / (cmip6[-1] - cmip6[0])
    ar6_1750 = df_conc.loc[1750, gas]
    ar6_1850 = df_conc.loc[1850, gas]

    # Drop both end points: only the 1751-1849 rows are being filled.
    df_conc.loc[1751:1849, gas] = (
        fraction * (ar6_1850 - ar6_1750) + ar6_1750
    )[1:-1]

In [None]:
# Minor GHGs whose 1750 and 1850 values are both provided and identical:
# copy the 1750 value into every intermediate year. The gases are addressed
# as two contiguous column ranges of the table.
for first_gas, last_gas in (('HFC-134a', 'n-C6F14'), ('C7F16', 'C8F18')):
    value_1750 = df_conc.loc[1750, first_gas:last_gas].to_numpy()
    df_conc.loc[1751:1849, first_gas:last_gas] = value_1750
df_conc

In [None]:
# Remove the gases that the Meinshausen dataset does not include.
gases_not_in_meinshausen = [
    'i-C6F14',
    'CFC-112',
    'CFC-112a',
    'CFC-113a',
    'CFC-114a',
    'HCFC-133a',
    'HCFC-31',
    'HCFC-124',
]
df_conc = df_conc.drop(columns=gases_not_in_meinshausen)

In [None]:
# Gap filling. Where a gas's time series stops before 2019 we simply assume
# persistence for now. That is likely not a terrible approximation: some gases
# are rising, some are stable and some are falling, but in most cases the
# rates of change are small and the overall contribution to forcing is tiny.
#
# Note: the gases that are not part of CMIP6 have many missing years between
# 1750 and their first measurement. Linear interpolation would not be suitable
# for those when they are eventually filled in.
df_conc = df_conc.interpolate(method='linear', axis=0)

In [None]:
# Strip the "n-" prefix from three column names.
pfc_renames = {
    'n-C4F10': 'C4F10',
    'n-C5F12': 'C5F12',
    'n-C6F14': 'C6F14',
}
df_conc = df_conc.rename(columns=pfc_renames)

In [None]:
# Swap rows and columns of the table.
df_conc = df_conc.transpose()

In [None]:
# Write the finished concentration table to CSV.
output_csv = '../data/ar6_ghg_concentrations/wmghgs_1750-2019.csv'
df_conc.to_csv(output_csv)