# Harmonize RCPs

We harmonize the RCP emissions to the historical emissions, because recalibrating the historical data is too damn hard.

In [None]:
import aneris
import scmdata
import datetime
import pyam
import pandas as pd
import numpy as np

import aneris.convenience

In [None]:
aneris.__version__

In [None]:
# only try and harmonize species common to both!
# ("both" presumably means the historical and the RCP emissions data - confirm)
# This list is used as the `variable` filter when loading each dataset.
variables = [
    # CO2, split into the two MAGICC categories
    'Emissions|CO2|MAGICC Fossil and Industrial',
    'Emissions|CO2|MAGICC AFOLU',
    # remaining species reported directly under "Emissions|"
    'Emissions|CH4',
    'Emissions|N2O',
    'Emissions|Sulfur',
    'Emissions|CO',
    'Emissions|VOC',
    'Emissions|NOx',
    'Emissions|BC',
    'Emissions|OC',
    'Emissions|NH3',
    # F-gases: PFCs
    'Emissions|F-Gases|PFC|CF4',
    'Emissions|F-Gases|PFC|C2F6',
    'Emissions|F-Gases|PFC|C6F14',
    # F-gases: HFCs
    'Emissions|F-Gases|HFC|HFC23',
    'Emissions|F-Gases|HFC|HFC32',
    'Emissions|F-Gases|HFC|HFC4310mee',
    'Emissions|F-Gases|HFC|HFC125',
    'Emissions|F-Gases|HFC|HFC134a',
    'Emissions|F-Gases|HFC|HFC143a',
    'Emissions|F-Gases|HFC|HFC227ea',
    'Emissions|F-Gases|HFC|HFC245fa',
    # F-gases: SF6
    'Emissions|F-Gases|SF6',
    # Montreal gases: CFCs (the only Montreal gases with a "CFC|" level)
    'Emissions|Montreal Gases|CFC|CFC11',
    'Emissions|Montreal Gases|CFC|CFC12',
    'Emissions|Montreal Gases|CFC|CFC113',
    'Emissions|Montreal Gases|CFC|CFC114',
    'Emissions|Montreal Gases|CFC|CFC115',
    # Montreal gases: everything else sits directly under "Montreal Gases|"
    'Emissions|Montreal Gases|CCl4',
    'Emissions|Montreal Gases|CH3CCl3',
    'Emissions|Montreal Gases|HCFC22',
    'Emissions|Montreal Gases|HCFC141b',
    'Emissions|Montreal Gases|HCFC142b',
    'Emissions|Montreal Gases|Halon1211',
    'Emissions|Montreal Gases|Halon1202',
    'Emissions|Montreal Gases|Halon1301',
    'Emissions|Montreal Gases|Halon2402',
    'Emissions|Montreal Gases|CH3Br',
    'Emissions|Montreal Gases|CH3Cl',
]

In [None]:
len(variables)

In [None]:
# Load the RCMIP emissions file and keep only the region "World", scenario
# "historical" series for the species listed in `variables`.
df_hist = scmdata.ScmRun(
    '../data/rcmip/rcmip-emissions-annual-means-v5-1-0.csv',
    lowercase_cols=True
).filter(region='World', scenario='historical', variable=variables)

In [None]:
df_hist

In [None]:
# Target time axis for the historical data: 1 January of every year from
# 1750 to 2015 inclusive (266 points).
times = [datetime.datetime(year, 1, 1) for year in range(1750, 2016)]

In [None]:
# Put the historical series onto the annual time axis held in `times`.
df_hist = df_hist.interpolate(target_times=times)

In [None]:
def add_year_historical_percentage_offset(df, dfhist, yr=2015, low_yr=2010):
    """
    Fill missing ``yr`` values using the percentage offset from history.

    For every row of ``df`` that has no value in ``yr``, the relative
    difference between that row and ``dfhist`` in ``low_yr`` is computed and
    the same relative difference is applied to the ``dfhist`` value in
    ``yr``::

        rel = (df[low_yr] - dfhist[low_yr]) / dfhist[low_yr]
        df[yr] = dfhist[yr] * rel + dfhist[yr]

    Rows that already have a value in ``yr`` are passed through unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Timeseries with one column per year. If ``yr`` is not a column it is
        treated as missing for every row. The input frame is not modified.
    dfhist : pandas.DataFrame
        Historical timeseries with columns ``low_yr`` and ``yr``. Its index
        must contain levels named ``"model"`` and ``"scenario"``; these are
        dropped so that rows are aligned with ``df`` on the remaining index
        levels.
    yr : int
        Column (year) to fill.
    low_yr : int
        Column (year) in which the relative difference to history is measured.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that already had a value in ``yr`` followed by the
        rows that were filled.

    Raises
    ------
    KeyError
        If ``low_yr`` is not a column of ``df``.
    AssertionError
        If, after dropping NaNs, the number of computed values differs from
        the number of rows that need filling (e.g. a row has no counterpart
        in ``dfhist`` or the computation yields NaN).
    """
    if yr not in df.columns:
        # Work on a copy so the caller's frame does not silently gain a new
        # all-NaN column as a side effect of calling this function.
        df = df.copy()
        df[yr] = float("nan")

    has_yr = df[yr].notnull()
    df_with_yr = df[has_yr]
    df_without_yr = df[~has_yr].copy()

    if low_yr not in df_without_yr.columns:
        raise KeyError(f"{low_yr} not in `dfno2015`")

    # History carries its own model/scenario labels; drop them so that the
    # arithmetic below aligns on the index levels shared with ``df``.
    hist_low = dfhist[low_yr].reset_index(["model", "scenario"], drop=True)
    hist_yr = dfhist[yr].reset_index(["model", "scenario"], drop=True)

    relative_diff = (
        df_without_yr[[low_yr]]
        .subtract(hist_low, axis=0)
        .divide(hist_low, axis=0)
        .dropna()
    )
    if relative_diff.shape[0] != df_without_yr.shape[0]:
        raise AssertionError("Some data will not get adjusted properly")

    fill_values = (
        relative_diff.multiply(hist_yr, axis=0)
        .add(hist_yr, axis=0)
        .dropna()
        .rename({low_yr: yr}, axis="columns")
    )
    if fill_values.shape[0] != df_without_yr.shape[0]:
        raise AssertionError("Some data will not get adjusted properly")

    # The aligned arithmetic may return the index levels in a different
    # order, so restore the original order before assigning by index.
    df_without_yr[yr] = fill_values[yr].reorder_levels(df_without_yr.index.names)

    return pd.concat([df_with_yr, df_without_yr])

In [None]:
# Year passed to aneris as `harmonisation_year`; also the first year of the
# harmonised output timesteps.
harmonisation_year = 2015

In [None]:
# Load the same RCMIP emissions file again, this time keeping the region
# "World" series of every scenario matching "rcp*" for the species in
# `variables`.
df_rcp = scmdata.ScmRun(
    '../data/rcmip/rcmip-emissions-annual-means-v5-1-0.csv',
    lowercase_cols=True
).filter(region='World', scenario='rcp*', variable=variables)

In [None]:
# Target time axis for the RCPs: 1 January of every year from 2000 to 2300
# inclusive. Note that this rebinds the global `times`.
times = [datetime.datetime(year, 1, 1) for year in range(2000, 2301)]
df_rcp = df_rcp.interpolate(target_times=times)

In [None]:
df_rcp

In [None]:
# Only referenced by the commented-out variable renaming further down.
prefix = ''

# Work on a copy of the RCP data, as a table with one column per year.
scenarios = df_rcp.copy()
scenarios = scenarios.timeseries(time_axis="year")
# Disabled: would strip a "<prefix>|" segment and an "|Unharmonized" suffix
# from the historical variable names.
# df_hist["variable"] = df_hist["variable"].apply(
#     lambda x: x.replace(f"{prefix}|", "").replace("|Unharmonized", "")
# )
# Remove "-equiv" and any remaining "-" from the historical unit strings
# (presumably so they match the units used by the scenarios - TODO confirm).
df_hist["unit"] = df_hist["unit"].str.replace("-equiv", "").str.replace("-", "")
# Keep only the years 1990-2019 of history, one column per year.
history = df_hist.filter(year=range(1990, 2020)).timeseries(time_axis="year")

# TODO: remove hard-coding
# not sure about this. can we put 2019 in?
historical_offset_add_year = 2015
historical_offset_base_year = 2010

# When harmonising in the "add" year, give every scenario row that lacks a
# value in that year one derived from its percentage offset to history in the
# base year.
if harmonisation_year == historical_offset_add_year:
    scenarios = add_year_historical_percentage_offset(
        scenarios,
        history,
        yr=historical_offset_add_year,
        low_yr=historical_offset_base_year,
    )
    
# Restrict the scenarios to annual steps from the harmonisation year to 2300
# inclusive: interpolate onto those years, then drop every other year.
output_timesteps = range(harmonisation_year, 2300 + 1)
scenarios = pyam.IamDataFrame(scenarios).interpolate(output_timesteps)
scenarios = scenarios.filter(year=output_timesteps).timeseries()

In [None]:
# Per-variable harmonisation method overrides handed to aneris, one row per
# variable with columns "method" and "variable".
#
# Only the five CFCs carry a "CFC|" level in `variables`; every other Montreal
# gas sits directly under "Emissions|Montreal Gases|". The names below follow
# that convention so that they agree with the names used in `variables`.
overrides = pd.DataFrame(
    [
        # (method, variable)
        ("reduce_ratio_2050", "Emissions|BC"),  # always ratio method by choice
        ("reduce_ratio_2150_cov", "Emissions|PFC"),  # high historical variance (cov=16.2)
        ("reduce_ratio_2150_cov", "Emissions|F-Gases|PFC|C2F6"),  # high historical variance (cov=16.2)
        ("reduce_ratio_2150_cov", "Emissions|F-Gases|PFC|C6F14"),  # high historical variance (cov=15.4)
        ("reduce_ratio_2150_cov", "Emissions|F-Gases|PFC|CF4"),  # high historical variance (cov=11.2)
        ("reduce_ratio_2050", "Emissions|CH4"),  # always ratio method by choice
        ("reduce_ratio_2150_cov", "Emissions|CO"),  # high historical variance (cov=15.4)
        ("reduce_ratio_2050", "Emissions|CO2"),  # always ratio method by choice
        # high historical variance, but using offset method to prevent diff
        # from increasing when going negative rapidly (cov=23.2)
        ("reduce_offset_2150_cov", "Emissions|CO2|MAGICC AFOLU"),
        ("reduce_ratio_2050", "Emissions|CO2|MAGICC Fossil and Industrial"),  # always ratio method by choice
        # basket not used in infilling (sum of f-gases with low model reporting confidence)
        ("constant_ratio", "Emissions|F-Gases"),
        # basket not used in infilling (sum of subset of f-gases with low model reporting confidence)
        ("constant_ratio", "Emissions|HFC"),
        ("constant_ratio", "Emissions|F-Gases|HFC|HFC125"),  # minor f-gas with low model reporting confidence
        ("constant_ratio", "Emissions|F-Gases|HFC|HFC134a"),  # minor f-gas with low model reporting confidence
        ("constant_ratio", "Emissions|F-Gases|HFC|HFC143a"),  # minor f-gas with low model reporting confidence
        ("constant_ratio", "Emissions|F-Gases|HFC|HFC227ea"),  # minor f-gas with low model reporting confidence
        ("constant_ratio", "Emissions|F-Gases|HFC|HFC245fa"),  # minor f-gas with low model reporting confidence
        ("constant_ratio", "Emissions|F-Gases|HFC|HFC23"),  # minor f-gas with low model reporting confidence
        ("constant_ratio", "Emissions|F-Gases|HFC|HFC32"),  # minor f-gas with low model reporting confidence
        ("constant_ratio", "Emissions|F-Gases|HFC|HFC4310mee"),  # minor f-gas with low model reporting confidence
        ("reduce_ratio_2050", "Emissions|N2O"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|NH3"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|NOx"),  # always ratio method by choice
        ("reduce_ratio_2150_cov", "Emissions|OC"),  # high historical variance (cov=18.5)
        ("constant_ratio", "Emissions|F-Gases|SF6"),  # minor f-gas with low model reporting confidence
        # Alternative considered for Sulfur: method 'default_aneris_tree',
        # i.e. depending on the decision tree in aneris/method.py.
        ("reduce_ratio_2050", "Emissions|Sulfur"),  # always ratio method by choice
        ("reduce_ratio_2150_cov", "Emissions|VOC"),  # high historical variance (cov=12.0)
        # Montreal gases: CFCs
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CFC|CFC11"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CFC|CFC12"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CFC|CFC113"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CFC|CFC114"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CFC|CFC115"),  # always ratio method by choice
        # Montreal gases: non-CFC species (no "CFC|" level in the name)
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CCl4"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CH3CCl3"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|HCFC22"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|HCFC141b"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|HCFC142b"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|Halon1211"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|Halon1202"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|Halon1301"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|Halon2402"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CH3Br"),  # always ratio method by choice
        ("reduce_ratio_2050", "Emissions|Montreal Gases|CH3Cl"),  # always ratio method by choice
    ],
    columns=["method", "variable"],
)

In [None]:
# Run the harmonisation once per (model, scenario) group and discard the
# result.
# NOTE(review): the following cell makes exactly the same calls and keeps the
# output, so this cell looks like a smoke test only - confirm it is still
# needed, since it repeats the same work.
for _, msdf in scenarios.groupby(["model", "scenario"]):
    aneris.convenience.harmonise_all(
        msdf,
        history=history,
        harmonisation_year=harmonisation_year,
        overrides=overrides,
    )

In [None]:
# Harmonise each (model, scenario) group against `history` in
# `harmonisation_year`, applying the per-variable method `overrides`, and
# collect one result per group in a list.
scenarios_harmonised = [
    aneris.convenience.harmonise_all(
        msdf,
        history=history,
        harmonisation_year=harmonisation_year,
        overrides=overrides,
    )
    for _, msdf in scenarios.groupby(["model", "scenario"])
]

In [None]:
#scenarios_harmonised

In [None]:
# Let pandas display up to 500 columns for the wide tables shown in this
# notebook.
pd.options.display.max_columns = 500
# Show the "Emissions|BC" rows of the first harmonised group.
scenarios_harmonised[0].iloc[scenarios_harmonised[0].index.get_level_values('variable') == "Emissions|BC"]

In [None]:
scenarios_harmonised[0]

In [None]:
len(scenarios_harmonised[0])

In [None]:
# Stack the per-group results into one table and turn the index levels into
# ordinary columns. This rebinds `scenarios_harmonised` from a list to a
# DataFrame.
scenarios_harmonised = pd.concat(scenarios_harmonised).reset_index()

In [None]:
np.tile(df_hist.values, (4,1))

In [None]:
scenarios_harmonised

In [None]:
# Prepend the historical values as year columns 1750..2014 (265 columns).
# The year 2015 is left out here; the table already starts at
# `harmonisation_year`.
#
# The history block is stacked 4 times (presumably once per RCP scenario -
# confirm). It is loop-invariant, so it is built once instead of on every
# iteration.
#
# NOTE(review): values are assigned by position, so this relies on the rows of
# `scenarios_harmonised` being 4 consecutive blocks, each in the same row
# order as `df_hist.values`. Nothing here checks that. The offset 7 is
# presumably the number of metadata columns in front of the first year
# column - confirm.
hist_block = np.tile(df_hist.values, (4, 1))
for i in range(265):
    scenarios_harmonised.insert(i + 7, 1750 + i, hist_block[:, i])

In [None]:
scenarios_harmonised

In [None]:
# Wrap the combined table (history plus harmonised scenarios) in a pyam
# IamDataFrame and write it to CSV.
scenarios_harmonised = pyam.IamDataFrame(scenarios_harmonised)
scenarios_harmonised.to_csv('../data/rcmip/rcps_harmonized.csv')