# Infill scenarios

Use the SSP projections for non-CO2, non-CH4 and non-N2O emissions.

Use CO2 as the lead variable. This might need a two-step infilling process:
1. Split total CO2 into FFI (energy and industrial processes) and AFOLU.
2. Use either total CO2 or FFI CO2 to infill the other species.


There is NO GOOD REASON why the infiller database shouldn't be public.

In [None]:
import multiprocessing
import warnings

import matplotlib.pyplot as pl
import numpy as np
import pandas as pd
import psutil
import pooch
import pyam
import silicone.database_crunchers
from silicone.stats import rolling_window_find_quantiles
from silicone import multiple_infillers#.decompose_collection_with_time_dep_ratio.DecomposeCollectionTimeDepRatio
from silicone.utils import return_cases_which_consistently_split
from tqdm.auto import tqdm

In [None]:
# Size of the multiprocessing pool: the CPU count reported by the OS.
WORKERS = multiprocessing.cpu_count()

In [None]:
# Show the installed silicone version.
silicone.__version__

In [None]:
# Show the installed pyam version.
pyam.__version__

In [None]:
# Reading the Excel file is very slow, so it was converted once to a binary
# pickle with the two lines below (kept for reference):
# df = pd.read_excel('../data_input/20220314_ar6emissions_harmonized_infilled.xlsx')
# df.to_pickle('../data_input/20220314_ar6emissions_harmonized_infilled.pkl')

In [None]:
# NOTE: this file isn't public, but we want it to be: place it on Zenodo and
# point all users to the Kikstra et al. citation.
# NOTE: reading this in is slow.
# NOTE(review): unpickling can execute arbitrary code; only load a pickle that
# was produced locally from the trusted Excel source.
df = pd.read_pickle('../data_input/20220314_ar6emissions_harmonized_infilled.pkl')
# Wrap the raw table in a pyam IamDataFrame for filtering and infilling.
infiller_database = pyam.IamDataFrame(df)

In [None]:
# Display the database as a wide timeseries table.
infiller_database.timeseries()

In [None]:
# Keep only the variables under the "Infilled|Emissions" hierarchy.
infiller_database = infiller_database.filter(variable="AR6 climate diagnostics|Infilled|Emissions|*")

In [None]:
# Build the sorted list of (model, scenario) tuples listed in the CSV
# (the file name suggests these are the scenarios that passed vetting).
df_scens = pd.read_csv('../data_input/ar6_model_scenario_passed_vetting.csv')
model_scen_pairs = sorted(zip(df_scens['model'], df_scens['scenario']))

In [None]:
# Subset the database one (model, scenario) pair at a time, because filtering on
# two lists would select the cross product, then stitch the pieces back together.
the_slowness = [
    infiller_database.filter(model=model, scenario=scen)
    for model, scen in tqdm(model_scen_pairs)
]

infiller_database = pyam.concat(the_slowness)

In [None]:
# Create the total CO2 variable as AFOLU + Energy and Industrial Processes and
# append it to the database in place.
infiller_database.add(
    "AR6 climate diagnostics|Infilled|Emissions|CO2|AFOLU", 
    "AR6 climate diagnostics|Infilled|Emissions|CO2|Energy and Industrial Processes", 
    "AR6 climate diagnostics|Infilled|Emissions|CO2",
    axis='variable', 
    fillna=None, 
    ignore_units='Mt CO2/yr',  # unit assigned to the new total
    append=True
)

In [None]:
database_species = infiller_database.variable

# Variables to drop from the list of species to infill, in removal order.
_species_to_drop = (
    # CH4 and N2O are not being infilled
    'AR6 climate diagnostics|Infilled|Emissions|CH4',
    'AR6 climate diagnostics|Infilled|Emissions|N2O',
    # species which do not vary
    'AR6 climate diagnostics|Infilled|Emissions|CCl4',
    'AR6 climate diagnostics|Infilled|Emissions|CFC11',
    'AR6 climate diagnostics|Infilled|Emissions|CFC113',
    'AR6 climate diagnostics|Infilled|Emissions|CFC114',
    'AR6 climate diagnostics|Infilled|Emissions|CFC115',
    'AR6 climate diagnostics|Infilled|Emissions|CFC12',
    # species that are all zero
    'AR6 climate diagnostics|Infilled|Emissions|HFC|HFC245ca',
    'AR6 climate diagnostics|Infilled|Emissions|CH3CCl3',
    'AR6 climate diagnostics|Infilled|Emissions|Halon1202',
    # aggregates
    'AR6 climate diagnostics|Infilled|Emissions|HFC',
    'AR6 climate diagnostics|Infilled|Emissions|PFC',
    'AR6 climate diagnostics|Infilled|Emissions|F-Gases',
)

# list.remove raises ValueError if a name is absent, which surfaces a changed database.
for _unwanted in _species_to_drop:
    database_species.remove(_unwanted)

In [None]:
# Read the CO2 column of each sample's emissions file and wrap it as a
# single-variable pyam scenario (model "RFF-SP", scenario = zero-padded sample id).
dfs = []
_years = np.arange(2020, 2101)

for sample in tqdm(range(1, 5)):
    _path = '../data_processed/emissions_files/emissions%05d.csv' % sample
    df_in = pd.read_csv(_path, index_col=0)
    # Restrict to 2020-2100 and put years in columns, as pyam expects wide data.
    co2_data = pd.DataFrame(df_in['CO2'], index=_years)
    # Factor 1000: presumably Gt CO2 -> Mt CO2 to match the declared unit - TODO confirm.
    _scenario = pyam.IamDataFrame(
        co2_data.T * 1000,
        model="RFF-SP",
        scenario="{:05d}".format(sample),
        region="World",
        unit="Mt CO2/yr",
        variable='AR6 climate diagnostics|Infilled|Emissions|CO2',
    )
    dfs.append(_scenario)

In [None]:
# Merge the per-sample frames into one IamDataFrame. All warnings raised
# during the concat are suppressed (this step is slow).

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    pyam_co2_data = pyam.concat(dfs)
# Display the combined CO2 data as a wide table.
pyam_co2_data.timeseries()

In [None]:
# Step 1: split total CO2 into its two components using the infiller database.
components = [
    "AR6 climate diagnostics|Infilled|Emissions|CO2|Energy and Industrial Processes",
    "AR6 climate diagnostics|Infilled|Emissions|CO2|AFOLU",
]
aggregate = "AR6 climate diagnostics|Infilled|Emissions|CO2"
# Only the aggregate variable is passed to the decomposer.
to_infill = pyam_co2_data.filter(variable=aggregate)

decomposer = multiple_infillers.DecomposeCollectionTimeDepRatio(infiller_database)
results = decomposer.infill_components(aggregate, components, to_infill)

In [None]:
# Display the decomposed CO2 components as a wide table.
results.timeseries()

In [None]:
# Species to infill from CO2: everything left in database_species except
# total CO2 and its two components (despite the name, all three are excluded).
_co2_variables = {
    "AR6 climate diagnostics|Infilled|Emissions|CO2",
    "AR6 climate diagnostics|Infilled|Emissions|CO2|Energy and Industrial Processes",
    "AR6 climate diagnostics|Infilled|Emissions|CO2|AFOLU",
}
database_species_except_total_co2 = [
    specie for specie in database_species if specie not in _co2_variables
]

In [None]:
# infilled = multiple_infillers.infill_all_required_variables(
#     to_infill, database, [target], output_timesteps=list(range(2020, 2101, 10))
# )

In [None]:
# Step 2 (serial version): infill every remaining species from total CO2 with
# quantile rolling windows, for each RFF-SP sample.
lead = ["AR6 climate diagnostics|Infilled|Emissions|CO2"]

# The cruncher and the derived lead -> follow relationships depend only on the
# infiller database, so build them once instead of once per sample.
# The quantile could be randomly selected, e.g. quantile=quantiles[scen].
cruncher = silicone.database_crunchers.QuantileRollingWindows(infiller_database)
fillers = [
    (follow, cruncher.derive_relationship(follow, lead))
    for follow in tqdm(database_species_except_total_co2, leave=False)
]
# Only years present in the infiller database are passed to the fillers.
database_years = infiller_database["year"].unique()

inner_list = []
for sample in tqdm(range(1, 5)):
    # The lead data is the same for every species of a given sample.
    filler_data = pyam_co2_data.filter(
        model="RFF-SP", scenario="{:05d}".format(sample)
    ).filter(year=database_years, variable=lead)
    for follow, filler in fillers:
        qrw_infilled = filler(filler_data)
        inner_list.append(qrw_infilled.filter(variable=follow))
pyam_emissions = pyam.concat(inner_list)

In [None]:
def run_fair(args):
    """Run ``fair.forward.fair_scm(**args)`` and return a reduced set of outputs.

    NOTE(review): ``fair`` is not imported anywhere in the visible source, so
    calling this raises NameError unless it is imported elsewhere.

    The call returns seven values; the 4th and 6th are discarded. Going by the
    original variable names (thisC, thisF, thisT, thisOHU, thisAF) these are
    presumably concentrations, forcing, temperature, ocean heat uptake and
    airborne fraction - TODO confirm against the FaIR documentation.

    Returns a 9-tuple:
        column 0 of the 1st output,
        column 31 of the 2nd output,
        row-wise sum of columns 35-39 of the 2nd output,
        column 40 of the 2nd output,
        row-wise sum of the first 43 columns of the 2nd output,
        row-wise sum of all columns of the 2nd output,
        the 3rd, 5th and 7th outputs unchanged.
    """
    conc, forcing, temp, _, ohu, _, af = fair.forward.fair_scm(**args)

    conc_col0 = conc[:, 0]
    forcing_col31 = forcing[:, 31]
    forcing_cols35_39 = np.sum(forcing[:, 35:40], axis=1)
    forcing_col40 = forcing[:, 40]
    forcing_first43 = np.sum(forcing[:, :43], axis=1)
    forcing_total = np.sum(forcing, axis=1)

    return (
        conc_col0,
        forcing_col31,
        forcing_cols35_39,
        forcing_col40,
        forcing_first43,
        forcing_total,
        temp,
        ohu,
        af,
    )

# NOTE(review): `arglist` and `SAMPLES` are not defined in the visible source,
# so this cell still fails unless they are defined elsewhere; it looks like a
# leftover from another notebook.
if __name__ == '__main__':
    # `Pool` was never imported as a bare name; use the imported module instead.
    with multiprocessing.Pool(28) as pool:
        result = list(tqdm(pool.imap(run_fair, arglist), total=SAMPLES))

In [None]:
def run(sample):
    """Infill all non-CO2 species for one RFF-SP sample from its total CO2.

    Reads the module-level ``infiller_database``, ``pyam_co2_data`` and
    ``database_species_except_total_co2``.

    Parameters
    ----------
    sample : int
        Sample number; formatted as a zero-padded five-digit scenario name.

    Returns
    -------
    pyam.IamDataFrame
        One infilled timeseries per species in
        ``database_species_except_total_co2``, concatenated.
    """
    lead = ["AR6 climate diagnostics|Infilled|Emissions|CO2"]
    cruncher = silicone.database_crunchers.QuantileRollingWindows(infiller_database)

    # The lead data does not depend on the species being infilled, so select it
    # once per sample rather than once per species.
    filler_data = pyam_co2_data.filter(
        model="RFF-SP", scenario="{:05d}".format(sample)
    ).filter(year=infiller_database["year"].unique(), variable=lead)

    inner_list = []
    for follow in tqdm(database_species_except_total_co2, leave=False):
        # The quantile could be randomly selected, e.g. quantile=quantiles[scen].
        filler = cruncher.derive_relationship(follow, lead)
        qrw_infilled = filler(filler_data)
        inner_list.append(qrw_infilled.filter(variable=follow))
    return pyam.concat(inner_list)

In [None]:
# Parallel version of the infilling: one `run` call per sample.
# NOTE(review): `result` is not used by any later cell in the visible source;
# the cells below read `pyam_emissions` instead.
if __name__ == '__main__':
    _samples = np.arange(1, 5, dtype=int)
    with multiprocessing.Pool(WORKERS) as pool:
        _progress = tqdm(pool.imap(run, _samples), total=4)
        result = list(_progress)

In [None]:
# Display the infilled species as a wide table.
pyam_emissions.timeseries()

In [None]:
# Add the decomposed CO2 components (`results`) to the infilled species.
pyam_emissions = pyam_emissions.append(results)

In [None]:
# Write the combined infilled scenarios to disk.
pyam_emissions.to_csv('../data_processed/infilled_emissions_scenarios.csv')

In [None]:
# Show the CPU count as reported by psutil with logical=False.
psutil.cpu_count(logical=False)