# Silicone
### Erica Simon, 02/13/24
## Purpose: use the Silicone infilling tool to fill missing emissions data for certain species
Credit: 
- Lamboll, R. D., Nicholls, Z. R. J., Kikstra, J. S., Meinshausen, M., & Rogelj, J. (2020). Silicone v1.0.0: an open-source Python package for inferring missing emissions data for climate change research. *Geoscientific Model Development, 13(11),* 5259–5275. https://doi.org/10.5194/gmd-13-5259-2020
- Meinshausen, M., Lewis, J., McGlade, C. et al. Realization of Paris Agreement pledges may limit warming just below 2 °C. *Nature* 604, 304–309 (2022). https://doi.org/10.1038/s41586-022-04553-z
    - code at https://github.com/climate-resource/ndc-realisations-2021





In [1]:
import pandas as pd
import numpy as np
import pyam

import silicone.multiple_infillers as mi
import silicone.database_crunchers as cr

<IPython.core.display.Javascript object>

In [2]:
import silicone.database_crunchers
from silicone.time_projectors import ExtendLatestTimeQuantile
import scmdata
import scmdata.database
import matplotlib.pyplot as plt

from tqdm.autonotebook import tqdm

  import tqdm.autonotebook as tqdman


In [3]:
# Scenario emissions that still need infilling: raw table, then pyam wrapper.
future_df = pd.read_csv('~/outputs/GCAM_infilling2.csv')
df_to_infill = pyam.IamDataFrame(future_df)

# Historical emissions: raw table, then pyam wrapper (used below as `df`).
hist_df = pd.read_csv('~/outputs/hist_emis_ALL.csv')
df = pyam.IamDataFrame(hist_df)

  df.set_index(index + REQUIRED_COLS + extra_cols)
  df.set_index(index + REQUIRED_COLS + extra_cols)


In [4]:
# Variables listed in the historical CSV that never appear in the scenario CSV
# (np.setdiff1d returns them as a sorted array of unique values).
missing_vars = np.setdiff1d(
    hist_df['Variable'].unique(),
    future_df['Variable'].unique(),
)

In [5]:
# Lead variable(s) handed to the infiller as `variable_leaders`.
lead = ['Emissions|CO2 FFI']

# Variables requested from the infiller via `required_variables_list`.
variables_of_interest = ['Emissions|C3F8']

# Annual output timesteps: every year from 2022 through 2100 inclusive.
years_list = [*range(2022, 2100 + 1)]

In [6]:
# Requested variables that are absent from the variable list of `df`.
unavailable_variables = [
    name for name in variables_of_interest if name not in df.variable
]


In [7]:
# Display the requested variables missing from `df`; an empty list means none are missing.
unavailable_variables

[]

In [8]:
# Display the pyam summary of `df` (built from hist_emis_ALL.csv).
df

<class 'pyam.core.IamDataFrame'>
Index:
 * model    : Historical (1)
 * scenario : GCP+CEDS+PRIMAP+GFED (1)
Timeseries data coordinates:
   region   : World (1)
   variable : Emissions|BC, Emissions|C2F6, Emissions|C3F8, ... Emissions|c-C4F8 (51)
   unit     : Gt CO2/yr, Mt BC/yr, Mt CH4/yr, Mt CO/yr, Mt N2O/yr, ... kt cC4F8/yr (50)
   year     : 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, ... 2022 (273)

In [9]:
# Display the pyam summary of `df_to_infill` (built from GCAM_infilling2.csv).
df_to_infill

<class 'pyam.core.IamDataFrame'>
Index:
 * model    : GCAM 6.0 NGFS (1)
 * scenario : Below 2 C, Current Policies, Delayed transition, ... Net Zero 2050 (7)
Timeseries data coordinates:
   region   : World (1)
   variable : Emissions|BC, Emissions|C2F6, Emissions|C3F8, ... Emissions|c-C4F8 (51)
   unit     : Gt CO2/yr, Mt BC/yr, Mt CH4/yr, Mt CO/yr, Mt N2O/yr, ... kt cC4F8/yr (50)
   year     : 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, ... 2100 (351)

In [10]:
# Ask Silicone to infill `variables_of_interest` in `df_to_infill`, passing `df`
# as the second positional argument and `lead` as the lead variable(s), using the
# QuantileRollingWindows cruncher and the annual timesteps in `years_list`.
# NOTE(review): the summary of `df` shown in this notebook ends in 2022, and the
# infilled series displayed below has a single 2022 column even though
# `output_timesteps` asks for 2022-2100 — presumably the database passed here must
# itself reach 2100 for later years to be filled; confirm.
df_infilled = mi.infill_all_required_variables(
    df_to_infill,
    df,
    variable_leaders=lead,
    required_variables_list=variables_of_interest, # If None, would infill a default list
    cruncher=cr.QuantileRollingWindows,
    output_timesteps=years_list,
    infilled_data_prefix=None,
    to_fill_old_prefix=None,
    check_data_returned=False,
)

  wide_db = wide_db.applymap(lambda x: np.nan if isinstance(x, str) else x)
  self.meta[name] = meta[name].combine_first(self.meta[name])
Filling required variables: 100%|██████████| 1/1 [00:00<00:00, 29.21it/s]


In [17]:
# Show the infilled result for the first requested variable as a wide timeseries table.
df_infilled.filter(variable=variables_of_interest[0]).timeseries()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,2022
model,scenario,region,variable,unit,Unnamed: 5_level_1
GCAM 6.0 NGFS,Below 2 C,World,Emissions|C3F8,kt C3F8/yr,0.380074
GCAM 6.0 NGFS,Current Policies,World,Emissions|C3F8,kt C3F8/yr,0.380074
GCAM 6.0 NGFS,Delayed transition,World,Emissions|C3F8,kt C3F8/yr,0.380074
GCAM 6.0 NGFS,Fragmented World,World,Emissions|C3F8,kt C3F8/yr,0.380074
GCAM 6.0 NGFS,Low demand,World,Emissions|C3F8,kt C3F8/yr,0.380074
GCAM 6.0 NGFS,NDCs,World,Emissions|C3F8,kt C3F8/yr,0.380074
GCAM 6.0 NGFS,Net Zero 2050,World,Emissions|C3F8,kt C3F8/yr,0.380074


In [12]:
# Show the same variable in the un-infilled input for comparison (first rows only).
df_to_infill.filter(variable=variables_of_interest[0]).timeseries().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,1750,1751,1752,1753,1754,1755,1756,1757,1758,1759,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
model,scenario,region,variable,unit,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
GCAM 6.0 NGFS,Below 2 C,World,Emissions|C3F8,kt C3F8/yr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.314138,0.323186,0.304124,0.319107,0.324656,0.324586,0.346019,0.36034,0.377938,0.380074
GCAM 6.0 NGFS,Current Policies,World,Emissions|C3F8,kt C3F8/yr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.314138,0.323186,0.304124,0.319107,0.324656,0.324586,0.346019,0.36034,0.377938,0.380074
GCAM 6.0 NGFS,Delayed transition,World,Emissions|C3F8,kt C3F8/yr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.314138,0.323186,0.304124,0.319107,0.324656,0.324586,0.346019,0.36034,0.377938,0.380074
GCAM 6.0 NGFS,Fragmented World,World,Emissions|C3F8,kt C3F8/yr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.314138,0.323186,0.304124,0.319107,0.324656,0.324586,0.346019,0.36034,0.377938,0.380074
GCAM 6.0 NGFS,Low demand,World,Emissions|C3F8,kt C3F8/yr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.314138,0.323186,0.304124,0.319107,0.324656,0.324586,0.346019,0.36034,0.377938,0.380074


In [13]:
def extend_timeseries(
    infilling_database,
    scenario,
    lead='Emissions|CO2 FFI',
    smoothing=0,
    years=range(2022, 2101, 5),
):
    """Extend ``scenario`` forward in time with Silicone's ExtendLatestTimeQuantile.

    The infilling database is first restricted to ``years``; a relationship for
    ``lead`` is derived from it and applied to ``scenario``. The projected data
    are appended to ``scenario``, wrapped in a ``scmdata.ScmRun``, resampled
    with the ``"AS"`` rule and tagged with ``stage = "extended"``.

    Parameters
    ----------
    infilling_database
        Object with a ``filter(year=...)`` method that ExtendLatestTimeQuantile
        accepts (the notebook passes a ``pyam.IamDataFrame``).
    scenario
        Data to extend; must be accepted by the derived filler and provide
        ``append`` (the notebook passes a ``pyam.IamDataFrame``).
    lead : str
        Variable passed to ``derive_relationship``.
    smoothing
        Forwarded unchanged to ``derive_relationship``.
    years : iterable of int
        Years of ``infilling_database`` kept for the projection. The default,
        ``range(2022, 2101, 5)``, yields 2022, 2027, ..., 2097 — note that it
        does not contain 2100.

    Returns
    -------
    The resampled run produced by ``scmdata.ScmRun(...).resample("AS")`` with
    its ``"stage"`` metadata set to ``"extended"``.

    Raises
    ------
    ValueError
        Raised by the Silicone filler when the (year-filtered) database does
        not extend in time past ``scenario``, as seen in this notebook.
    """
    projector = silicone.time_projectors.ExtendLatestTimeQuantile(
        infilling_database.filter(year=years)
    )
    filler = projector.derive_relationship(lead, smoothing=smoothing)

    # The filler returns only the projected part; join it onto the input data.
    projection = filler(scenario)
    combined = scmdata.ScmRun(scenario.append(projection)).resample("AS")
    combined["stage"] = "extended"
    return combined


In [14]:
# Keep only the years 2022-2100 of the scenario data; this frame is what gets
# passed to extend_timeseries() as the scenario to extend.
# NOTE(review): this window ends in 2100, the last year of `df_to_infill` itself,
# so presumably nothing is left for the time projector to extend — confirm which
# years should be kept here.
selected_scenarios = df_to_infill.filter(
    year=range(2022, 2100 + 1)
)

In [15]:
# Extend `selected_scenarios` using `df_to_infill` as the infilling database and
# collect the result with scmdata.run_append.
# NOTE(review): as recorded in this notebook, the cell fails with
# "ValueError: The infiller database does not extend in time past the target
# database". `selected_scenarios` is filtered to 2022-2100, while
# extend_timeseries() keeps database years range(2022, 2101, 5), whose last year
# is 2097 — presumably the scenario passed here has to end before the database
# does; confirm the intended split before re-running.
extended_scenario_all = scmdata.run_append(
    [
        extend_timeseries(df_to_infill, selected_scenarios)
        # for p in tqdm(pathways)
    ]
)
# extended_scenario_2050 = extended_scenario_all.filter(pathway_id=SELECTED_PATHWAY)

ValueError: The infiller database does not extend in time past the target database, so no infilling can occur.