# scmrun-decoder

Intention: take an scmrun (`scmdata.ScmRun`) emissions file and convert it into an xarray emissions array suitable for running FaIR.

This interface will be built into the FaIR adapter in openscm-runner.

Question: will the species names always follow the RCMIP naming convention? I think so, because the "infilled" variable names are transformed to that convention in climate-assessment; see https://github.com/iiasa/climate-assessment/blob/main/src/climate_assessment/climate/wg3.py

In [None]:
from scmdata import ScmRun
import numpy as np
import pandas as pd
import pooch
import datetime as dt
import xarray as xr

In [None]:
# Fetch the RCMIP annual-mean emissions CSV (v5.1.0) from Zenodo; the md5 hash
# is passed so pooch can verify the downloaded file.
rcmip_emissions = pooch.retrieve(
    url="https://zenodo.org/records/4589756/files/rcmip-emissions-annual-means-v5-1-0.csv",
    known_hash="md5:4044106f55ca65b094670e7577eaf9b3",
)

In [None]:
# Load the downloaded RCMIP CSV into an ScmRun. lowercase_cols=True is passed;
# the later cells address the metadata columns in lowercase ("variable",
# "unit", "model", "scenario").
scmrun = ScmRun(rcmip_emissions, lowercase_cols=True)

In [None]:
# Display the full loaded dataset as a timeseries table.
scmrun.timeseries()

In [None]:
# Keep only SSP scenarios; the tier 1 set is enough for this demonstration.
scenarios = ("ssp119", "ssp126", "ssp245", "ssp370", "ssp585")

In [None]:
# RCMIP variable names of the emitted species to keep. The order matters: it is
# the order in which the species axis of the output array is filled.
variables = (
    "Emissions|BC",
    "Emissions|CH4",
    "Emissions|CO",
    "Emissions|CO2|MAGICC AFOLU",
    "Emissions|CO2|MAGICC Fossil and Industrial",
    "Emissions|F-Gases|HFC|HFC125",
    "Emissions|F-Gases|HFC|HFC134a",
    "Emissions|F-Gases|HFC|HFC143a",
    "Emissions|F-Gases|HFC|HFC152a",
    "Emissions|F-Gases|HFC|HFC227ea",
    "Emissions|F-Gases|HFC|HFC23",
    "Emissions|F-Gases|HFC|HFC236fa",
    "Emissions|F-Gases|HFC|HFC245fa",
    "Emissions|F-Gases|HFC|HFC32",
    "Emissions|F-Gases|HFC|HFC365mfc",
    "Emissions|F-Gases|HFC|HFC4310mee",
    "Emissions|F-Gases|NF3",
    "Emissions|F-Gases|PFC|C2F6",
    "Emissions|F-Gases|PFC|C3F8",
    "Emissions|F-Gases|PFC|C4F10",
    "Emissions|F-Gases|PFC|C5F12",
    "Emissions|F-Gases|PFC|C6F14",
    "Emissions|F-Gases|PFC|C7F16",
    "Emissions|F-Gases|PFC|C8F18",
    "Emissions|F-Gases|PFC|CF4",
    "Emissions|F-Gases|PFC|cC4F8",
    "Emissions|F-Gases|SF6",
    "Emissions|F-Gases|SO2F2",
    "Emissions|Montreal Gases|CCl4",
    "Emissions|Montreal Gases|CFC|CFC11",
    "Emissions|Montreal Gases|CFC|CFC113",
    "Emissions|Montreal Gases|CFC|CFC114",
    "Emissions|Montreal Gases|CFC|CFC115",
    "Emissions|Montreal Gases|CFC|CFC12",
    "Emissions|Montreal Gases|CH2Cl2",
    "Emissions|Montreal Gases|CH3Br",
    "Emissions|Montreal Gases|CH3CCl3",
    "Emissions|Montreal Gases|CH3Cl",
    "Emissions|Montreal Gases|CHCl3",
    "Emissions|Montreal Gases|HCFC141b",
    "Emissions|Montreal Gases|HCFC142b",
    "Emissions|Montreal Gases|HCFC22",
    "Emissions|Montreal Gases|Halon1202",
    "Emissions|Montreal Gases|Halon1211",
    "Emissions|Montreal Gases|Halon1301",
    "Emissions|Montreal Gases|Halon2402",
    "Emissions|N2O",
    "Emissions|NH3",
    "Emissions|NOx",
    "Emissions|OC",
    "Emissions|Sulfur",
    "Emissions|VOC",
)

In [None]:
# Narrow the data to the chosen scenarios and species for the "World" region.
scmrun = scmrun.filter(
    scenario=scenarios,
    variable=variables,
    region="World",
)

In [None]:
# # For reference: this is the input that openscm-runner wants
#     replacements_variables = {
#         r".*\|Infilled\|": "",
#         "AFOLU": "MAGICC AFOLU",
#         "Energy and Industrial Processes": "MAGICC Fossil and Industrial",
#         "HFC43-10": "HFC4310mee",
#         # "Sulfur": "SOx",
#         # "VOC": "NMVOC",
#         r"HFC\|": "",
#         r"PFC\|": "",
#         "HFC245ca": "HFC245fa",  # still needed?
#     }

In [None]:
# First year on the time axis of the filtered data.
scmrun.time_points.years()[0]

In [None]:
# Print the unit each variable is reported in. no_duplicates=True is passed so
# that a variable reported in more than one unit is flagged rather than printed.
for variable in scmrun.get_unique_meta("variable"):
    variable_run = scmrun.filter(variable=variable)
    in_unit = variable_run.get_unique_meta("unit", no_duplicates=True)
    print(in_unit)

In [None]:
# Try the unit conversion on a single variable (fossil and industrial CO2):
# print its native unit, then display it converted to Gt CO2/yr.
co2_ffi = 'Emissions|CO2|MAGICC Fossil and Industrial'
in_unit = scmrun.filter(variable=co2_ffi).get_unique_meta("unit", no_duplicates=True)
print(in_unit)

# The converted run is only displayed here; `scmrun` itself is not reassigned.
scmrun.convert_unit('Gt CO2/yr', variable=co2_ffi).filter(variable=co2_ffi).timeseries()

In [None]:
# Target emissions unit for each species, keyed by the trailing part of the
# RCMIP variable name (with a leading "|"). The two CO2 entries keep the
# "|CO2|" level because their last name component alone is not a species name.
EMISSIONS_SPECIES_UNITS_CONTEXT = {
    "|CO2|MAGICC Fossil and Industrial": "GtCO2 / yr",
    "|CO2|MAGICC AFOLU": "GtCO2 / yr",
    "|CH4": "MtCH4 / yr",
    "|N2O": "MtN2O / yr",
    "|Sulfur": "MtSO2 / yr",
    "|CO": "MtCO / yr",
    "|VOC": "MtNMVOC / yr",
    "|NOx": "MtNOx / yr",
    "|BC": "MtBC / yr",
    "|OC": "MtOC / yr",
    "|NH3": "MtNH3 / yr",
    "|CF4": "ktCF4 / yr",
    "|C2F6": "ktC2F6 / yr",
    "|C3F8": "ktC3F8 / yr",
    "|C4F10": "ktC4F10 / yr",
    "|C5F12": "ktC5F12 / yr",
    "|C6F14": "ktC6F14 / yr",
    "|C7F16": "ktC7F16 / yr",
    "|C8F18": "ktC8F18 / yr",
    "|cC4F8": "ktcC4F8 / yr",
    "|HFC23": "ktHFC23 / yr",
    "|HFC32": "ktHFC32 / yr",
    "|HFC4310mee": "ktHFC4310mee / yr",
    "|HFC125": "ktHFC125 / yr",
    "|HFC134a": "ktHFC134a / yr",
    "|HFC143a": "ktHFC143a / yr",
    "|HFC152a": "ktHFC152a / yr",
    "|HFC227ea": "ktHFC227ea / yr",
    "|HFC236fa": "ktHFC236fa / yr",
    "|HFC245fa": "ktHFC245fa / yr",
    "|HFC365mfc": "ktHFC365mfc / yr",
    "|SF6": "ktSF6 / yr",
    "|NF3": "ktNF3 / yr",
    "|SO2F2": "ktSO2F2 / yr",
    "|CFC11": "ktCFC11 / yr",
    "|CFC12": "ktCFC12 / yr",
    "|CFC113": "ktCFC113 / yr",
    "|CFC114": "ktCFC114 / yr",
    "|CFC115": "ktCFC115 / yr",
    "|CCl4": "ktCCl4 / yr",
    "|CH3CCl3": "ktCH3CCl3 / yr",
    "|HCFC22": "ktHCFC22 / yr",
    "|HCFC141b": "ktHCFC141b / yr",
    "|HCFC142b": "ktHCFC142b / yr",
    "|Halon1211": "ktHalon1211 / yr",
    "|Halon1202": "ktHalon1202 / yr",
    "|Halon1301": "ktHalon1301 / yr",
    "|Halon2402": "ktHalon2402 / yr",
    "|CH3Br": "ktCH3Br / yr",
    "|CH3Cl": "ktCH3Cl / yr",
    "|CH2Cl2": "ktCH2Cl2 / yr",
    "|CHCl3": "ktCHCl3 / yr",
}

In [None]:
# Display the species-to-unit mapping.
EMISSIONS_SPECIES_UNITS_CONTEXT

In [None]:
# Convert every variable in `scmrun` to the unit listed for its species in
# EMISSIONS_SPECIES_UNITS_CONTEXT.
for variable in scmrun.get_unique_meta("variable"):
    # Validation only: no_duplicates=True makes this call fail if the variable
    # is reported in more than one unit, so the problem surfaces here rather
    # than inside the conversion.
    scmrun.filter(variable=variable).get_unique_meta("unit", no_duplicates=True)

    # The units table is keyed by the last component of the variable name,
    # except for the two CO2 variables, whose keys keep the "|CO2|" level.
    if variable.endswith(('|CO2|MAGICC Fossil and Industrial', '|CO2|MAGICC AFOLU')):
        prepend = '|CO2|'
    else:
        prepend = '|'
    units_key = prepend + variable.split('|')[-1]

    try:
        fair_unit = EMISSIONS_SPECIES_UNITS_CONTEXT[units_key]
    except KeyError as err:
        # A failed dict lookup raises KeyError; re-raise it with enough context
        # to see which variable has no entry in the units table.
        raise KeyError(
            f"No target unit defined for variable {variable!r} "
            f"(looked up key {units_key!r} in EMISSIONS_SPECIES_UNITS_CONTEXT)"
        ) from err

    scmrun = scmrun.convert_unit(fair_unit, variable=variable)

In [None]:
# Inspect the NOx timeseries after the unit-conversion cell above.
scmrun.filter(variable='Emissions|NOx').timeseries()

In [None]:
# Inspect the fossil and industrial CO2 timeseries after the unit-conversion cell above.
scmrun.filter(variable='Emissions|CO2|MAGICC Fossil and Industrial').timeseries()

In [None]:
# Number of unique (model, scenario) combinations in the data.
len(scmrun.meta[["model", "scenario"]].drop_duplicates())

In [None]:
# Build one "<model>____<scenario>" label per unique (model, scenario) pair.
unique_pairs = scmrun.meta[["model", "scenario"]].drop_duplicates()
mod___scen = [
    f'{model}____{scenario}'
    for model, scenario in unique_pairs.values
]
mod___scen

In [None]:
# Species keys of the units table with the leading "|" character dropped.
[key[1:] for key in EMISSIONS_SPECIES_UNITS_CONTEXT]

In [None]:
# Display the tuple of selected variable names.
variables

In [None]:
# Put the data on 1 January of every year from 1750 to 2500 inclusive.
scmrun = scmrun.interpolate(
    [dt.datetime(year, 1, 1) for year in range(1750, 2501)]
)

In [None]:
# Print the raw values of every (model, scenario, variable) combination.
unique_pairs = scmrun.meta[["model", "scenario"]].drop_duplicates()
for model, scenario in unique_pairs.values:
    for variable in variables:
        selection = scmrun.filter(model=model, scenario=scenario, variable=variable)
        print(selection.values)

In [None]:
# Sizes of the axes of the output emissions array.

# The species axis is filled by enumerating `variables`, so size it from that
# tuple rather than from the units table; the two would only agree by
# coincidence if one of them were edited.
n_species = len(variables)

years_in = scmrun.time_points.years()
# One timepoint per year, offset by half a year: first_year + 0.5, ...,
# last_year + 0.5. This assumes the time axis is annual and contiguous, which
# holds after the interpolation onto 1750-2500 above.
timepoints = np.arange(years_in[0] + 0.5, years_in[-1] + 1)
n_timepoints = len(timepoints)

# One entry on the scenario axis per unique (model, scenario) pair.
n_scenarios = len(scmrun.meta[["model", "scenario"]].drop_duplicates())

In [None]:
# now make the emissions array
# Axes are (timepoints, scenario, config, specie) with a single config slot.

# NOTE: this rebinds `scenarios`, which earlier held the tuple of scenario names
# used to filter `scmrun`; re-running that filter cell after this one would
# filter on these "<model>____<scenario>" labels instead.
scenarios = []
# Species labels follow `variables`, the same order used to fill the array below.
species = list(variables)
# Start from NaN so any slot that is never filled is easy to spot.
data_out = np.full((n_timepoints, n_scenarios, 1, n_species), np.nan)

model_scenario_pairs = scmrun.meta[["model", "scenario"]].drop_duplicates().values
for i_scenario, (model, scenario) in enumerate(model_scenario_pairs):
    scenarios.append(f'{model}____{scenario}')
    for i_variable, variable in enumerate(variables):
        values = scmrun.filter(model=model, scenario=scenario, variable=variable).values
        # `.values` has one row per matching timeseries. Exactly one must match;
        # otherwise fail with a message naming the offending combination instead
        # of a NumPy broadcasting error.
        if values.shape[0] != 1:
            raise ValueError(
                f"Expected exactly one timeseries for model={model!r}, "
                f"scenario={scenario!r}, variable={variable!r}; "
                f"found {values.shape[0]}"
            )
        data_out[:, i_scenario, 0, i_variable] = values[0]

In [None]:
# Wrap the raw array in a labelled DataArray with axes
# (timepoints, scenario, config, specie); "generic" labels the single config.
emissions_da = xr.DataArray(
    data=data_out,
    dims=["timepoints", "scenario", "config", "specie"],
    coords={
        "timepoints": timepoints,
        "scenario": scenarios,
        "config": ["generic"],
        "specie": species,
    },
)

In [None]:
# Display the resulting DataArray.
emissions_da

In [None]:
# Spot check: all species for the first scenario and the single config at time
# index 265 (timepoint 2015.5 when the time axis starts at 1750.5).
emissions_da[265, 0, 0, :]