# future_emis
## Purpose: input IAM projections of future emissions under different policy scenarios
Data Credit: 
- Richters, O. *et al.* (2023). *NGFS Climate Scenarios Data Set* (4.1). Zenodo. https://doi.org/10.5281/ZENODO.10079020



### 01/29/24, Erica Simon

In [116]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Import historical emissions dataset 
`hist_emis`

In [134]:
# Historical emissions table; used below to cross-check the species names and
# units found in the NGFS projections.
hist_emis_path = '~/outputs/hist_emis_ALL.csv'
hist_emis = pd.read_csv(hist_emis_path)

# quick look at the first few rows
hist_emis.head()

Unnamed: 0,Model,Scenario,Region,Variable,Unit,1750,1751,1752,1753,1754,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Historical,GCP+CEDS+PRIMAP+GFED,World,Emissions|BC,Mt BC/yr,2.096766,2.071972,2.067178,2.070382,2.098586,...,7.842259,7.934828,7.926371,7.56806,7.562319,7.459095,7.781842,6.525021,6.871947,6.701702
1,Historical,GCP+CEDS+PRIMAP+GFED,World,Emissions|C2F6,kt C2F6/yr,0.0,0.0,0.0,0.0,0.0,...,1.003458,1.035565,0.933936,0.979945,0.99698,0.996764,1.062577,1.10655,1.160588,1.167145
2,Historical,GCP+CEDS+PRIMAP+GFED,World,Emissions|C3F8,kt C3F8/yr,0.0,0.0,0.0,0.0,0.0,...,0.314138,0.323186,0.304124,0.319107,0.324656,0.324586,0.346019,0.36034,0.377938,0.380074
3,Historical,GCP+CEDS+PRIMAP+GFED,World,Emissions|C4F10,kt C4F10/yr,0.0,0.0,0.0,0.0,0.0,...,0.070672,0.070641,0.067024,0.070327,0.07155,0.071536,0.07626,0.079417,0.083292,0.083759
4,Historical,GCP+CEDS+PRIMAP+GFED,World,Emissions|C5F12,kt C5F12/yr,0.0,0.0,0.0,0.0,0.0,...,0.035588,0.031141,0.03003,0.031513,0.032065,0.032062,0.034184,0.035603,0.037338,0.037545


## Import & clean future emissions datasets
MESSAGE and GCAM outputs from NGFS

In [118]:
def NGFS_clean(df):
    """Clean a raw NGFS scenario table so its labels line up with the FaIR species names.

    Steps, in order:
      1. drop the final row (it does not contain emission data),
      2. rename selected ``Variable`` entries to the FaIR species names,
      3. drop aggregate species that are not in the historical dataset,
      4. strip a trailing ``(version: N)`` tag from ``Scenario`` names,
      5. shorten selected scenario names.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw NGFS table with (at least) ``Variable`` and ``Scenario`` columns.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame. The input frame is not modified; the original
        row labels of the surviving rows are kept.
    """
    # remove final row - does not contain emission data.
    # Positional slice (not a label drop) so it also works on an empty frame or
    # a non-unique index; .copy() keeps every edit below off the caller's frame.
    df = df.iloc[:-1].copy()

    # rename to match FaIR species
    species_renames = {
        'Emissions|CO2|AFOLU': 'Emissions|CO2 AFOLU',
        'Emissions|CO2|Energy and Industrial Processes': 'Emissions|CO2 FFI',
        'Emissions|HFC|HFC125': 'Emissions|HFC-125',
        'Emissions|HFC|HFC134a': 'Emissions|HFC-134a',
        'Emissions|HFC|HFC227ea': 'Emissions|HFC-227ea',
        'Emissions|HFC|HFC143a': 'Emissions|HFC-143a',
        'Emissions|HFC|HFC23': 'Emissions|HFC-23',
        'Emissions|HFC|HFC245fa': 'Emissions|HFC-245fa',
        'Emissions|HFC|HFC32': 'Emissions|HFC-32',
    }
    df['Variable'] = df['Variable'].map(lambda name: species_renames.get(name, name))

    # get rid of species not included in hist dataset (aggregates: F-Gases, HFC, PFC, total CO2)
    not_in_hist = ['Emissions|F-Gases', 'Emissions|HFC', 'Emissions|PFC', 'Emissions|CO2']
    df = df.loc[~df['Variable'].isin(not_in_hist)].copy()

    # remove '(version: 1)' from scenario names.
    # Anchored regex instead of a fixed-width slice: names that do not carry
    # the tag are left untouched rather than truncated.
    df['Scenario'] = df['Scenario'].str.replace(r'\s*\(version: \d+\)\s*$', '', regex=True)

    # rename scenarios for clarity.
    # NOTE(review): the '?' looks like a mangled degree sign; the other two
    # spellings are accepted in case the file is decoded differently - confirm.
    scenario_renames = {
        'Below 2?C': 'Below 2 C',
        'Below 2\u00b0C': 'Below 2 C',
        'Below 2\ufffdC': 'Below 2 C',
        'Nationally Determined Contributions (NDCs)': 'NDCs',
    }
    df['Scenario'] = df['Scenario'].map(lambda name: scenario_renames.get(name, name))

    return df

In [119]:
# read in df
# Load the raw NGFS MESSAGE table and run it through the shared cleaning step
MSG_proj_emis = NGFS_clean(pd.read_csv('~/inputs/NGFS_MESSAGE.csv'))

# One extra rename that NGFS_clean does not cover: match the FaIR species name
is_hfc4310 = MSG_proj_emis.Variable == 'Emissions|HFC|HFC43-10'
MSG_proj_emis.loc[is_hfc4310, 'Variable'] = 'Emissions|HFC-4310mee'

In [120]:
# read in df
# Load the raw NGFS GCAM table and run it through the shared cleaning step
GCAM_raw_path = '~/inputs/NGFS_GCAM.csv'
GCAM_proj_emis = NGFS_clean(pd.read_csv(GCAM_raw_path))

Ensure that variable names in the projected datasets are consistent with those of the historical dataset. Also identify species that are missing from the projected datasets.

In [121]:
# Compare species labels between the historical table and each projection.
# The column is named 'Variable' (capitalised) in all three frames; the
# historical table's header is shown by hist_emis.head() above.
hist_species = hist_emis['Variable'].unique()
gcam_species = GCAM_proj_emis['Variable'].unique()
msg_species = MSG_proj_emis['Variable'].unique()

print('species in hist but not proj:')
print('\nGCAM')
print(np.setdiff1d(hist_species, gcam_species))
print('\nMESSAGE')
print(np.setdiff1d(hist_species, msg_species))

print('\nspecies in proj but not hist:')
print('\nGCAM')
print(np.setdiff1d(gcam_species, hist_species))
print('\nMESSAGE')
print(np.setdiff1d(msg_species, hist_species))

species in hist but not proj:

GCAM
['Emissions|C3F8' 'Emissions|C4F10' 'Emissions|C5F12' 'Emissions|C6F14'
 'Emissions|C7F16' 'Emissions|C8F18' 'Emissions|CCl4' 'Emissions|CFC-11'
 'Emissions|CFC-113' 'Emissions|CFC-114' 'Emissions|CFC-115'
 'Emissions|CFC-12' 'Emissions|CH2Cl2' 'Emissions|CH3Br'
 'Emissions|CH3CCl3' 'Emissions|CH3Cl' 'Emissions|CHCl3'
 'Emissions|HCFC-141b' 'Emissions|HCFC-142b' 'Emissions|HCFC-22'
 'Emissions|HFC-152a' 'Emissions|HFC-236fa' 'Emissions|HFC-365mfc'
 'Emissions|HFC-4310mee' 'Emissions|Halon-1211' 'Emissions|Halon-1301'
 'Emissions|Halon-2402' 'Emissions|NF3' 'Emissions|SO2F2'
 'Emissions|c-C4F8']

MESSAGE
['Emissions|C2F6' 'Emissions|C3F8' 'Emissions|C4F10' 'Emissions|C5F12'
 'Emissions|C6F14' 'Emissions|C7F16' 'Emissions|C8F18' 'Emissions|CCl4'
 'Emissions|CF4' 'Emissions|CFC-11' 'Emissions|CFC-113'
 'Emissions|CFC-114' 'Emissions|CFC-115' 'Emissions|CFC-12'
 'Emissions|CH2Cl2' 'Emissions|CH3Br' 'Emissions|CH3CCl3'
 'Emissions|CH3Cl' 'Emissions|CHCl3'

### Fix Units

In [133]:
# Compare unit strings between the historical table and each projection.
# The column is named 'Unit' (capitalised) in all three frames; the
# historical table's header is shown by hist_emis.head() above.
hist_units = hist_emis['Unit'].unique()
gcam_units = GCAM_proj_emis['Unit'].unique()
msg_units = MSG_proj_emis['Unit'].unique()

print('\nunits in proj but not hist:')
print('\nGCAM')
print(np.setdiff1d(gcam_units, hist_units))
print('\nMESSAGE\n')
print(np.setdiff1d(msg_units, hist_units))


units in proj but not hist:

GCAM
['Mt CO2/yr' 'kt N2O/yr']

MESSAGE

['Mt CO2/yr' 'kt HFC43-10/yr' 'kt N2O/yr']


Need to fix units for the following variables:
- GCAM & MESSAGE: `Emissions|CO2 FFI`, `Emissions|CO2 AFOLU`, `Emissions|N2O`
- MESSAGE only: `Emissions|HFC-4310mee` (unit reported as `kt HFC43-10/yr`)

In [129]:

# NOTE(review): everything in this cell is commented out, so no unit conversion
# is applied here and no other cell in this notebook rescales values or renames
# units. TODO: finish this step, or confirm the conversion is done downstream.

# need to change N2O from kt to Mt!
# numeric_columns = GCAM_proj_emis.columns[5:] 
# c1 = GCAM_proj_emis['Variable'] == 'Emissions|N2O'
# GCAM_proj_emis.loc[c1, numeric_columns] = GCAM_proj_emis.loc[c1, numeric_columns] / 1000

# NOTE(review): the draft below builds its row mask from hist_emis but applies
# it to GCAM_proj_emis - that looks unintended; verify before re-enabling.
# GCAM_proj_emis.loc[hist_emis.variable == 'Emissions|N2O'][2:] = hist_emis.loc[hist_emis.variable == 'Emissions|N2O'][2:] / 1000
# GCAM_proj_emis.iloc[41, 0] = 'Emissions|N2O'
# GCAM_proj_emis.iloc[41, 1] = 'kt N2O/yr'

## Save cleaned dfs

In [None]:
# Write each cleaned projection table to disk without the row index; the
# interpolation cells further down read these files back in.
for cleaned_frame, cleaned_path in (
    (MSG_proj_emis, '~/outputs/NGFS_MSG_cleaned.csv'),
    (GCAM_proj_emis, '~/outputs/NGFS_GCAM_cleaned.csv'),
):
    cleaned_frame.to_csv(cleaned_path, index=False)

## Interpolate projections
- create a new df with annual timesteps as column names
- for each row in the old df
    - interpolate values
    - add new values to the new df

In [123]:
# Year grids for interpolation, both inclusive of the end year:
#   yrs     - every 5th year (the timesteps passed as known points)
#   all_yrs - every year (the timesteps to interpolate onto)
first_yr, last_yr = 2020, 2100
yrs = np.arange(first_yr, last_yr + 1, 5)
all_yrs = np.arange(first_yr, last_yr + 1)

In [124]:
def NGFS_interp(old_df, new_df, yrs, all_yrs):
    """Linearly interpolate each scenario/variable timeseries onto ``all_yrs``.

    For every scenario and variable in ``old_df`` the first matching row is
    taken, its values from the sixth column onward are interpolated from the
    ``yrs`` timesteps onto ``all_yrs`` with ``np.interp``, and the result is
    appended to ``new_df`` as one row.

    Parameters
    ----------
    old_df : pandas.DataFrame
        Table whose first five columns are metadata (it must have ``Model``,
        ``Region``, ``Scenario``, ``Variable`` and ``Unit`` columns) and whose
        remaining columns hold one value per entry of ``yrs``.
    new_df : pandas.DataFrame
        Output table, modified in place. Its columns must be the five metadata
        fields (Model, Scenario, Region, Variable, Unit) followed by one column
        per entry of ``all_yrs``.
    yrs : sequence of numbers
        Timesteps of the value columns in ``old_df``.
    all_yrs : sequence of numbers
        Timesteps to interpolate onto.

    Returns
    -------
    pandas.DataFrame
        ``new_df`` itself, returned for convenience.

    Raises
    ------
    ValueError
        If a row's number of value columns differs from ``len(yrs)``.
    """
    if old_df.empty:
        return new_df

    # Model and region are taken from the first row and reused for every
    # output row. Positional access, so the index need not start at label 0.
    mdl = old_df['Model'].iloc[0]
    reg = old_df['Region'].iloc[0]

    # interpolate and add all values in timeseries
    for scen in old_df['Scenario'].unique():
        scen_rows = old_df.loc[old_df['Scenario'] == scen]
        for var in old_df['Variable'].unique():
            match = scen_rows.loc[scen_rows['Variable'] == var]
            if match.empty:
                # this scenario does not report this variable - nothing to add
                continue
            row = match.iloc[0]

            # columns 0-4 are metadata; the rest are the timeseries values
            proj = row.iloc[5:].to_numpy(dtype=float)
            if len(proj) != len(yrs):
                raise ValueError(
                    f"{scen!r} / {var!r}: {len(proj)} values but {len(yrs)} timesteps"
                )
            proj_interp = np.interp(all_yrs, yrs, proj)

            # add to new_df: metadata first, then the interpolated values
            new_df.loc[len(new_df)] = [mdl, scen, reg, var, row['Unit'], *proj_interp.tolist()]

    return new_df

### GCAM

In [125]:
# Read back the cleaned GCAM table saved earlier in this notebook
old_df = pd.read_csv('~/outputs/NGFS_GCAM_cleaned.csv')

# Empty output frame: the five metadata columns, then one column per year
meta_cols = ['Model', 'Scenario', 'Region', 'Variable', 'Unit']
new_df = pd.DataFrame()
new_df[meta_cols] = np.nan
new_df[all_yrs] = np.nan

# Model and region labels from the first row; reused for every output row
mdl, reg = old_df.Model[0], old_df.Region[0]

In [126]:
# interpolate and add all values in timeseries
# interpolate and add all values in timeseries.
# NGFS_interp (defined above) holds this exact loop: it appends one
# interpolated row per scenario/variable to new_df in place.
NGFS_interp(old_df, new_df, yrs, all_yrs)

# save as csv!
new_df.to_csv('~/outputs/NGFS_GCAM_interp.csv', index=False)

### MESSAGE

In [127]:
# Timesteps for the MESSAGE value columns: 5-year steps to 2060, then 10-year steps
yrs2 = [2020, 2025, 2030, 2035, 2040,
       2045, 2050, 2055, 2060, 2070, 2080, 2090, 2100] 

# Read back the cleaned MESSAGE table saved earlier in this notebook
old_df2 = pd.read_csv('~/outputs/NGFS_MSG_cleaned.csv')

# Empty output frame: the five metadata columns, then one column per year
new_df2 = pd.DataFrame()
new_df2[['Model', 'Scenario', 'Region', 'Variable', 'Unit']] = np.nan
new_df2[all_yrs] = np.nan

# Take both labels from the MESSAGE table itself. `reg` was previously left
# over from the GCAM cell, so this section only ran if that cell had run first.
mdl = old_df2.Model[0]
reg = old_df2.Region[0]

In [128]:
# interpolate and add all values in timeseries
# interpolate and add all values in timeseries.
# NGFS_interp (defined above) holds this exact loop and reads the model and
# region labels from old_df2 itself, so nothing is carried over from the
# GCAM section. MESSAGE uses its own timestep list, yrs2.
NGFS_interp(old_df2, new_df2, yrs2, all_yrs)

# save as csv!
new_df2.to_csv('~/outputs/NGFS_MSG_interp.csv', index=False)