# future_emis
## Purpose: input IAM projections of future emissions under different policy scenarios
- Data from NGFS

### 01/29/24, Erica Simon

In [47]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Import historical emissions dataset 
`hist_emis`

In [48]:
hist_emis = pd.read_csv('~/inputs/all_scaled_1750-2022.csv')

# remove 'Emissions|' from variable name
for i in range(len(hist_emis.variable.values)): 
    hist_emis.variable.values[i] = hist_emis.variable.values[i][10:]

# rename to match FaIR spceies
hist_emis.loc[hist_emis.variable == 'CO2|AFOLU', 'variable'] = 'CO2 AFOLU'
hist_emis.loc[hist_emis.variable == 'CO2|Energy and Industrial Processes', 'variable'] = 'CO2 FFI'

hist_emis.head()

Unnamed: 0,model,scenario,region,variable,unit,1750,1751,1752,1753,1754,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Historical,GCP+CEDS+PRIMAP+GFED,World,BC,Mt BC/yr,2.096766,2.071972,2.067178,2.070382,2.098586,...,7.842259,7.934828,7.926371,7.56806,7.562319,7.459095,7.781842,6.525021,6.871947,6.701702
1,Historical,GCP+CEDS+PRIMAP+GFED,World,C2F6,kt C2F6/yr,0.0,0.0,0.0,0.0,0.0,...,1.003458,1.035565,0.933936,0.979945,0.99698,0.996764,1.062577,1.10655,1.160588,1.167145
2,Historical,GCP+CEDS+PRIMAP+GFED,World,C3F8,kt C3F8/yr,0.0,0.0,0.0,0.0,0.0,...,0.314138,0.323186,0.304124,0.319107,0.324656,0.324586,0.346019,0.36034,0.377938,0.380074
3,Historical,GCP+CEDS+PRIMAP+GFED,World,C4F10,kt C4F10/yr,0.0,0.0,0.0,0.0,0.0,...,0.070672,0.070641,0.067024,0.070327,0.07155,0.071536,0.07626,0.079417,0.083292,0.083759
4,Historical,GCP+CEDS+PRIMAP+GFED,World,C5F12,kt C5F12/yr,0.0,0.0,0.0,0.0,0.0,...,0.035588,0.031141,0.03003,0.031513,0.032065,0.032062,0.034184,0.035603,0.037338,0.037545


## Import & clean future emissions datasets
MESSAGE and GCAM outputs from NGFS

In [49]:
MSG_proj_emis = pd.read_csv('~/inputs/NGFS_MESSAGE.csv')

# remove final row- does not contain emission data
MSG_proj_emis = MSG_proj_emis.drop(index=MSG_proj_emis.index[-1])

# remove 'Region' column- not needed
MSG_proj_emis = MSG_proj_emis.drop(columns=['Region', 'Model'])

# remove 'Emissions|' from variable names
for i in range(len(MSG_proj_emis.Variable.values)): 
    MSG_proj_emis.Variable.values[i] = MSG_proj_emis.Variable.values[i][10:]
    
# rename to match FaIR spceies
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'CO2|AFOLU', 'Variable'] = 'CO2 AFOLU'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'CO2|Energy and Industrial Processes', 'Variable'] = 'CO2 FFI'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'HFC|HFC125', 'Variable'] = 'HFC-125'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'HFC|HFC134a', 'Variable'] = 'HFC-134a'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'HFC|HFC227ea', 'Variable'] = 'HFC-227ea'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'HFC|HFC143a', 'Variable'] = 'HFC-143a'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'HFC|HFC23', 'Variable'] = 'HFC-23'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'HFC|HFC245fa', 'Variable'] = 'HFC-245fa'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'HFC|HFC32', 'Variable'] = 'HFC-32'
MSG_proj_emis.loc[MSG_proj_emis.Variable == 'HFC|HFC43-10', 'Variable'] = 'HFC-4310mee'

# get rid of species not included in hist dataset (F-Gases, HFC, PFC)
MSG_proj_emis = MSG_proj_emis[~MSG_proj_emis['Variable'].isin(['F-Gases', 'HFC', 'PFC', 'CO2'])]

# remove '(version: 1)' from scenario names
for i in range(len(MSG_proj_emis.Scenario.values)): 
    MSG_proj_emis.Scenario.values[i] = MSG_proj_emis.Scenario.values[i][:-13]
    
# rename scenarios for clarity
MSG_proj_emis.loc[MSG_proj_emis.Scenario == 'Below 2?C', 'Scenario'] = 'Below 2 C'
MSG_proj_emis.loc[MSG_proj_emis.Scenario == 'Nationally Determined Contributions (NDCs)', 'Scenario'] = 'NDCs'

MSG_proj_emis.to_csv('~/outputs/NGFS_MSG_cleaned.csv', index=False)

In [50]:
GCAM_proj_emis = pd.read_csv('~/inputs/NGFS_GCAM.csv')

# remove final row- does not contain emission data
GCAM_proj_emis = GCAM_proj_emis.drop(index=175)

# remove 'Region' column- not needed
GCAM_proj_emis = GCAM_proj_emis.drop(columns=['Region', 'Model'])

# remove 'Emissions|' from variable names
for i in range(len(GCAM_proj_emis.Variable.values)): 
    GCAM_proj_emis.Variable.values[i] = GCAM_proj_emis.Variable.values[i][10:]
    
# rename to match FaIR spceies
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'CO2|AFOLU', 'Variable'] = 'CO2 AFOLU'
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'CO2|Energy and Industrial Processes', 'Variable'] = 'CO2 FFI'
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'HFC|HFC125', 'Variable'] = 'HFC-125'
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'HFC|HFC134a', 'Variable'] = 'HFC-134a'
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'HFC|HFC227ea', 'Variable'] = 'HFC-227ea'
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'HFC|HFC143a', 'Variable'] = 'HFC-143a'
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'HFC|HFC23', 'Variable'] = 'HFC-23'
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'HFC|HFC245fa', 'Variable'] = 'HFC-245fa'
GCAM_proj_emis.loc[GCAM_proj_emis.Variable == 'HFC|HFC32', 'Variable'] = 'HFC-32'

# get rid of species not included in hist dataset (F-Gases, HFC, PFC)
GCAM_proj_emis = GCAM_proj_emis[~GCAM_proj_emis['Variable'].isin(['F-Gases', 'HFC', 'PFC', 'CO2'])]

# remove '(version: 1)' from scenario names
for i in range(len(GCAM_proj_emis.Scenario.values)): 
    GCAM_proj_emis.Scenario.values[i] = GCAM_proj_emis.Scenario.values[i][:-13]
    
# rename scenarios for clarity
GCAM_proj_emis.loc[GCAM_proj_emis.Scenario == 'Below 2?C', 'Scenario'] = 'Below 2 C'
GCAM_proj_emis.loc[GCAM_proj_emis.Scenario == 'Nationally Determined Contributions (NDCs)', 'Scenario'] = 'NDCs'

GCAM_proj_emis.to_csv('~/outputs/NGFS_GCAM_cleaned.csv', index=False)

Ensure that variable names in projected dataset are consistent with those of future dataset. Also identify missing species from projected dataset.

In [51]:
a = hist_emis['variable'].unique()
b = GCAM_proj_emis['Variable'].unique()
c = MSG_proj_emis['Variable'].unique()

print('strings in hist but not proj:')
print('\nGCAM')
print(np.setdiff1d(a, b))
print('\nMESSAGE')
print(np.setdiff1d(a, c))

print('\nstrings in proj but not hist:')
print('\nGCAM')
print(np.setdiff1d(b, a))
print('\nMESSAGE')
print(np.setdiff1d(c, a))

strings in hist but not proj:

GCAM
['C3F8' 'C4F10' 'C5F12' 'C6F14' 'C7F16' 'C8F18' 'CCl4' 'CFC-11' 'CFC-113'
 'CFC-114' 'CFC-115' 'CFC-12' 'CH2Cl2' 'CH3Br' 'CH3CCl3' 'CH3Cl' 'CHCl3'
 'HCFC-141b' 'HCFC-142b' 'HCFC-22' 'HFC-152a' 'HFC-236fa' 'HFC-365mfc'
 'HFC-4310mee' 'Halon-1211' 'Halon-1301' 'Halon-2402' 'NF3' 'SO2F2'
 'c-C4F8']

MESSAGE
['C2F6' 'C3F8' 'C4F10' 'C5F12' 'C6F14' 'C7F16' 'C8F18' 'CCl4' 'CF4'
 'CFC-11' 'CFC-113' 'CFC-114' 'CFC-115' 'CFC-12' 'CH2Cl2' 'CH3Br'
 'CH3CCl3' 'CH3Cl' 'CHCl3' 'HCFC-141b' 'HCFC-142b' 'HCFC-22' 'HFC-152a'
 'HFC-236fa' 'HFC-365mfc' 'Halon-1211' 'Halon-1301' 'Halon-2402' 'NF3'
 'SO2F2' 'c-C4F8']

strings in proj but not hist:

GCAM
[]

MESSAGE
[]


## Interpolate projections
- create a new df with annual timesteps as column names
- for each row in the old df
    - interpolate values
    - add new values to the new df

In [52]:
yrs = np.arange(2020, 2101, 5)
all_yrs = np.arange(2020, 2101)

In [53]:
old_df = pd.read_csv('~/outputs/NGFS_GCAM_cleaned.csv')

new_df = pd.DataFrame()
new_df[['Scenario', 'Variable', 'Unit']] = np.nan
new_df[all_yrs] = np.nan

In [54]:
# interpolate and add all values in timeseries
for scen in old_df.Scenario.unique():
    for var in old_df.Variable.unique():
        proj = old_df.loc[old_df.Variable == var].loc[old_df.Scenario == scen]
        proj = proj.values[0][3:].astype(float)
        proj_interp = np.interp(all_yrs, yrs, proj)
        
        # create a list of the values we want to add to our new_df
        unit = old_df.loc[old_df.Variable == var].loc[old_df.Scenario == scen]['Unit'].values[0]
        lst = [scen, var, unit] 
        for val in proj_interp.tolist():
            lst.append(val)
            
        # add to new_df
        new_df.loc[len(new_df)] = lst

# save as csv!
new_df.to_csv('~/outputs/NGFS_GCAM_interp.csv', index=False)

In [63]:
yrs2 = [2020, 2025, 2030, 2035, 2040,
       2045, 2050, 2055, 2060, 2070, 2080, 2090, 2100] 

old_df2 = pd.read_csv('~/outputs/NGFS_MSG_cleaned.csv')

new_df2 = pd.DataFrame()
new_df2[['Scenario', 'Variable', 'Unit']] = np.nan
new_df2[all_yrs] = np.nan

In [64]:
new_df2

Unnamed: 0,Scenario,Variable,Unit,2020,2021,2022,2023,2024,2025,2026,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100


In [65]:
# interpolate and add all values in timeseries
for scen in old_df2.Scenario.unique():
    for var in old_df2.Variable.unique():
        proj = old_df2.loc[old_df2.Variable == var].loc[old_df2.Scenario == scen]
        proj = proj.values[0][3:].astype(float)
        proj_interp = np.interp(all_yrs, yrs2, proj)
        
        # create a list of the values we want to add to our new_df
        unit = old_df2.loc[old_df2.Variable == var].loc[old_df2.Scenario == scen]['Unit'].values[0]
        lst = [scen, var, unit] 
        for val in proj_interp.tolist():
            lst.append(val)
            
        # add to new_df
        new_df2.loc[len(new_df)] = lst

# save as csv!
# new_df2.to_csv('~/outputs/NGFS_MSG_interp.csv', index=False)

In [66]:
new_df2

Unnamed: 0,Scenario,Variable,Unit,2020,2021,2022,2023,2024,2025,2026,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
147,Net Zero 2050,HFC-4310mee,kt HFC43-10/yr,0.922651,0.928293,0.933935,0.939577,0.94522,0.950862,0.956504,...,1.15149,1.15085,1.150209,1.149568,1.148928,1.148287,1.147647,1.147006,1.146365,1.145725
