# Investigate a pure log fit to aerosol cloud interactions


\begin{equation}
F = \beta \log \left( 1 + \sum_{i} n_i A_i \right)
\end{equation}


where 
- $A_i$ is the atmospheric input (concentrations or emissions) of species $i$,
- $\beta$ is a scale factor,
- $n_i$ dictates how much the emissions of species $i$ contribute to CDNC (cloud droplet number concentration).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pl
import json
import scipy.stats
import pooch
import os
import glob
from tqdm.autonotebook import tqdm
from scipy.optimize import curve_fit

In [None]:
# Notebook-wide matplotlib defaults: figure size and resolution, font, and
# inward-pointing ticks (with minor ticks) drawn on all four sides.
pl.rcParams.update({
    'figure.figsize': (11.75, 5.875),
    'font.size': 12,
    'font.family': 'Arial',
    'ytick.direction': 'in',
    'ytick.minor.visible': True,
    'ytick.major.right': True,
    'ytick.right': True,
    'xtick.direction': 'in',
    'xtick.minor.visible': True,
    'xtick.major.top': True,
    'xtick.top': True,
    'axes.spines.top': True,
    'axes.spines.bottom': True,
    'figure.dpi': 300,
})

In [None]:
# Load one CSV of aerosol forcing per climate model.
#
# glob.glob returns paths in arbitrary (filesystem-dependent) order. The
# order of `models` determines the panel layout of the diagnostic plots, the
# row order of the fitted-parameter table and the order of the data handed to
# the seeded KDE later on, so the list is sorted to make runs reproducible.
files = sorted(glob.glob('../data/smith2021aerosol/*.csv'))

ari = {}     # model name -> sum of the 'aprp_ERFariSW' and 'aprp_ERFariLW' columns
aci = {}     # model name -> sum of the 'aprp_ERFaciSW' and 'aprp_ERFaciLW' columns
models = []  # model names, taken from the CSV file names
years = {}   # model name -> the 'year' index of that model's CSV
for file in files:
    # File name without directory and extension is used as the model name.
    model = os.path.splitext(os.path.basename(file))[0]
    if model == 'sumlog':
        # sumlog.csv is written into this directory later in the notebook and
        # holds fitted parameters, not model output.
        continue
    models.append(model)
    df = pd.read_csv(file, index_col='year')
    ari[model] = (df['aprp_ERFariSW'] + df['aprp_ERFariLW']).values.squeeze()
    aci[model] = (df['aprp_ERFaciSW'] + df['aprp_ERFaciLW']).values.squeeze()
    years[model] = df.index

In [None]:
# Interactive check: whole-year offset of E3SM's first year from 1850 (value is only displayed).
int(np.floor(years['E3SM'][0]-1850))

In [None]:
# Fetch the RCMIP v5.1.0 annual-mean emissions CSV via its Zenodo DOI; pooch
# returns the local file path and checks the download against the MD5 hash.
rcmip_emissions_file = pooch.retrieve(
    url="doi:10.5281/zenodo.4589756/rcmip-emissions-annual-means-v5-1-0.csv",
    known_hash="md5:4044106f55ca65b094670e7577eaf9b3",
)

In [None]:
# Full RCMIP emissions table; filtered below on its 'Scenario', 'Region' and 'Variable' columns.
emis_df = pd.read_csv(rcmip_emissions_file)

In [None]:
def _ssp245_world_emissions(variable):
    """Return the 1750-2100 'World' ssp245 series of one RCMIP variable.

    The single matching row is restricted to the '1750'..'2100' year columns,
    missing values are filled by interpolating along the year axis, and the
    result is returned as a 1-D NumPy array.
    """
    selected = (
        (emis_df['Scenario'] == 'ssp245')
        & (emis_df['Region'] == 'World')
        & (emis_df['Variable'] == variable)
    )
    return emis_df.loc[selected, '1750':'2100'].interpolate(axis=1).squeeze().values


# Emissions of the three species used by the fit; index 0 is 1750.
bc = _ssp245_world_emissions('Emissions|BC')
oc = _ssp245_world_emissions('Emissions|OC')
so2 = _ssp245_world_emissions('Emissions|Sulfur')

In [None]:
# Interactive check: display the loaded ERFaci series for UKESM1-0-LL.
aci['UKESM1-0-LL']

In [None]:
def aci_log(x, beta, n0, n1, n2):
    """Logarithmic aerosol-cloud forcing, expressed relative to 1850.

    Evaluates ``beta * log(1 + n0*SO2 + n1*BC + n2*OC)`` for the supplied
    inputs and subtracts the same expression evaluated at index 100 of the
    module-level ``so2``, ``bc`` and ``oc`` series (those series start in
    1750, so index 100 is 1850).

    The signature is ``(x, *params)`` so the function can be passed directly
    to ``scipy.optimize.curve_fit``.

    Parameters
    ----------
    x : sequence
        ``x[0]``, ``x[1]`` and ``x[2]`` are the SO2, BC and OC inputs
        (arrays or scalars).
    beta : float
        Scale factor applied to the logarithm.
    n0, n1, n2 : float
        Weights of SO2, BC and OC inside the logarithm.

    Returns
    -------
    Forcing minus its 1850 value, with the broadcast shape of the inputs.
    """
    def _scaled_log(sulfur, black_carbon, organic_carbon):
        # Summation order is kept fixed so results are bit-for-bit stable.
        return beta*np.log(1 + sulfur*n0 + black_carbon*n1 + organic_carbon*n2)

    forcing = _scaled_log(x[0], x[1], x[2])
    forcing_1850 = _scaled_log(so2[100], bc[100], oc[100])
    return forcing - forcing_1850

In [None]:
# Fit (beta, n0, n1, n2) of aci_log to each model's ERFaci series.
param_fits = {}

# beta is bounded to be non-positive; the three species weights non-negative.
lower_bounds = (-np.inf, 0, 0, 0)
upper_bounds = (0, np.inf, np.inf, np.inf)

for model in models:
    # Positions of the model's first and last year in the 1750-based
    # emissions arrays (floor/ceil turn the year values into slice bounds).
    first = int(np.floor(years[model][0] - 1750))
    last = int(np.ceil(years[model][-1] - 1750))
    print(model)
    fit_inputs = [so2[first:last], bc[first:last], oc[first:last]]
    param_fits[model], cov = curve_fit(
        aci_log,
        fit_inputs,
        aci[model],
        bounds=(lower_bounds, upper_bounds),
        max_nfev=10000,
    )

In [None]:
# Interactive check: `model` still holds the last value from the fitting loop,
# so this displays the parameters fitted for that model.
param_fits[model]

In [None]:
# param_fits['UKESM1-0-LL'] = np.array([-2.539700806, 0.003980065, 0.002869812, 0.001653145])
# param_fits['MIROC6'] = np.array([-7.642476315, 0.000774497, 0.000138215, 0.00080687])
# param_fits['GISS-E2-1-G'] = np.array([-1.873251487, 0.00207261, 0.010251979, 0.011606903])
# param_fits['MRI-ESM2-0'] = np.array([-0.502976496, 0.012416925, 0.048429429, 0.073393427])
# param_fits['GFDL-ESM4'] = np.array([-16.32454756, 0.000391837, -0.000240587, 0.000505624])
# param_fits['CNRM-ESM2-1'] = np.array([-9.318243747, 0.000566375, -4.88714E-05, 0.000335176])
# param_fits['MPI-ESM-1-2-HAM'] = np.array([-1.976693681, 0.001599969, 0.006260918, 0.003781761])
# param_fits['NorESM2-LM'] = np.array([-3.10948714, 0.003243032, -0.00290163, 0.003265711])
# param_fits['IPSL-CM6A-LR-INCA'] = np.array([-4.317085509, 0.000519199, 0, 0.000567797])

In [None]:
# class fitClass:

#     def __init__(self):
#         pass

#     def refit_beta(self, x, beta):
#         aci = beta*np.log(1 + x[0]*self.n0 + x[1]*self.n1 + x[2]*self.n2)
#         aci_1850 = beta*np.log(1 + so2[100]*self.n0 + bc[100]*self.n1 + oc[100]*self.n2)
#         return (aci - aci_1850)

# inst = fitClass()

# for model in ['UKESM1-0-LL', 'MIROC6', 'GISS-E2-1-G', 'MRI-ESM2-0', 'GFDL-ESM4', 'NorESM2-LM']:
#     ist = int(np.floor(years[model][0]-1750))
#     ien = int(np.ceil(years[model][-1]-1750))
#     inst.n0 = param_fits[model][1]
#     inst.n1 = param_fits[model][2]
#     inst.n2 = param_fits[model][3]
#     coeffs, _ = curve_fit(inst.refit_beta, [so2[ist:ien], bc[ist:ien], oc[ist:ien]], aci[model])
#     param_fits[model][0] = coeffs[0]

In [None]:
# param_fits['UKESM1-0-LL'][0], param_fits['MIROC6'][0], param_fits['GISS-E2-1-G'][0], param_fits['MRI-ESM2-0'][0], param_fits['GFDL-ESM4'][0], param_fits['NorESM2-LM'][0]

In [None]:
def aci_log1750(x, beta, n0, n1, n2):
    """Logarithmic aerosol-cloud forcing, expressed relative to 1750.

    Same functional form as ``aci_log`` but the reference value subtracted is
    evaluated at index 0 of the module-level ``so2``, ``bc`` and ``oc``
    series, i.e. their first year (1750).

    Parameters
    ----------
    x : sequence
        ``x[0]``, ``x[1]`` and ``x[2]`` are the SO2, BC and OC inputs
        (arrays or scalars).
    beta : float
        Scale factor applied to the logarithm.
    n0, n1, n2 : float
        Weights of SO2, BC and OC inside the logarithm.

    Returns
    -------
    Forcing minus its 1750 value, with the broadcast shape of the inputs.
    """
    def _scaled_log(sulfur, black_carbon, organic_carbon):
        # Summation order is kept fixed so results are bit-for-bit stable.
        return beta*np.log(1 + sulfur*n0 + black_carbon*n1 + organic_carbon*n2)

    forcing = _scaled_log(x[0], x[1], x[2])
    forcing_1750 = _scaled_log(so2[0], bc[0], oc[0])
    return forcing - forcing_1750

In [None]:
# Fit the same log form, referenced to 1750, to the AR6 best-estimate
# aerosol-cloud interaction forcing column.
df_ar6 = pd.read_csv('../data/forcing/table_A3.3_historical_ERF_1750-2019_best_estimate.csv')

ar6_target = df_ar6['aerosol-cloud_interactions'].values
# First 270 entries of each emissions series (1750 onwards) are paired with the AR6 rows.
ar6_inputs = [so2[:270], bc[:270], oc[:270]]
# beta bounded non-positive, species weights non-negative.
ar6_bounds = ((-np.inf, 0, 0, 0), (0, np.inf, np.inf, np.inf))

params_ar6, cov = curve_fit(
    aci_log1750,
    ar6_inputs,
    ar6_target,
    bounds=ar6_bounds,
    max_nfev=10000,
)

In [None]:
# Interactive check: fitted (beta, n0, n1, n2) for the AR6 series.
params_ar6

In [None]:
# Quick-look grid: each model's ERFaci series with its fitted emulation on
# top; the bottom-right panel shows the AR6 series and its fit.
fig, ax = pl.subplots(3, 4, figsize=(16, 12))
full_timeline = np.arange(1750.5, 2101)
for imodel, model in enumerate(models):
    # ax.flat walks the 3x4 grid row by row, i.e. (imodel // 4, imodel % 4).
    panel = ax.flat[imodel]
    panel.plot(years[model], aci[model])
    panel.plot(full_timeline, aci_log([so2, bc, oc], *param_fits[model]))
    panel.set_title(model)
ax[2, 3].plot(np.arange(1750.5, 2020), df_ar6['aerosol-cloud_interactions'].values)
ax[2, 3].plot(full_timeline, aci_log1750([so2, bc, oc], *params_ar6))

In [None]:
# Line colour per model (plus a few non-model series) used in the figures.
colors = {
    'CanESM5': 'red',
    'E3SM': 'darkorange',
    'GFDL-ESM4': 'yellowgreen',
    'GFDL-CM4': 'yellow',
    'GISS-E2-1-G': 'green',
    'HadGEM3-GC31-LL': 'turquoise',
    'IPSL-CM6A-LR': 'teal',
    'MIROC6': 'blue',
    'MRI-ESM2-0': 'blueviolet',
    'NorESM2-LM': 'purple',
    'UKESM1-0-LL': 'crimson',
    'mean': 'black',
    'AR5': '0.6',
    'AR6': '0.3',
    'Lund': 'pink',
}

# Last year listed for each model.
endyear = {
    'CanESM5': 2100,
    'MIROC6': 2100,
    'NorESM2-LM': 2100,
    'GISS-E2-1-G': 2100,
    'HadGEM3-GC31-LL': 2099,
    'GFDL-CM4': 2100,
    'E3SM': 2014,
    'UKESM1-0-LL': 2014,
    'GFDL-ESM4': 2014,
    'IPSL-CM6A-LR': 2100,
    'MRI-ESM2-0': 2014,
}

# First year listed for each model.
startyear = {
    'CanESM5': 1850,
    'MIROC6': 1850,
    'NorESM2-LM': 1850,
    'GISS-E2-1-G': 1850,
    'HadGEM3-GC31-LL': 1850,
    'GFDL-CM4': 1850,
    'E3SM': 1870,
    'GFDL-ESM4': 1850,
    'UKESM1-0-LL': 1850,
    'IPSL-CM6A-LR': 1850,
    'MRI-ESM2-0': 1850,
}

In [None]:
# Calibration figure: one panel per model (alphabetical order) showing the
# model's ERFaci in grey and the fitted emulation in the model's colour.
fig, ax = pl.subplots(3, 4, squeeze=False)
emulation_years = np.arange(1750.5, 2101)
for imodel, model in enumerate(sorted(models)):
    i, j = divmod(imodel, 4)
    panel = ax[i, j]
    panel.plot(years[model], aci[model], color='k', ls='-', alpha=0.5)
    panel.plot(
        emulation_years,
        aci_log([so2, bc, oc], *param_fits[model]),
        color=colors[model],
        zorder=7,
    )

    panel.set_xlim(1750, 2100)
    panel.set_ylim(-1.7, 0.5)
    panel.axhline(0, lw=0.5, ls=':', color='k')
    # Light grey band behind the data spanning 1850-2014.
    panel.fill_between(np.arange(1850, 2015), -10, 10, color='#e0e0e0', zorder=-20)
    # Right-align the last x tick label.
    panel.get_xticklabels()[-1].set_ha("right")
    # Shortened label for the one long model name.
    modlab = 'HadGEM3' if model == 'HadGEM3-GC31-LL' else model
    panel.text(0.05, 0.05, modlab, transform=panel.transAxes, fontweight='bold')

for row in range(3):
    ax[row, 0].set_ylabel('W m$^{-2}$')
# The bottom-right panel is hidden.
ax[2, 3].axis('off')

pl.suptitle('Aerosol-cloud interaction forcing emulations in FaIR v2.1')
fig.tight_layout()
for outfile in ('../plots/aci_calibration.png', '../plots/aci_calibration.pdf'):
    pl.savefig(outfile)

In [None]:
# Table of fitted parameters: after the transpose there is one row per model
# and one column per parameter ('beta', 'n0', 'n1', 'n2').
df_params = pd.DataFrame(param_fits, index=['beta', 'n0', 'n1', 'n2']).T

In [None]:
# For each model, overlay the fitted curve referenced to 1850 (aci_log) and
# the same parameters referenced to 1750 (aci_log1750).
fig, ax = pl.subplots(3, 4, figsize=(16, 12))
timeline = np.arange(1750.5, 2101)
for imodel, model in enumerate(models):
    # ax.flat walks the 3x4 grid row by row, i.e. (imodel // 4, imodel % 4).
    panel = ax.flat[imodel]
    fitted = param_fits[model]
    panel.plot(timeline, aci_log([so2, bc, oc], *fitted))
    panel.plot(timeline, aci_log1750([so2, bc, oc], *fitted))

In [None]:
# For each model, plot the difference between the 1850-referenced and the
# 1750-referenced emulation.
fig, ax = pl.subplots(3, 4, figsize=(16, 12))
timeline = np.arange(1750.5, 2101)
for imodel, model in enumerate(models):
    fitted = param_fits[model]
    offset = aci_log([so2, bc, oc], *fitted) - aci_log1750([so2, bc, oc], *fitted)
    # ax.flat walks the 3x4 grid row by row, i.e. (imodel // 4, imodel % 4).
    ax.flat[imodel].plot(timeline, offset)

In [None]:
# Write the fitted parameters next to the model CSVs. The loading loop at the
# top of the notebook skips any file named 'sumlog', so this output is not
# re-read as model data.
df_params.to_csv('../data/smith2021aerosol/sumlog.csv')

In [None]:
# Number of parameter sets to generate (ensemble size).
samples = 1500000

In [None]:
# Interactive check: display the fitted-parameter table.
df_params

In [None]:
# Interactive check: pairwise correlation between the fitted parameters across models.
df_params.corr()

In [None]:
# Split the parameter table into one Series per parameter (one value per model).
beta_samp = df_params['beta']
n0_samp, n1_samp, n2_samp = (df_params[name] for name in ('n0', 'n1', 'n2'))

In [None]:
# Joint Gaussian kernel density estimate over the three fitted species
# weights (n0, n1, n2); beta is not part of the KDE.
kde = scipy.stats.gaussian_kde([n0_samp, n1_samp, n2_samp])
# Draw four times as many candidates as needed, with a fixed seed; draws with
# a negative component are discarded in the following cells.
aci_sample=kde.resample(size=samples*4, seed=63648708)

In [None]:
# Interactive check: display the second row of the KDE draws (the n1 samples).
aci_sample[1,:]
#np.exp(-436.18073312)

In [None]:
# Flag every negative draw as NaN. aci_sample has exactly three rows (one per
# species weight), so a single whole-array mask covers rows 0, 1 and 2.
aci_sample[aci_sample < 0] = np.nan

In [None]:
# Drop every candidate (column) in which at least one weight was flagged NaN.
mask = np.isnan(aci_sample).any(axis=0)
aci_sample = aci_sample[:, np.logical_not(mask)]

In [None]:
# Interactive check: number of candidate draws that were discarded.
mask.sum()

In [None]:
# Interactive check: display the retained draws.
aci_sample

In [None]:
# 95th-percentile point of the standard normal distribution.
# NOTE(review): not referenced anywhere else in the visible notebook.
NINETY_TO_ONESIGMA = scipy.stats.norm.ppf(0.95)
# One target forcing per ensemble member, drawn uniformly from [-2, 0]
# (loc=-2, scale=2) with a fixed seed.
erfaci_sample = scipy.stats.uniform.rvs(size=samples, loc=-2.0, scale=2.0, random_state=71271)

In [None]:
# For each ensemble member, rescale the unit-shaped log curve so that it
# matches the member's sampled forcing, and record the implied beta.
beta = np.zeros(samples)
erfaci = np.zeros((351, samples))  # 351 years (1750-2100) x ensemble members

# Emission inputs that do not change between members are built once.
all_emissions = [so2, bc, oc]
emissions_2005_2014 = [so2[255:265], bc[255:265], oc[255:265]]  # indices 255-264 = 2005-2014
emissions_1850 = [so2[100], bc[100], oc[100]]
emissions_1750 = [so2[0], bc[0], oc[0]]

# NOTE(review): the time series below is normalised by (ts2010 - ts1850)
# while beta divides by (ts2010 - ts1750), and every aci_log call uses a
# placeholder scale of 0.92 rather than 1. As written, erfaci[:, i] and
# aci_log1750(..., beta[i], ...) do not coincide in general - confirm that
# this difference is intended.
for i in tqdm(range(samples)):
    w_so2, w_bc, w_oc = aci_sample[0, i], aci_sample[1, i], aci_sample[2, i]

    # Mean 2005-2014 value of the unscaled curve.
    ts2010 = np.mean(aci_log(emissions_2005_2014, 0.92, w_so2, w_bc, w_oc))
    # aci_log is referenced to 1850, so this evaluates to zero.
    ts1850 = aci_log(emissions_1850, 0.92, w_so2, w_bc, w_oc)
    ts1750 = aci_log(emissions_1750, 0.92, w_so2, w_bc, w_oc)

    # Curve re-referenced to 1750, then scaled to the sampled forcing.
    since_1750 = aci_log(all_emissions, 0.92, w_so2, w_bc, w_oc) - ts1750
    erfaci[:, i] = since_1750/(ts2010-ts1850)*(erfaci_sample[i])
    beta[i] = erfaci_sample[i] / (ts2010-ts1750)

In [None]:
# Interactive check: sampled forcing of the last ensemble member.
erfaci_sample[-1]

In [None]:
# pl.fill_between(np.arange(1750, 2101), np.nanpercentile(erfaci, 95, axis=1), np.nanpercentile(erfaci, 5, axis=1), color='lightgray');
# pl.plot(np.arange(1750, 2101), np.nanpercentile(erfaci, 50, axis=1));

In [None]:
# Forcing time series of the first 20 ensemble members (x axis is the array index).
pl.plot(erfaci[:, :20]);

In [None]:
# Same 20 members from index 100 onwards, each expressed as a ratio to member 0.
pl.plot(erfaci[100:, :20]/erfaci[100:, 0:1]);

In [None]:
# Histogram of the retained draws in row 0 (the n0 / SO2 weight).
pl.hist(aci_sample[0,:])

In [None]:
# Histogram of the retained draws in row 1 (the n1 / BC weight).
pl.hist(aci_sample[1,:])

In [None]:
# Histogram of the retained draws in row 2 (the n2 / OC weight).
pl.hist(aci_sample[2,:])

In [None]:
# Histogram of the beta values computed for the ensemble.
pl.hist(beta)

In [None]:
# Visual comparison for ensemble member 7: the scaled time series stored in
# erfaci against aci_log1750 evaluated with that member's beta and weights
# (`0:` selects all three rows of aci_sample).
pl.plot(erfaci[:, 7])
pl.plot(aci_log1750([so2, bc, oc], beta[7], *aci_sample[0:,7]))

In [None]:
# Interactive check: 1750-referenced series for ensemble member 0.
aci_log1750([so2, bc, oc], beta[0], *aci_sample[:,0])

In [None]:
# Interactive check: (3, number of retained draws); the second value must be
# at least `samples` for the slicing in the next cell to yield full columns.
aci_sample.shape

In [None]:
# Assemble the output table: the first `samples` retained draws of each
# species weight together with the matching beta values.
shape_so2, shape_bc, shape_oc = aci_sample[:, :samples]
df = pd.DataFrame({
    'shape_so2': shape_so2,
    'shape_bc': shape_bc,
    'shape_oc': shape_oc,
    'beta': beta,
})

In [None]:
# Write the parameter sets to CSV without the row index.
df.to_csv('../data/parameter_sets/erfaci.csv', index=False)