# Investigate a pure log fit to aerosol cloud interactions


\begin{equation}
F = \sum_{i} \beta_i \log \left( 1 + \frac{A_i}{n_i} \right)
\end{equation}


where 
- $A_i$ is the atmospheric input (concentrations or emissions of a species),
- $\beta_i$ is a scale factor,
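- $n_i$ is a shape factor that describes how linear or logarithmic the relationship is.

Note: the fit that is actually active in the code below uses a single logarithm of a weighted sum of the inputs, $F = \beta \log \left( 1 + \sum_{i} n_i A_i \right)$, expressed relative to its 1850 value. In that form $n_i$ multiplies $A_i$ rather than dividing it. The per-species sum shown above survives only in commented-out code.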

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pl
import json
import scipy.stats
import pooch
import os
import glob
from tqdm.autonotebook import tqdm
from scipy.optimize import curve_fit

In [None]:
# Load the per-model aerosol forcing diagnostics, one CSV per model.
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the model order (and therefore subplot placement and the row order of the
# saved parameter table) reproducible between runs and machines.
files = sorted(glob.glob('../data/smith2021aerosol/*.csv'))

ari = {}     # model name -> ERFari series (SW + LW)
aci = {}     # model name -> ERFaci series (SW + LW)
models = []  # model names, in processing order
years = {}   # model name -> the 'year' index of that model's CSV
for file in files:
    # The model name is the file name without its directory or extension.
    model = os.path.splitext(os.path.basename(file))[0]
    # sumlog.csv is the fitted-parameter table this notebook writes into the
    # same directory further down; it is not a model output, so skip it.
    if model == 'sumlog':
        continue
    models.append(model)
    df = pd.read_csv(file, index_col='year')
    # Total forcing for each component is the sum of its shortwave and
    # longwave columns.
    ari[model] = (df['aprp_ERFariSW'] + df['aprp_ERFariLW']).values.squeeze()
    aci[model] = (df['aprp_ERFaciSW'] + df['aprp_ERFaciLW']).values.squeeze()
    years[model] = df.index

In [None]:
# Scratch check: whole-year offset of the first E3SM index entry from 1850.
int(np.floor(years['E3SM'][0]-1850))

In [None]:
# Location and expected checksum of the RCMIP annual-mean emissions CSV.
_RCMIP_EMISSIONS_URL = (
    "doi:10.5281/zenodo.4589756/rcmip-emissions-annual-means-v5-1-0.csv"
)
_RCMIP_EMISSIONS_HASH = "md5:4044106f55ca65b094670e7577eaf9b3"

# pooch.retrieve returns the local path of the fetched file.
rcmip_emissions_file = pooch.retrieve(
    url=_RCMIP_EMISSIONS_URL,
    known_hash=_RCMIP_EMISSIONS_HASH,
)

In [None]:
# Read the full RCMIP emissions table; the rows of interest are selected from
# it by Scenario / Region / Variable below.
emis_df = pd.read_csv(rcmip_emissions_file)

In [None]:
def _ssp245_world_series(variable):
    """Return the global ssp245 series for one emissions variable.

    Selects the rows of ``emis_df`` whose Scenario is 'ssp245', Region is
    'World' and Variable equals ``variable``, keeps the columns labelled
    '1750' through '2100', interpolates along the column (time) axis to fill
    gaps, and returns the squeezed values.
    """
    selected = (
        (emis_df['Scenario'] == 'ssp245')
        & (emis_df['Region'] == 'World')
        & (emis_df['Variable'] == variable)
    )
    window = emis_df.loc[selected, '1750':'2100']
    return window.interpolate(axis=1).squeeze().values


# Emissions of the three species used as predictors in the fits.
bc = _ssp245_world_series('Emissions|BC')

oc = _ssp245_world_series('Emissions|OC')

so2 = _ssp245_world_series('Emissions|Sulfur')

In [None]:
# Display the loaded ERFaci series for one model as a sanity check.
aci['UKESM1-0-LL']

In [None]:
def aci_log(x, beta, n0, n1, n2):
    """Single-log aerosol-cloud forcing, expressed relative to index 100.

    Evaluates ``beta * log(1 + x[0]*n0 + x[1]*n1 + x[2]*n2)`` and subtracts
    the same expression evaluated at element 100 of the file-level ``so2``,
    ``bc`` and ``oc`` arrays. Those arrays start at the column labelled
    '1750', so element 100 is the 1850 entry.

    Parameters
    ----------
    x : sequence of three scalars or arrays
        Inputs weighted by ``n0``, ``n1`` and ``n2`` respectively; the
        baseline term pairs them with ``so2``, ``bc`` and ``oc``.
    beta : float
        Scale factor applied to the logarithm.
    n0, n1, n2 : float
        Weights of the three inputs inside the logarithm.

    Returns
    -------
    The forcing minus its baseline value, with the same shape as ``x[0]``.
    """
    def _unreferenced(sulfur, black_carbon, organic_carbon):
        # Forcing before the baseline is removed.
        return beta*np.log(1 + sulfur*n0 + black_carbon*n1 + organic_carbon*n2)

    baseline = _unreferenced(so2[100], bc[100], oc[100])
    return (_unreferenced(x[0], x[1], x[2]) - baseline)

In [None]:
param_fits = {}

# Parameter order is (beta, n0, n1, n2): beta is constrained to be
# non-positive and the three input weights to be non-negative.
lower_bounds = (-np.inf, 0, 0, 0)
upper_bounds = (0, np.inf, np.inf, np.inf)

for model in models:
    # Convert this model's first and last year into positions within the
    # emissions arrays, which start at 1750.
    first_year = years[model][0]
    last_year = years[model][-1]
    start = int(np.floor(first_year-1750))
    stop = int(np.ceil(last_year-1750))
    print(model)
    emissions_window = [so2[start:stop], bc[start:stop], oc[start:stop]]
    # Only the best-fit parameters are kept; the covariance is not used.
    param_fits[model], _cov = curve_fit(
        aci_log,
        emissions_window,
        aci[model],
        bounds=(lower_bounds, upper_bounds),
        max_nfev=10000,
    )

In [None]:
# Display the fitted parameters for one model. `model` still holds the last
# value assigned by the fitting loop, so this shows only the final model.
param_fits[model]

In [None]:
# One panel per model: fitted curve over the whole emissions period, with the
# model's own ERFaci series on top.
fig, ax = pl.subplots(3, 4, figsize=(16, 12))
full_period = np.arange(1750.5, 2101)
for imodel, model in enumerate(models):
    row, col = divmod(imodel, 4)
    panel = ax[row, col]
    panel.plot(full_period, aci_log([so2, bc, oc], *param_fits[model]))
    panel.plot(years[model], aci[model])

In [None]:
# Collect the fits into a table with one row per model and one column per
# parameter; the labels follow the parameter order of aci_log.
df_params = pd.DataFrame(param_fits, index=['beta', 'n0', 'n1', 'n2']).T

In [None]:
def aci_log1750(x, beta, n0, n1, n2):
    """Single-log aerosol-cloud forcing, expressed relative to index 0.

    Same functional form as ``aci_log``,
    ``beta * log(1 + x[0]*n0 + x[1]*n1 + x[2]*n2)``, but the baseline that is
    subtracted is evaluated at element 0 of the file-level ``so2``, ``bc``
    and ``oc`` arrays, i.e. their first (1750) entry.

    Parameters
    ----------
    x : sequence of three scalars or arrays
        Inputs weighted by ``n0``, ``n1`` and ``n2`` respectively.
    beta : float
        Scale factor applied to the logarithm.
    n0, n1, n2 : float
        Weights of the three inputs inside the logarithm.

    Returns
    -------
    The forcing minus its baseline value, with the same shape as ``x[0]``.
    """
    def _unreferenced(sulfur, black_carbon, organic_carbon):
        # Forcing before the baseline is removed.
        return beta*np.log(1 + sulfur*n0 + black_carbon*n1 + organic_carbon*n2)

    baseline = _unreferenced(so2[0], bc[0], oc[0])
    return (_unreferenced(x[0], x[1], x[2]) - baseline)

In [None]:
# One panel per model: the same fitted parameters evaluated against the two
# baselines (aci_log first, then aci_log1750).
fig, ax = pl.subplots(3, 4, figsize=(16, 12))
full_period = np.arange(1750.5, 2101)
for imodel, model in enumerate(models):
    row, col = divmod(imodel, 4)
    panel = ax[row, col]
    for forcing_fn in (aci_log, aci_log1750):
        panel.plot(full_period, forcing_fn([so2, bc, oc], *param_fits[model]))

In [None]:
# One panel per model: difference between the two baseline conventions for
# the same fitted parameters.
fig, ax = pl.subplots(3, 4, figsize=(16, 12))
full_period = np.arange(1750.5, 2101)
for imodel, model in enumerate(models):
    row, col = divmod(imodel, 4)
    rel_index100 = aci_log([so2, bc, oc], *param_fits[model])
    rel_index0 = aci_log1750([so2, bc, oc], *param_fits[model])
    ax[row, col].plot(full_period, rel_index100 - rel_index0)

In [None]:
# Save the fitted parameters alongside the per-model CSVs. The loading loop
# at the top of this notebook skips a file named 'sumlog', presumably so that
# this output is not read back in as if it were a model.
df_params.to_csv('../data/smith2021aerosol/sumlog.csv')

In [None]:
# Number of ensemble members to generate. The KDE below is drawn five times
# this many times so that enough draws remain after filtering.
samples = 1000

In [None]:
# Display the table of fitted parameters.
df_params

In [None]:
# Inputs for the kernel density estimate: beta is used as fitted, while the
# three input weights are taken in log space.
# NOTE(review): a weight fitted exactly at its lower bound of 0 would give
# -inf here - confirm none of the fits sit on the bound.
beta_samp = df_params['beta']
log_n0_samp, log_n1_samp, log_n2_samp = (
    np.log(df_params[column]) for column in ('n0', 'n1', 'n2')
)
log_n0_samp, log_n1_samp, log_n2_samp

In [None]:
#log_beta0_samp

In [None]:
# Fit a 4-D Gaussian KDE to (beta, log n0, log n1, log n2) across models and
# draw five times the required number of samples with a fixed seed, leaving
# headroom for the filtering applied afterwards.
kde_inputs = [beta_samp, log_n0_samp, log_n1_samp, log_n2_samp]
kde = scipy.stats.gaussian_kde(kde_inputs)
n_draws = samples*5
aci_sample = kde.resample(size=n_draws, seed=63648708)

In [None]:
# Mark draws whose log-weight exceeds a per-parameter cap as invalid (NaN).
# Rows 1-3 of aci_sample hold log n0, log n1 and log n2.
# NOTE(review): the caps are hard-coded constants; their origin is not shown
# in this notebook - confirm where they come from.
log_weight_caps = (
    (1, -3.295975/2),
    (2, -0.553641/2),
    (3, -3.582930/2),
)
for row, cap in log_weight_caps:
    aci_sample[row, aci_sample[row,:] > cap] = np.nan

In [None]:
# Drop every draw (column) that has at least one NaN entry.
mask = np.isnan(aci_sample).any(axis=0)
aci_sample = aci_sample[:, np.logical_not(mask)]

In [None]:
# Number of draws rejected by the NaN filter.
mask.sum()

In [None]:
# Display the draws that survived filtering (rows: beta, log n0, log n1, log n2).
aci_sample

In [None]:
# 95th-percentile quantile of the standard normal. It is not referenced by
# any other visible cell of this notebook.
NINETY_TO_ONESIGMA = scipy.stats.norm.ppf(0.95)

# Target ERFaci values: uniform on [-2.0, 0.0], one per ensemble member,
# drawn with a fixed seed.
erfaci_prior = scipy.stats.uniform(loc=-2.0, scale=2.0)
erfaci_sample = erfaci_prior.rvs(size=samples, random_state=71271)

In [None]:
# Build an ensemble of ERFaci time series: for each filtered KDE draw, evaluate
# the single-log forcing over the full emissions period, rebase it to the first
# (1750) entry, and rescale it so that the mean over elements 255-264
# (2005-2014) relative to 1850 equals the sampled target in erfaci_sample.

# The loop indexes the first `samples` columns of aci_sample; fail early with a
# clear message if filtering left too few, rather than with an IndexError
# part-way through.
n_available = aci_sample.shape[1]
if n_available < samples:
    raise ValueError(
        "only {} KDE draws survived filtering but {} are required".format(
            n_available, samples
        )
    )

beta = np.zeros(samples)
erfaci = np.zeros((351,samples))
for i in tqdm(range(samples)):
    # Back-transform draw i once: beta is stored as-is, the three weights are
    # stored as logs.
    params = (
        aci_sample[0,i],
        np.exp(aci_sample[1,i]),
        np.exp(aci_sample[2,i]),
        np.exp(aci_sample[3,i]),
    )
    # Mean forcing over array elements 255-264.
    ts2010 = np.mean(
        aci_log([so2[255:265], bc[255:265], oc[255:265]], *params)
    )
    # aci_log is already expressed relative to element 100, so this term is
    # zero by construction; it is kept to make the reference period explicit.
    ts1850 = aci_log([so2[100], bc[100], oc[100]], *params)
    ts1750 = aci_log([so2[0], bc[0], oc[0]], *params)
    erfaci[:,i] = (
        aci_log([so2, bc, oc], *params) - ts1750
    )/(ts2010-ts1850)*(erfaci_sample[i])
    # NOTE(review): this scale factor is normalised by (ts2010 - ts1750),
    # whereas the series above is normalised by (ts2010 - ts1850) - confirm
    # that the different baselines are intended.
    beta[i] = erfaci_sample[i] / (ts2010-ts1750)

In [None]:
# Ensemble spread over time: 95th, 50th and 5th percentiles across members.
calendar_years = np.arange(1750, 2101)
for percentile in (95, 50, 5):
    pl.plot(calendar_years, np.percentile(erfaci, percentile, axis=1))

In [None]:
# Inspect a single ensemble member (index 6) together with the
# back-transformed parameters (beta, n0, n1, n2) that produced it.
pl.plot(erfaci[:, 6])
aci_sample[0,6],np.exp(aci_sample[1,6]), np.exp(aci_sample[2,6]), np.exp(aci_sample[3,6])

In [None]:
# Histogram of row 1 of the filtered draws (the log n0 samples).
pl.hist(aci_sample[1,:])

In [None]:
# Display the sulfur emissions series used as the first predictor.
so2

In [None]:
# Scratch: negated exponential of row 2 of the draws.
# NOTE(review): row 2 holds log n1 in the current four-parameter setup; the
# negation looks like a leftover from the commented-out six-parameter variant
# - confirm before relying on this output.
-np.exp(aci_sample[2,:])

In [None]:
# Scratch: magnitude of a log(1 + weight * input) term for hand-picked values.
np.log(1 + 0.54 * 14)

In [None]:
# Scratch: the same term with the input raised from 14 to 15, for comparison.
np.log(1 + 0.54 * 15)

In [None]:
# Scratch: the same term for a much smaller weight (0.008) and an input of 30.
np.log(1 + 0.008 * 30)