# Model ensemble to estimate confidence intervals for DIC estimates
Created by Ivan Lima on Sat Feb  4 2023 14:33:00 -0500

In this notebook we use a model ensemble to estimate confidence intervals for DIC predictions on the test set.

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os, datetime, warnings
print('Last updated on {}'.format(datetime.datetime.now().ctime()))

Last updated on Tue Feb  7 20:10:59 2023


In [2]:
# Notebook-wide display and plotting configuration.
# Project-local module; presumably applies shared seaborn/matplotlib settings as an
# import side effect -- TODO confirm, nothing from it is referenced by name here.
import sns_settings
sns.set_context('paper')             # use seaborn's 'paper' plotting context
pd.options.display.max_columns = 50  # display up to 50 columns of a DataFrame
# NOTE(review): this silences every warning category for the rest of the session,
# including numerical RuntimeWarnings (e.g. from np.log) and deprecation notices.
warnings.filterwarnings('ignore')

## Load DIC bottle data & select features & target

In [3]:
# Read the prepared bottle data ('<undefined>' and -9999. are parsed as missing),
# keep only the rows whose Oxygen_flag is 2 or 6 and whose Oxygen value is present,
# then append the natural log of Chl and KD490 as new columns.
df_bottle_dic = (
    pd.read_csv('data/bottle_data_DIC_prepared.csv', index_col=0,
                parse_dates=['Date'], na_values=['<undefined>', -9999.])
    .loc[lambda d: d.Oxygen_flag.isin([2, 6]) & d.Oxygen.notnull()]
    .assign(log_Chl=lambda d: np.log(d.Chl),
            log_KD490=lambda d: np.log(d.KD490))
)

# Predictor columns and the column to be predicted.
target = ['DIC']
features = [
    'Depth', 'Temperature', 'Salinity', 'Oxygen',
    'pCO2_atm', 'ADT', 'SST_hires', 'log_KD490',
]

# Tag used in the names of the output files written for this feature set.
suffix = 'all_vars'

In [4]:
# df_bottle_dic = pd.read_csv('data/bottle_data_DIC_prepared.csv', parse_dates=['Date'],
#                             index_col=0, na_values=['<undefined>',-9999.])
# df_bottle_dic['log_Chl'] = np.log(df_bottle_dic.Chl)
# df_bottle_dic['log_KD490'] = np.log(df_bottle_dic.KD490)

# features = ['Depth', 'Temperature', 'Salinity', 'pCO2_atm', 'ADT', 'SST_hires', 'log_KD490']
# target = ['DIC']

# suffix = 'noO2'

In [5]:
# df_bottle_dic = pd.read_csv('data/bottle_data_DIC_prepared.csv', parse_dates=['Date'],
#                             index_col=0, na_values=['<undefined>',-9999.])
# df_bottle_dic['log_Chl'] = np.log(df_bottle_dic.Chl)
# df_bottle_dic['log_KD490'] = np.log(df_bottle_dic.KD490)

# features = ['Depth', 'Temperature', 'Salinity', 'pCO2_atm']
# target = ['DIC']

# suffix = 'nosat'

## Split data into training and test sets

In [6]:
from sklearn.model_selection import train_test_split, cross_val_score

# Restrict to the modelling columns plus Season and discard incomplete rows,
# so that features, target and the stratification variable stay aligned.
data = df_bottle_dic[features + target + ['Season']].dropna()

# Plain arrays for the model: X is (n_samples, n_features), y is (n_samples, 1)
# because `target` is a one-element list.
X, y = data[features].to_numpy(), data[target].to_numpy()

# Split stratified by season; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    stratify=data.Season.to_numpy(),
    random_state=77,
)
X.shape, X_train.shape, X_test.shape, y_train.shape, y_test.shape

((3970, 8), (2977, 8), (993, 8), (2977, 1), (993, 1))

## Run model ensemble 

In [7]:
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import r2_score, mean_squared_error
import time

# Train `ntot` networks with identical architecture and training settings, each
# starting from freshly initialized weights, and collect every member's
# predictions and R squared on the test set. The spread of the predictions across
# members is what the rest of the analysis uses to estimate confidence intervals.

# NOTE: the seed is left disabled, so member weights (and therefore the scores
# and saved predictions) differ from one run of this cell to the next.
# keras.utils.set_random_seed(42) # make things reproducible
n_hidden = 256 # number of nodes in hidden layers
alpha = 0.01   # slope of LeakyReLU for negative inputs

# Template network. The input shape is declared as the first entry of the model:
# an `input_shape` argument on a layer that is not the first one is ignored by
# Sequential, which would leave the model unbuilt until the first call to fit().
base_model = keras.models.Sequential([
    keras.Input(shape=X_train.shape[1:]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(n_hidden),
    keras.layers.LeakyReLU(alpha=alpha),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(n_hidden),
    keras.layers.LeakyReLU(alpha=alpha),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(y_train.shape[1])
])

# Stop when the validation loss has not improved for 20 epochs and roll back to
# the best weights seen during that member's training.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)

score_vals = []       # store score values
y_test_pred_list = [] # store predictions
resid = []            # store residuals

# Make sure the output directory exists *before* the long training loop, so a
# missing folder cannot discard the whole ensemble when the CSV is written.
os.makedirs('results', exist_ok=True)

start = time.time()
ntot = 100 # number of ensemble members

for k in range(ntot):
    new_model = keras.models.clone_model(base_model) # cloning resets the model weights
    new_model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam())
    # 20% of the training data is held out as the validation set monitored by early stopping
    history = new_model.fit(X_train, y_train, epochs=700, verbose=0, validation_split=0.2,
                            callbacks=[early_stopping_cb])
    # verbose=0 keeps the per-member log to the single summary line printed below
    y_pred = new_model.predict(X_test, verbose=0)
    y_test_pred_list.append(y_pred.ravel())
    # resid.append((y_test - y_pred).ravel()) # compute residuals on test set
    score = r2_score(y_test, y_pred)
    score_vals.append(score)
    print('Ensemble {:03d}/{} test set R squared = {:.3f}'.format(k+1, ntot, score))

end = time.time()
print('\nExecution time = {:.2f} minutes'.format((end-start)/60.))

scores = np.array(score_vals)
print('\nBest R squared =  {:.3f}'.format(scores.max()))
print('Worst R squared = {:.3f}'.format(scores.min()))
print('Mean R squared =  {:.3f}'.format(scores.mean()))

# save ensemble predictions on test set to CSV file
# rows = test samples, columns = ensemble members
ensemble_preds = np.array(y_test_pred_list).transpose()
cols = ['DIC_pred_{:02d}'.format(n+1) for n in range(ensemble_preds.shape[1])]
df_ensemble_preds = pd.DataFrame(ensemble_preds, columns=cols)
# y_test is a single-column 2-D array; flatten it so the column is assigned from
# a 1-D vector, like the predictions
df_ensemble_preds['DIC_observed'] = y_test.ravel()
df_ensemble_preds.to_csv('results/ensemble_preds_dic_{}.csv'.format(suffix))

# # save ensemble residuals on test set to CSV file
# ensemble_resids = np.array(resid).transpose()
# cols = ['DIC_resid_{:02d}'.format(n+1) for n in range(ensemble_resids.shape[1])]
# df_ensemble_resids = pd.DataFrame(ensemble_resids, columns=cols)
# df_ensemble_resids.to_csv('results/ensemble_resids_dic_{}.csv'.format(suffix))

2023-02-07 20:11:01.652197: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Ensemble 001/100 test set R squared = 0.963
Ensemble 002/100 test set R squared = 0.963
Ensemble 003/100 test set R squared = 0.964
Ensemble 004/100 test set R squared = 0.964
Ensemble 005/100 test set R squared = 0.964
Ensemble 006/100 test set R squared = 0.963
Ensemble 007/100 test set R squared = 0.964
Ensemble 008/100 test set R squared = 0.964
Ensemble 009/100 test set R squared = 0.965
Ensemble 010/100 test set R squared = 0.963
Ensemble 011/100 test set R squared = 0.964
Ensemble 012/100 test set R squared = 0.965
Ensemble 013/100 test set R squared = 0.962
Ensemble 014/100 test set R squared = 0.965
Ensemble 015/100 test set R squared = 0.963
Ensemble 016/100 test set R squared = 0.964
Ensemble 017/100 test set R squared = 0.963
Ensemble 018/100 test set R squared = 0.964
Ensemble 019/100 test set R squared = 0.965
Ensemble 020/100 test set R squared = 0.963
Ensemble 021/100 test set R squared = 0.963
Ensemble 022/100 test set R squared = 0.965
Ensemble 023/100 test set R squa

In [8]:
# lower_bound = [np.quantile(ensemble_preds[n], 0.05) for n in range(ensemble_preds.shape[0])]
# upper_bound = [np.quantile(ensemble_preds[n], 0.95) for n in range(ensemble_preds.shape[0])]
# df_ci = pd.DataFrame({'DIC_observed':y_test.ravel(), 'lower_bound': lower_bound, 'upper_bound': upper_bound})
# df_ci.loc[df_ci.DIC_observed < df_ci.lower_bound].shape[0], df_ci.loc[df_ci.DIC_observed > df_ci.upper_bound].shape[0]

In [10]:
# np.abs(df_ci.DIC_observed - df_ci.lower_bound).max(), np.abs(df_ci.DIC_observed - df_ci.upper_bound).max()

(92.37309570312505, 99.79123535156259)