# Estimate uncertainty using model ensemble
Created by Ivan Lima on Mon Apr  4 2022 13:29:57 -0400

In this notebook we evaluate uncertainty for the model estimates of DIC and TA by training an ensemble of 50 models on the training dataset and using those models to make predictions on the test dataset. Model node weights are randomly initialized during model instantiation. The standard deviation across the ensemble members is computed for each DIC and TA prediction on the test dataset.

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os, datetime, warnings
from tqdm import notebook
# Stamp the notebook with the time at which this cell was run
run_stamp = datetime.datetime.now().ctime()
print('Last updated on {}'.format(run_stamp))

Last updated on Mon Apr 18 10:47:54 2022


In [2]:
# Plot styling: seaborn 'paper' context, tick style, tab10 palette and a grid on all axes
plot_rc = {
    'figure.dpi': 100,
    'figure.figsize': [5, 5],
    'axes.grid': True,
}
sns.set_theme(context='paper', style='ticks', palette='tab10', rc=plot_rc)

# Display up to 50 columns when showing wide DataFrames
pd.set_option('display.max_columns', 50)

# Silence every warning for the rest of the notebook
# NOTE(review): this also hides deprecation warnings raised by the libraries used below
warnings.filterwarnings('ignore')

## Read merged bottle satellite data 

In [3]:
# Load the cleaned bottle/satellite table: the first CSV column becomes the row
# index and the 'Date' column is parsed into datetimes
bottle_csv = 'data/bottle_satellite_data_clean.csv'
df = pd.read_csv(bottle_csv, index_col=0, parse_dates=['Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3623 entries, 0 to 3779
Data columns (total 34 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   EXPOCODE       2488 non-null   object        
 1   Accession      1124 non-null   float64       
 2   Cruise_ID      3044 non-null   object        
 3   Date           3623 non-null   datetime64[ns]
 4   Year           3623 non-null   int64         
 5   Month          3623 non-null   int64         
 6   Day            3623 non-null   int64         
 7   Time_UTC       3543 non-null   object        
 8   Latitude       3623 non-null   float64       
 9   Longitude      3623 non-null   float64       
 10  Depth          3623 non-null   float64       
 11  Salinity       3623 non-null   float64       
 12  Temperature    3623 non-null   float64       
 13  Oxygen         3533 non-null   float64       
 14  DIC            3623 non-null   float64       
 15  DIC_FLAG       3329 n

## Select input features and split data into training and test sets 

In [4]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold

# Input features and prediction targets.
# Alternative feature sets that can be swapped in:
# features = ['Depth', 'bottom_depth', 'Temperature', 'Salinity', 'Oxygen', 'pCO2_monthave', 'SLA', 'SST_hires', 'log_KD490']
# features = ['Depth', 'bottom_depth', 'Temperature', 'Salinity', 'pCO2_monthave', 'SLA', 'SST_hires', 'log_KD490']
features = ['Depth', 'bottom_depth', 'Temperature', 'Salinity', 'pCO2_monthave']
targets = ['DIC', 'TA']

# Keep only the rows that have no missing value in any selected column
selected_columns = features + targets
data = df[selected_columns].dropna()

X_numpy, Y_numpy = data[features].values, data[targets].values

# Split into training and test sets (fixed seed so the split is repeatable)
split_arrays = train_test_split(X_numpy, Y_numpy, random_state=42)
X_numpy_train, X_numpy_test, Y_numpy_train, Y_numpy_test = split_arrays

train_shape, test_shape = X_numpy_train.shape, X_numpy_test.shape
print('Training set: {}\nTest set: {}'.format(train_shape, test_shape))

# Suffix appended to output file names, chosen from the active feature set:
#   'Oxygen' present            -> ''
#   no 'Oxygen' but 'SLA'       -> '_noO2'
#   neither 'Oxygen' nor 'SLA'  -> '_no_sat'
if 'Oxygen' in features:
    suffix = ''
elif 'SLA' in features:
    suffix = '_noO2'
else:
    suffix = '_no_sat'

Training set: (2717, 5)
Test set: (906, 5)


## Rescale data 

Neural networks are very sensitive to the scale and distribution of each feature. Therefore, we rescale input features so they have $\overline{x}=0$ and $\sigma=1$.

In [5]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

# Fit the scaler on the training features only, then apply that same
# transformation to the training set, the test set and the full dataset
scaler = StandardScaler()
scaler.fit(X_numpy_train)

X_numpy_train_scaled = scaler.transform(X_numpy_train)
X_numpy_test_scaled, X_numpy_scaled = (
    scaler.transform(unscaled) for unscaled in (X_numpy_test, X_numpy)
)

## Run ensemble of models 

In [6]:
import torch
import torch.nn as nn
from sklearn.metrics import r2_score, mean_squared_error

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device

# convert numpy arrays to pytorch tensors
X_train = torch.from_numpy(X_numpy_train_scaled.astype(np.float32))
Y_train = torch.from_numpy(Y_numpy_train.astype(np.float32))
X_test = torch.from_numpy(X_numpy_test_scaled.astype(np.float32))

# set neural network parameters
n_features = X_train.size()[1] # number of input variables
n_targets = Y_train.size()[1]  # number of output variables
n_hidden = 256                 # number of hidden layers
learning_rate = 0.001

# torch.manual_seed(42) # set random number seed to make things reproducible

# create neural network regression model
class MLPReg(nn.Module):
    """Fully connected regression network with two hidden layers.

    Three linear layers are applied in sequence. The same LeakyReLU activation
    follows the first and the second layer; the output of the third layer is
    returned without an activation.

    Parameters
    ----------
    n_features : int
        Size of each input sample.
    n_hidden : int
        Number of units in each of the two hidden layers.
    n_targets : int
        Size of each output sample.
    """

    def __init__(self, n_features, n_hidden, n_targets):
        super().__init__()
        self.l1 = nn.Linear(in_features=n_features, out_features=n_hidden)
        self.l2 = nn.Linear(in_features=n_hidden, out_features=n_hidden)
        self.l3 = nn.Linear(in_features=n_hidden, out_features=n_targets)
        self.activ = nn.LeakyReLU()
        # Explicit weight initialisation is currently disabled:
        # nn.init.normal_(self.l1.weight, mean=0.0, std=0.01)
        # nn.init.normal_(self.l2.weight, mean=0.0, std=0.01)
        # nn.init.normal_(self.l3.weight, mean=0.0, std=0.01)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        hidden = self.activ(self.l1(x))
        hidden = self.activ(self.l2(hidden))
        return self.l3(hidden)

# Loss function: mean squared error
loss_func = nn.MSELoss()

# Loss value of every training pass (kept for plotting)
loss_vals = []

# Predictions of the individual models; one array is appended per ensemble member
y_train_pred_DIC_list, y_train_pred_TA_list = [], []
y_test_pred_DIC_list, y_test_pred_TA_list = [], []

# Number of training passes per model
n_passes = 10000

def reset_weights(m):
    """Re-initialise the parameters of the direct children of module ``m``.

    Every immediate child that provides a ``reset_parameters`` method is
    announced on stdout and then reset; children without that method are
    left untouched. Only direct children are visited.
    """
    resettable = [layer for layer in m.children() if hasattr(layer, 'reset_parameters')]
    for layer in resettable:
        print(f'Reset trainable parameters of layer = {layer}')
        layer.reset_parameters()

# Train an ensemble of 50 models. Each iteration builds a fresh network and
# optimizer, trains it on the training set and stores its predictions for
# both the training and the test set.
for k in notebook.tnrange(50):
    # create model instance (a new network for every ensemble member)
    nn_reg = MLPReg(n_features=n_features, n_hidden=n_hidden, n_targets=n_targets)
    
    # optimizer (Adam, bound to the parameters of this ensemble member)
    optimizer = torch.optim.Adam(nn_reg.parameters(), lr=learning_rate)
    
    # reset model weights (not necessary)
    # nn_reg.apply(reset_weights)
    # for name, param in nn_reg.named_parameters():
    #     if name == 'l2.weight':
    #         print(name, param)
    
    # train the model; every pass feeds the whole training set at once
    for n in range(n_passes):
        # forward pass
        prediction = nn_reg(X_train)
        loss = loss_func(prediction, Y_train)
        # backward pass
        optimizer.zero_grad()   # clear accumulated gradients for next pass
        loss.backward()         # do backpropagation to compute gradients
        optimizer.step()        # apply gradients to update parameters
        loss_vals.append(loss.item())  # losses of all ensemble members end up in this one list
        # if (n==0) or ((n+1) % 1000 == 0):
        #     print('pass {:5d}/{}, MSE={:.2f}'.format(n+1, n_passes, loss.item()))

    # evaluate model on training & test set (no gradient tracking needed)
    with torch.no_grad():
        Y_pred_train = nn_reg(X_train)
        Y_pred_test  = nn_reg(X_test)

#     print('Ensemble {} training set R squared: {:.3f}'.format(k+1, r2_score(Y_numpy_train, Y_pred_train)))
#     print('Ensemble {} test set R squared:     {:.3f}\n'.format(k+1, r2_score(Y_numpy_test, Y_pred_test)))
    
    # store predictions on training and test sets (column 0 = DIC, column 1 = TA)
    y_train_pred_DIC_list.append(Y_pred_train[:,0].numpy())
    y_train_pred_TA_list.append(Y_pred_train[:,1].numpy())
    y_test_pred_DIC_list.append(Y_pred_test[:,0].numpy())
    y_test_pred_TA_list.append(Y_pred_test[:,1].numpy())

  0%|          | 0/50 [00:00<?, ?it/s]

In [7]:
# for name, param in nn_reg.named_parameters():
#     if name == 'l2.weight':
#         print(name, param)

## Compute standard deviations for DIC and TA predictions 

In [40]:
def minmax(x):
    """Summarise ``x`` by its minimum, maximum and median.

    Despite the name, the median is returned as a third statistic.

    Parameters
    ----------
    x : pandas.Series
        Values to summarise (any object providing ``min()``, ``max()`` and
        ``median()`` methods works).

    Returns
    -------
    pandas.Series
        The three statistics, labelled 'min', 'max' and 'median' in that order.
    """
    labels = ['min', 'max', 'median']
    stats = [getattr(x, label)() for label in labels]
    return pd.Series(stats, index=labels)

### Training set

In [41]:
# Arrange the stored predictions with one row per training sample and one
# column per ensemble member
y_train_pred_DIC = np.array(y_train_pred_DIC_list).T
y_train_pred_TA = np.array(y_train_pred_TA_list).T

# Spread (standard deviation) and mean across ensemble members for each sample
std_DIC_train, avg_DIC_train = y_train_pred_DIC.std(axis=1), y_train_pred_DIC.mean(axis=1)
std_TA_train, avg_TA_train = y_train_pred_TA.std(axis=1), y_train_pred_TA.mean(axis=1)

# Summary of the per-sample standard deviations
df_std_train = pd.DataFrame(dict(std_DIC=std_DIC_train, std_TA=std_TA_train))
df_std_train.apply(minmax).style.format('{:.3f}')

Unnamed: 0,std_DIC,std_TA
min,1.283,0.771
max,14.66,10.847
median,3.252,2.55


### Test set

In [42]:
# Arrange the stored predictions with one row per test sample and one
# column per ensemble member
y_test_pred_DIC = np.array(y_test_pred_DIC_list).T
y_test_pred_TA = np.array(y_test_pred_TA_list).T

# Spread (standard deviation) and mean across ensemble members for each sample
std_DIC_test, avg_DIC_test = y_test_pred_DIC.std(axis=1), y_test_pred_DIC.mean(axis=1)
std_TA_test, avg_TA_test = y_test_pred_TA.std(axis=1), y_test_pred_TA.mean(axis=1)

# Summary of the per-sample standard deviations
df_std_test = pd.DataFrame(dict(std_DIC=std_DIC_test, std_TA=std_TA_test))
df_std_test.apply(minmax).style.format('{:.3f}')

Unnamed: 0,std_DIC,std_TA
min,1.294,0.789
max,13.091,13.527
median,3.613,2.833


### Save computed standard deviations to CSV file

In [43]:
outfile_train = 'data/uncertainty_trainset{}.csv'.format(suffix)

# Unscaled training inputs and observed targets side by side, followed by the
# ensemble standard deviation and mean of each target
train_values = np.c_[X_numpy_train, Y_numpy_train]
df_train = pd.DataFrame(train_values, columns=features + targets).assign(
    std_DIC=std_DIC_train,
    std_TA=std_TA_train,
    avg_DIC=avg_DIC_train,
    avg_TA=avg_TA_train,
)

df_train.to_csv(outfile_train)  # save stats to csv file
df_train.apply(minmax).style.format('{:.3f}')

Unnamed: 0,Depth,bottom_depth,Temperature,Salinity,pCO2_monthave,DIC,TA,std_DIC,std_TA,avg_DIC,avg_TA
min,0.0,8.508,1.288,26.218,374.84,1748.72,1909.76,1.283,0.771,1782.446,1924.835
max,2200.652,4587.0,29.553,36.798,414.86,2215.17,2407.88,14.66,10.847,2205.63,2413.679
median,38.285,131.0,11.138,33.31,404.15,2069.56,2237.6,3.252,2.55,2068.117,2238.004


In [44]:
outfile_test = 'data/uncertainty_testset{}.csv'.format(suffix)

# Unscaled test inputs and observed targets side by side, followed by the
# ensemble standard deviation and mean of each target
test_values = np.c_[X_numpy_test, Y_numpy_test]
df_test = pd.DataFrame(test_values, columns=features + targets).assign(
    std_DIC=std_DIC_test,
    std_TA=std_TA_test,
    avg_DIC=avg_DIC_test,
    avg_TA=avg_TA_test,
)

df_test.to_csv(outfile_test)  # save stats to csv file
df_test.apply(minmax).style.format('{:.3f}')

Unnamed: 0,Depth,bottom_depth,Temperature,Salinity,pCO2_monthave,DIC,TA,std_DIC,std_TA,avg_DIC,avg_TA
min,0.0,8.508,2.88,26.266,382.24,1784.83,1916.933,1.294,0.789,1823.02,1926.786
max,1499.557,4587.0,29.515,36.743,414.86,2233.891,2402.9,13.091,13.527,2206.837,2412.018
median,40.0,131.0,11.234,33.482,403.34,2070.5,2242.55,3.613,2.833,2069.708,2242.181


## Compute $R^2$ and RMSE for each ensemble member 

### Training set

In [45]:
ens_out_train = 'data/ensemble_trainset{}.csv'.format(suffix)

# Each column of the prediction arrays holds one ensemble member, so iterating
# over the transpose yields one member's predictions at a time.
# In Y_numpy_train, column 0 is DIC and column 1 is TA (the order of `targets`).
dic_r2_train = [r2_score(Y_numpy_train[:, 0], member) for member in y_train_pred_DIC.T]
ta_r2_train = [r2_score(Y_numpy_train[:, 1], member) for member in y_train_pred_TA.T]

# RMSE is computed as sqrt(MSE). The `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so relying on it raises a TypeError on current releases.
dic_rmse_train = [np.sqrt(mean_squared_error(Y_numpy_train[:, 0], member)) for member in y_train_pred_DIC.T]
ta_rmse_train = [np.sqrt(mean_squared_error(Y_numpy_train[:, 1], member)) for member in y_train_pred_TA.T]

# One row per ensemble member
df_ensemble_train = pd.DataFrame({'r2_DIC': dic_r2_train, 'rmse_DIC': dic_rmse_train, 'r2_TA': ta_r2_train, 'rmse_TA': ta_rmse_train})
df_ensemble_train.to_csv(ens_out_train)
df_ensemble_train.apply(minmax).style.format('{:.3f}')

Unnamed: 0,r2_DIC,rmse_DIC,r2_TA,rmse_TA
min,0.943,15.338,0.985,7.962
max,0.958,17.821,0.988,9.089
median,0.952,16.322,0.987,8.444


### Test set

In [46]:
ens_out_test =  'data/ensemble_testset{}.csv'.format(suffix)

dic_r2_test = [r2_score(Y_numpy_test[:,0], y_test_pred_DIC[:,c]) for c in range(y_test_pred_DIC.shape[1])]
ta_r2_test  = [r2_score(Y_numpy_test[:,1], y_test_pred_TA[:,c]) for c in range(y_test_pred_TA.shape[1])]
dic_rmse_test = [mean_squared_error(Y_numpy_test[:,0], y_test_pred_DIC[:,c], squared=False) for c in range(y_test_pred_DIC.shape[1])]
ta_rmse_test  = [mean_squared_error(Y_numpy_test[:,1], y_test_pred_TA[:,c], squared=False) for c in range(y_test_pred_TA.shape[1])]

df_ensemble_test = pd.DataFrame({'r2_DIC': dic_r2_test, 'rmse_DIC': dic_rmse_test, 'r2_TA': ta_r2_test, 'rmse_TA': ta_rmse_test})
df_ensemble_test.to_csv(ens_out_test)
df_ensemble_test.apply(minmax).style.format('{:.3f}')

Unnamed: 0,r2_DIC,rmse_DIC,r2_TA,rmse_TA
min,0.941,16.489,0.981,8.895
max,0.95,17.807,0.986,10.336
median,0.946,17.117,0.984,9.539
