# Estimate uncertainty using model ensemble
Created by Ivan Lima on Mon Apr  4 2022 13:29:57 -0400

In this notebook we evaluate the uncertainty of the model estimates of DIC and TA by training an ensemble of 50 models on the training dataset and using those models to make predictions on the training and test datasets. Model node weights are randomly initialized during model instantiation, so each ensemble member starts from different weights. The ensemble mean and standard deviation are computed for each DIC and TA prediction.

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os, datetime, warnings
from tqdm import notebook
print('Last updated on {}'.format(datetime.datetime.now().ctime()))

Last updated on Mon Apr 11 11:40:33 2022


In [2]:
# Plot defaults: "paper" context, tick style, tab10 palette, 5x5 figures at 100 dpi with grid lines.
sns.set_theme(
    context='paper',
    style='ticks',
    palette='tab10',
    rc={
        'figure.dpi': 100,
        'figure.figsize': [5, 5],
        'axes.grid': True,
    },
)
# Show up to 50 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 50)
# NOTE: this silences every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

## Read merged bottle satellite data 

In [3]:
# Load the merged bottle/satellite table; the first CSV column becomes the index
# and the 'Date' column is parsed as datetimes.
df = pd.read_csv(
    'data/bottle_satellite_data_clean.csv',
    parse_dates=['Date'],
    index_col=0,
)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3623 entries, 0 to 3779
Data columns (total 34 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   EXPOCODE       2488 non-null   object        
 1   Accession      1124 non-null   float64       
 2   Cruise_ID      3044 non-null   object        
 3   Date           3623 non-null   datetime64[ns]
 4   Year           3623 non-null   int64         
 5   Month          3623 non-null   int64         
 6   Day            3623 non-null   int64         
 7   Time_UTC       3543 non-null   object        
 8   Latitude       3623 non-null   float64       
 9   Longitude      3623 non-null   float64       
 10  Depth          3623 non-null   float64       
 11  Salinity       3623 non-null   float64       
 12  Temperature    3623 non-null   float64       
 13  Oxygen         3533 non-null   float64       
 14  DIC            3623 non-null   float64       
 15  DIC_FLAG       3329 n

## Select input features and split data into training and test sets 

In [4]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold

# Input (feature) and output (target) variables.
# Alternative feature sets that can be swapped in:
# features = ['Depth', 'bottom_depth', 'Temperature', 'Salinity', 'Oxygen', 'pCO2_monthave', 'SLA', 'SST_hires', 'log_KD490']
# features = ['Depth', 'bottom_depth', 'Temperature', 'Salinity', 'pCO2_monthave', 'SLA', 'SST_hires', 'log_KD490']
features = ['Depth', 'bottom_depth', 'Temperature', 'Salinity', 'pCO2_monthave']
targets  = ['DIC', 'TA']

# Keep only the rows in which every selected feature and target is present.
data = df.loc[:, features + targets].dropna()

X_numpy = data[features].values
Y_numpy = data[targets].values

# Hold out a test set; the fixed random_state makes the split repeatable.
X_numpy_train, X_numpy_test, Y_numpy_train, Y_numpy_test = train_test_split(
    X_numpy,
    Y_numpy,
    random_state=42,
)

print('Training set: {}\nTest set: {}'.format(X_numpy_train.shape, X_numpy_test.shape))

# Choose the output-file suffix from the selected feature set:
#   'Oxygen' present         -> no suffix
#   no 'Oxygen', 'SLA' kept  -> '_noO2'
#   neither 'Oxygen' nor 'SLA' -> '_no_sat'
if 'Oxygen' in features:
    suffix = ''
elif 'SLA' in features:
    suffix = '_noO2'
else:
    suffix = '_no_sat'

# CSV files that receive the per-sample ensemble statistics.
outfile_train = 'data/uncertainty_trainset{}.csv'.format(suffix)
outfile_test = 'data/uncertainty_testset{}.csv'.format(suffix)

Training set: (2717, 5)
Test set: (906, 5)


## Rescale data 

Neural networks are very sensitive to the scale and distribution of each feature. Therefore, we rescale input features so they have $\overline{x}=0$ and $\sigma=1$.

In [5]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

# Fit the scaler on the training features only, then apply that same
# transform to the training set, the test set and the full dataset.
scaler = StandardScaler()
scaler.fit(X_numpy_train)
X_numpy_train_scaled, X_numpy_test_scaled, X_numpy_scaled = (
    scaler.transform(unscaled) for unscaled in (X_numpy_train, X_numpy_test, X_numpy)
)

## Run ensemble of models 

In [6]:
import torch
import torch.nn as nn
from sklearn.metrics import r2_score

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device

# Convert the NumPy arrays to float32 PyTorch tensors.
# Inputs are the standardized features; targets are left unscaled.
X_train = torch.from_numpy(X_numpy_train_scaled.astype(np.float32))
X_test = torch.from_numpy(X_numpy_test_scaled.astype(np.float32))
Y_train = torch.from_numpy(Y_numpy_train.astype(np.float32))

# Neural network dimensions and optimizer step size.
n_features = X_train.shape[1]  # number of input variables
n_targets = Y_train.shape[1]   # number of output variables
n_hidden = 256                 # number of units in each hidden layer
learning_rate = 1e-3

# torch.manual_seed(42) # set random number seed to make things reproducible

# create neural network regression model
class MLPReg(nn.Module):
    """Fully connected regression network with two hidden layers.

    Layout: Linear(n_features, n_hidden) -> LeakyReLU ->
    Linear(n_hidden, n_hidden) -> LeakyReLU -> Linear(n_hidden, n_targets).
    The linear layers keep PyTorch's default parameter initialization.

    Parameters
    ----------
    n_features : int
        Number of input variables.
    n_hidden : int
        Number of units in each hidden layer.
    n_targets : int
        Number of output variables.
    """

    def __init__(self, n_features, n_hidden, n_targets):
        super().__init__()
        self.l1 = nn.Linear(n_features, n_hidden)
        self.l2 = nn.Linear(n_hidden, n_hidden)
        self.l3 = nn.Linear(n_hidden, n_targets)
        self.activ = nn.LeakyReLU()
        # Disabled alternative: small-normal weight initialization, e.g.
        # nn.init.normal_(self.l1.weight, mean=0.0, std=0.01)  (same for l2, l3)

    def forward(self, x):
        """Return the network output for input batch ``x`` (no activation on the output layer)."""
        hidden = self.activ(self.l1(x))
        hidden = self.activ(self.l2(hidden))
        return self.l3(hidden)

# Loss function: mean squared error between predictions and observed targets.
loss_func = nn.MSELoss()

# Loss value of every training pass (kept for later inspection/plotting).
loss_vals = []

# Per-model predictions, kept separately for each target and each data split.
y_train_pred_DIC_list, y_train_pred_TA_list = [], []
y_test_pred_DIC_list, y_test_pred_TA_list = [], []

# Number of optimization passes used to train each model.
n_passes = 10_000

def reset_weights(m):
    """Re-initialize the parameters of the direct children of module ``m``.

    Every child that exposes a ``reset_parameters`` method has it called,
    and one line is printed per layer that is reset. Children without that
    method are skipped.
    """
    resettable = (child for child in m.children() if hasattr(child, 'reset_parameters'))
    for child in resettable:
        print(f'Reset trainable parameters of layer = {child}')
        child.reset_parameters()

# Train an ensemble of 50 models. Each iteration builds a new network and a new
# optimizer, so no trained state carries over from one ensemble member to the next.
for k in notebook.tnrange(50):
    # create model instance (new set of initial weights for this ensemble member)
    nn_reg = MLPReg(n_features=n_features, n_hidden=n_hidden, n_targets=n_targets)
    
    # optimizer
    optimizer = torch.optim.Adam(nn_reg.parameters(), lr=learning_rate)
    
    # reset model weights (not necessary)
    # nn_reg.apply(reset_weights)
    # for name, param in nn_reg.named_parameters():
    #     if name == 'l2.weight':
    #         print(name, param)
    
    # train the model: every pass uses the whole training set (no mini-batches)
    for n in range(n_passes):
        # forward pass
        prediction = nn_reg(X_train)
        loss = loss_func(prediction, Y_train)
        # backward pass
        optimizer.zero_grad()   # clear accumulated gradients for next pass
        loss.backward()         # do backpropagation to compute gradients
        optimizer.step()        # apply gradients to update parameters
        loss_vals.append(loss.item())  # loss_vals accumulates across all ensemble members
        # if (n==0) or ((n+1) % 1000 == 0):
        #     print('pass {:5d}/{}, MSE={:.2f}'.format(n+1, n_passes, loss.item()))

    # evaluate model on training & test set (gradient tracking disabled)
    with torch.no_grad():
        Y_pred_train = nn_reg(X_train)
        Y_pred_test  = nn_reg(X_test)

#     print('Ensemble {} training set R squared: {:.3f}'.format(k+1, r2_score(Y_numpy_train, Y_pred_train)))
#     print('Ensemble {} test set R squared:     {:.3f}\n'.format(k+1, r2_score(Y_numpy_test, Y_pred_test)))
    
    # store predictions on training and test sets
    # (column 0 = DIC, column 1 = TA, following the order of `targets`)
    y_train_pred_DIC_list.append(Y_pred_train[:,0].numpy())
    y_train_pred_TA_list.append(Y_pred_train[:,1].numpy())
    y_test_pred_DIC_list.append(Y_pred_test[:,0].numpy())
    y_test_pred_TA_list.append(Y_pred_test[:,1].numpy())

  0%|          | 0/50 [00:00<?, ?it/s]

In [7]:
# for name, param in nn_reg.named_parameters():
#     if name == 'l2.weight':
#         print(name, param)

## Compute ensemble means and standard deviations for DIC and TA predictions 

In [8]:
# Arrange the training-set predictions with one row per sample and one column per model.
y_train_pred_DIC = np.asarray(y_train_pred_DIC_list).T
y_train_pred_TA = np.asarray(y_train_pred_TA_list).T

# Ensemble spread (standard deviation) and ensemble mean for every training sample.
std_DIC_train = np.std(y_train_pred_DIC, axis=1)
std_TA_train = np.std(y_train_pred_TA, axis=1)
avg_DIC_train = np.mean(y_train_pred_DIC, axis=1)
avg_TA_train = np.mean(y_train_pred_TA, axis=1)

# Report the range of the ensemble spread.
print('Training set DIC std: min = {:.3f}, max = {:.3f}'.format(
    std_DIC_train.min(), std_DIC_train.max()))
print('Training set TA std:  min = {:.3f}, max = {:.3f}'.format(
    std_TA_train.min(), std_TA_train.max()))

Training set DIC std: min = 1.149, max = 14.044
Training set TA std:  min = 0.903, max = 12.622


In [9]:
# Arrange the test-set predictions with one row per sample and one column per model.
y_test_pred_DIC = np.asarray(y_test_pred_DIC_list).T
y_test_pred_TA = np.asarray(y_test_pred_TA_list).T

# Ensemble spread (standard deviation) and ensemble mean for every test sample.
std_DIC_test = np.std(y_test_pred_DIC, axis=1)
std_TA_test = np.std(y_test_pred_TA, axis=1)
avg_DIC_test = np.mean(y_test_pred_DIC, axis=1)
avg_TA_test = np.mean(y_test_pred_TA, axis=1)

# Report the range of the ensemble spread.
print('Test set DIC std: min = {:.3f}, max = {:.3f}'.format(
    std_DIC_test.min(), std_DIC_test.max()))
print('Test set TA std:  min = {:.3f}, max = {:.3f}'.format(
    std_TA_test.min(), std_TA_test.max()))

Test set DIC std: min = 1.236, max = 17.770
Test set TA std:  min = 0.864, max = 16.168


## Save computed means and standard deviations to CSV files

In [10]:
# Combine the unscaled training inputs, the observed targets and the
# per-sample ensemble statistics into a single table.
df_train = pd.DataFrame(
    np.hstack((X_numpy_train, Y_numpy_train)),
    columns=features + targets,
).assign(
    std_DIC=std_DIC_train,
    std_TA=std_TA_train,
    avg_DIC=avg_DIC_train,
    avg_TA=avg_TA_train,
)
df_train.to_csv(outfile_train)  # save stats to csv file
# Show the samples with the largest ensemble spread first.
df_train.sort_values(['std_DIC', 'std_TA'], ascending=False).head(15)

Unnamed: 0,Depth,bottom_depth,Temperature,Salinity,pCO2_monthave,DIC,TA,std_DIC,std_TA,avg_DIC,avg_TA
2068,164.0,139.0,17.366,35.2278,395.36,2054.4,2332.8,14.044241,8.447131,2107.233398,2316.937744
569,4.0,779.0,6.1888,35.533,397.55,2072.58,2347.87,12.49977,7.501103,2110.588867,2322.039795
975,2.8,22.0,26.251,30.0,402.45,1916.22,2118.24,10.987578,12.622075,1886.645996,2098.029297
748,25.1,1363.0,20.119,34.682,392.59,1960.2,2287.57,10.745625,7.278294,2012.022217,2287.900391
851,2.297,14.504581,25.6692,26.96115,408.9,1748.72,1998.156953,10.332966,8.951789,1781.143311,1965.39209
1898,89.0,85.0,9.65,32.4053,395.36,2042.5,2179.9,9.779042,5.483344,2063.976318,2180.061035
2397,64.0,197.0,16.7018,33.7383,398.49,2035.4,2247.43,9.538222,6.851133,2044.109741,2253.744141
1686,9.8,17.0,22.598,32.12,392.59,2032.44,2235.38,9.404996,8.187477,1946.848267,2183.099365
1947,62.0,61.0,15.1961,32.8131,398.81,1990.5,2171.5,9.268402,6.909104,2035.239258,2198.707031
414,89.0,85.0,9.5931,32.462,395.36,2044.8,2185.0,9.13046,5.145361,2067.318848,2183.177246


In [11]:
# Combine the unscaled test inputs, the observed targets and the
# per-sample ensemble statistics into a single table.
df_test = pd.DataFrame(
    np.hstack((X_numpy_test, Y_numpy_test)),
    columns=features + targets,
).assign(
    std_DIC=std_DIC_test,
    std_TA=std_TA_test,
    avg_DIC=avg_DIC_test,
    avg_TA=avg_TA_test,
)
df_test.to_csv(outfile_test)  # save stats to csv file
# Show the samples with the largest ensemble spread first.
df_test.sort_values(['std_DIC', 'std_TA'], ascending=False).head(15)

Unnamed: 0,Depth,bottom_depth,Temperature,Salinity,pCO2_monthave,DIC,TA,std_DIC,std_TA,avg_DIC,avg_TA
572,19.7,3801.0,16.324,33.386,382.24,1995.5,2242.7,17.770222,16.168198,2008.063599,2244.909912
358,3.0,103.0,19.933,35.36,414.86,2061.42,2333.15,13.847317,10.28201,2051.794434,2342.241943
4,328.0,836.0,13.4761,35.7263,398.81,2138.1,2340.0,12.675707,8.718031,2169.504639,2343.120361
308,1.5,1336.0,19.33,33.78,395.91,2001.8,2232.3,12.262308,10.14931,1998.741821,2256.231445
224,4.6,32.0,27.407,30.322,392.59,1868.57,2127.27,12.187196,11.274981,1866.457764,2112.842529
626,6.456,14.504581,24.867,28.7159,408.9,1805.03,2047.991054,11.410501,9.896626,1833.133179,2032.956665
323,3.0,50.0,16.941,31.13,414.86,1950.68,2128.61,10.825128,8.550876,1919.39917,2123.265869
595,2.0,1954.0,13.274,33.73,411.44,2017.0,2253.6,10.242925,8.177688,2045.712524,2262.039307
804,3.0,21.0,17.242,30.84,414.86,1941.88,2114.22,10.064697,8.420003,1912.744629,2110.172363
664,500.0,3367.0,17.534,36.465,384.73,2115.2,2387.4,10.063664,8.485571,2135.22583,2386.012207
