This notebook uses the Bayesian framework of Roy et al. (2018) in combination with the MultiResUNet architecture, and performs a grid search over the hyperparameters specific to this framework:
 - r, the dropout probability
 - the position of the dropout layers (either for every convolutional layer, or at the end of every residual block)
 - N, the number of samples drawn from the stochastic network for each prediction/QC

In [None]:
import numpy as np

import os

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import initializers
from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as keras#
from tensorflow.keras import callbacks
from tensorflow.keras import metrics

from scipy.stats import pearsonr

# from custom_losses import binary_crossentropy_weight_balance, binary_crossentropy_weight_dict, binary_crossentropy_closeness_to_foreground,dice_coef_loss

from mask_utils import show_image_with_masks,iou,symmetric_hausdorff_distance,mean_contour_distance,dsc

from network_utils import gpu_memory_limit,augmentImageSequence

from MultiResUNet.MultiResUNet import MultiResUnet

import itertools

import pickle
import pandas as pd
from IPython.display import clear_output

In [None]:
# Cap on the GPU RAM this notebook may allocate. 8GB is roughly 1/3 of what is
# available (value presumably in MB, given the 8GB note -- confirm in network_utils).
GPU_MEMORY_CAP = 8000
gpu_memory_limit(GPU_MEMORY_CAP)

In [None]:
# Folder holding the arrays written out by extract_dcm_for_wsx.ipynb
DataDir = './data/pericardial/wsx_round2/'

# Images, masks (cast to float) and the per-image pixel area
X = np.load(os.path.join(DataDir, 'X.npy'))
Y = np.load(os.path.join(DataDir, 'Y.npy')).astype('float')
pxArea = np.load(os.path.join(DataDir, 'pxSize.npy'))
# square root of the area, i.e. the side length if pixels are square
pxSpacing = np.sqrt(pxArea)

# The saved arrays are rank 3; append a trailing axis of length 1 (channels) to make them rank 4
X = X[..., np.newaxis]
Y = Y[..., np.newaxis]

# 80/20 train/test split with a fixed seed. The un-suffixed names are rebound to the
# training portion; the *_test names hold the held-out portion.
(X, X_test,
 Y, Y_test,
 pxArea, pxArea_test,
 pxSpacing, pxSpacing_test) = train_test_split(X, Y, pxArea, pxSpacing,
                                               test_size=0.2,
                                               random_state=101)

# number of training and test examples
M, MTest = X.shape[0], X_test.shape[0]

In [None]:
# Augmentation settings handed to augmentImageSequence during training.
# To switch augmentation off (so that only the order is randomised), set
# rotation_range, width_shift_range, height_shift_range, shear_range and
# zoom_range to 0 and leave everything else as it is.
dataGenArgs = {
    'rotation_range': 10,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.05,
    'zoom_range': 0.1,
    # flipping is deliberately disabled: the images must never be mirrored
    'horizontal_flip': False,
    'vertical_flip': False,
    'fill_mode': 'nearest',
    'data_format': 'channels_last',
    'featurewise_center': False,
    'featurewise_std_normalization': False,
    'zca_whitening': False,
}



# Stop once the *training* loss has failed to improve for 10 epochs, and roll the
# weights back to the best epoch seen.
earlyStop = callbacks.EarlyStopping(
    monitor='loss',
    mode='min',
    min_delta=0,
    patience=10,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.3 when the *validation* loss has plateaued for
# 5 epochs, then wait 5 epochs before watching for another plateau.
reduceLR = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=5,
    cooldown=5,
    verbose=1,
)

# callbacks passed to every model.fit call
CALLBACKS = [earlyStop, reduceLR]

# Adam optimiser settings used for training
OPT = Adam(learning_rate=1e-2, beta_1=0.9, beta_2=0.999, amsgrad=False)

# Remaining training hyperparameter: the mini-batch size (results are very sensitive to this)
BATCHSIZE = 8

Instantiate and train the model.

In [None]:

def train_model(dropout_rate,dropout_position,learning_rate=1e-2):

    '''Build, compile and fit a MultiResUnet for one dropout configuration.

    Only the settings of the stochastic network components are taken as arguments;
    the data (X, Y, X_test, Y_test), augmentation settings, callbacks and batch size
    are read from the notebook's globals.

    Arguments:
        dropout_rate {float} -- the dropout probability handed to the network
        dropout_position {str} -- 'layer' passes the rate as layer_dropout_rate,
            'block' passes it as block_dropout_rate (the other one is set to None)
        learning_rate {float} -- learning rate each freshly built optimizer starts from

    Returns:
        the fitted model

    Raises:
        ValueError -- if dropout_position is neither 'layer' nor 'block'
    '''

    if dropout_position == 'layer':
        layer_dropout_rate = dropout_rate
        block_dropout_rate = None
    elif dropout_position == 'block':
        layer_dropout_rate = None
        block_dropout_rate = dropout_rate
    else:
        #fail loudly rather than hitting an UnboundLocalError further down
        raise ValueError("dropout_position must be 'layer' or 'block', got %r" % (dropout_position,))

    keras.clear_session()

    tf.random.set_seed(101) #FIXME: this is not sufficient to guarantee deterministic behaviour during fitting.

    model = MultiResUnet(height=X.shape[1],
                         width=X.shape[2],
                         n_channels=1,
                         layer_dropout_rate=layer_dropout_rate,
                         block_dropout_rate=block_dropout_rate
                        )

    #Build a NEW optimizer for every model, using OPT only as a template for its settings.
    #Re-using the OPT instance itself would carry its internal state from one model to the
    #next, including any learning rate reduction made by ReduceLROnPlateau, so later models
    #in the grid search would not start from the same learning rate as the first.
    optimizer_config = dict(OPT.get_config())
    optimizer_config['learning_rate'] = learning_rate
    optimizer = type(OPT).from_config(optimizer_config)

    model.compile(optimizer = optimizer,
                  loss = 'binary_crossentropy',
                  metrics = ['accuracy']
                 )

    #NOTE(review): the held-out test split doubles as the validation data here.
    model.fit(augmentImageSequence(X,Y,dataGenArgs,batchSize=BATCHSIZE),
              epochs = 300, #upper bound only: training normally stops after 50-60 epochs
              steps_per_epoch= M//BATCHSIZE,
              workers=2,
              use_multiprocessing=True,
              validation_data=(X_test,Y_test.astype('float')),
              callbacks=CALLBACKS,
              verbose=0,
             )

    return model

In [None]:
#FUNCTIONS FOR DOING STOCHASTIC PREDICTIONS...

#FIXME: generalise these so they can be called on arrays holding more than one image (M>1); at present they only handle a single image per call.

def global_iou(predictions):

    '''Intersection-over-union taken jointly over a stack of segmentations.

    The intersection is the element-wise minimum along axis 0 and the union is the
    element-wise maximum along axis 0; each is summed over all remaining elements
    before the ratio is taken.
    '''

    overlap = np.sum(np.min(predictions, axis=0))
    envelope = np.sum(np.max(predictions, axis=0))

    return overlap / envelope

def global_dsc(predictions):

    '''Dice-style agreement taken jointly over a stack of segmentations.

    Computed as (number of samples) * (sum of the element-wise minimum along axis 0)
    divided by the sum of every element in the stack.
    '''

    sampleCount = predictions.shape[0]
    shared = np.min(predictions, axis=0).sum()
    total = predictions.sum()

    return (sampleCount * shared) / total
    
def mean_pairwise_iou(predictions):

    '''Average of iou over every unordered pair of entries along axis 0 of predictions.'''

    pairs = itertools.combinations(predictions, 2)

    return np.mean(list(itertools.starmap(iou, pairs)))

def mean_pairwise_dsc(predictions):

    '''Average of dsc over every unordered pair of entries along axis 0 of predictions.'''

    pairs = itertools.combinations(predictions, 2)

    return np.mean(list(itertools.starmap(dsc, pairs)))
    
def voxel_uncertainty(predictions):

    '''Voxel-wise uncertainty, following Roy et al (2018).

    Computes -sum(p * log(p)) along axis 0 (the sample axis) and then sums the result
    along the last axis (the feature/channel axis).

    Entries with p <= 0 contribute 0 to the sum (the usual convention 0*log(0) = 0).
    Without this, a single probability that has saturated to exactly 0 would turn the
    result for that voxel into NaN.

    Arguments:
        predictions {numpy array} -- probabilities, with samples on axis 0 and features on the last axis

    Returns:
        numpy array with the first and last axes of predictions summed out
    '''

    predictions = np.asarray(predictions)

    #swap non-positive entries for 1 inside the log, so that log(.) = 0 there and the
    #product p*log(p) is 0 instead of NaN (and no divide-by-zero warning is raised)
    safe = np.where(predictions > 0, predictions, 1.0)

    #structure-and-voxel-wise uncertainty (collapses the sample axis)
    feature_uncertainty = -np.sum(predictions*np.log(safe),axis = 0)
    #global uncertainty is the sum over the feature axis
    global_uncertainty = np.sum(feature_uncertainty,axis=-1)

    return global_uncertainty
    
def mean_std_area(predictions):

    '''Area occupied by each segmented channel, summarised over the samples.

    Each sample is thresholded at 0.5 and the number of pixels above threshold is
    counted per channel; the mean and standard deviation of those counts over the
    sample axis are returned.

    RETURNS ANSWERS IN PIXELS WHICH MUST BE RESCALED LATER!!!!!!

    Arguments:
        predictions {numpy array} -- samples on axis 0, channels on the last axis,
            every axis in between is treated as pixels

    Returns:
        mu {numpy array} -- mean area, one value per channel
        sigma {numpy array} -- standard deviation of the area, one value per channel
    '''
    #get the dims
    N = predictions.shape[0]
    #np.prod rather than np.product: the latter alias was removed in NumPy 2.0.
    #int() keeps the value usable as a dimension even when there are no pixel axes.
    nPixels = int(np.prod(predictions.shape[1:-1]))
    nFeatures = predictions.shape[-1]

    #reshape array so that it is (N,pixels,features) and threshold.
    thresholded = predictions.reshape((N,nPixels,nFeatures)) > 0.5

    #number of pixels above threshold for each sample and channel
    areas = np.sum(thresholded,axis = 1)

    #summarise over the sample axis, returning a value for each segmentation channel
    mu = np.mean(areas,axis=0)
    sigma = np.std(areas,axis=0)

    return mu,sigma

def predict_stochastic(model,N,X):

    '''Sample a stochastic model repeatedly on one input and summarise the draws.

    Arguments:
        model {any object with a predict method, e.g. a Keras model} -- predict is called N times on X.
            NOTE(review): the summaries are only meaningful if repeated predict calls differ
            (e.g. dropout active at inference) -- confirm this in the model definition.
        N {int} -- the number of sample predictions to draw
        X {numpy array, probably float} -- assumed to already be in the form the model expects.
            MUST ONLY BE A SINGLE IMAGE AND NOT MULTIPLE STACKED.

    Returns, in this order:
        consensus {numpy array, boolean} -- mean over the draws, thresholded at 0.5
        uncertainty -- output of voxel_uncertainty
        meanArea, stdArea -- outputs of mean_std_area (in pixels)
        mpDsc, gDsc -- outputs of mean_pairwise_dsc and global_dsc
        mpIou, gIou -- outputs of mean_pairwise_iou and global_iou
    '''

    #N draws from the model, stacked along a new leading (sample) axis
    draws = np.stack([model.predict(X) for _ in range(N)],axis=0)

    meanArea,stdArea = mean_std_area(draws)

    summary = (
        np.mean(draws,axis=0)>0.5, #binarised consensus segmentation
        voxel_uncertainty(draws),  #metrics described in Roy et al...
        meanArea,
        stdArea,
        mean_pairwise_dsc(draws),
        global_dsc(draws),
        mean_pairwise_iou(draws),
        global_iou(draws),
    )

    return summary

In [None]:
def summary_metrics(model,N):

    '''Evaluate a trained model on the test set (X_test, Y_test) using N stochastic samples per image.

    Arguments:
        model -- trained model, passed on to predict_stochastic
        N {int} -- number of samples drawn per test image

    Returns, in this order:
        mean of TestIOU, mean of TestDSC -- averages over the test set
        TestIOU, TestDSC {lists} -- iou / dsc of the consensus prediction against Y_test, one value per test image
        mpDsc, gDsc, mpIou, gIou {numpy arrays} -- the agreement metrics from predict_stochastic, one value per test image
    '''

    #one stochastic prediction per test image; x[np.newaxis] adds the leading batch axis of
    #length 1 that the model expects, whatever the image size is
    perImage = [predict_stochastic(model,N,x[np.newaxis]) for x in X_test]

    #transpose the list of result tuples into one array per returned quantity
    predTest,_,_,_,mpDsc,gDsc,mpIou,gIou = map(np.array,zip(*perImage))

    #drop the extra singleton axis left over from the per-image batches so that the predictions
    #line up with Y_test (shape taken from the data instead of being hard-coded)
    predTest = predTest.reshape(Y_test.shape)

    #loop over the example axis, calculating metrics for each image separately
    TestIOU = [iou(truth, pred) for truth, pred in zip(Y_test, predTest)]
    TestDSC = [dsc(truth, pred) for truth, pred in zip(Y_test, predTest)]

    return np.mean(TestIOU),np.mean(TestDSC),TestIOU,TestDSC,mpDsc,gDsc,mpIou,gIou
    
    

In [None]:
# Keys of each result record, in the order the values are assembled:
# the grid-search inputs followed by the outputs of summary_metrics.
resNames = (
    # grid-search inputs
    ['dropoutPosition', 'r', 'N']
    # performance against the ground truth (test-set means, then per-image values)
    + ['true mean IOU', 'true mean DSC', 'true IOU', 'true DSC']
    # agreement between the stochastic samples
    + ['mean pairwise DSC', 'global DSC', 'mean pairwise IOU', 'global IOU']
)

In [None]:
#all hyperparameter combinations
dropoutPositions = ['layer','block']
dropoutRates = [0.01,0.05,0.1,0.15,0.2,0.25,0.3,0.4,0.5]
Ns = [5,10,15,20,25,30,50,100]
#all combinations. With meshgrid's default indexing and a row-major flatten, N varies fastest,
#then the dropout position, then the dropout rate. All values of N for one (position, rate)
#pair are therefore consecutive, so each trained model is reused for every N.
dropoutPositions,dropoutRates,Ns = [x.flatten() for x in np.meshgrid(dropoutPositions,dropoutRates,Ns)]


#set "previous" vars to values not used, so the first iteration always trains a model.
previousPos = '808'
previousR = 909

inputParams = list(zip(dropoutPositions,dropoutRates,Ns))

DATAFILE = './data/Bayesian_hyperparameter_tuning.pickle'
#initialise results list - check if it's been done already and trim list of parameters to be executed if it has
#NOTE(review): a results file written while the retrain check below used `and` holds 'block' rows
#that were computed with the 'layer' model - delete DATAFILE and rerun if that applies.
if os.path.isfile(DATAFILE):
    #only ever load a results file written by this notebook: unpickling can execute arbitrary code
    with open(DATAFILE,'rb') as f:
        results = pickle.load(f)
    inputParams = inputParams[len(results):]
else:
    results = []

#loop over the remaining parameter combinations
for ind,params in enumerate(inputParams):
    clear_output()
    print(f'{ind+1}/{len(inputParams)}')

    pos,r,N = params

    #a new model is needed as soon as EITHER the position or the rate differs from the last one
    #trained. (Requiring both to differ skipped retraining whenever only the position changed.)
    if previousPos != pos or previousR != r:
        model = train_model(r,pos)

    #overwrite previous values
    previousPos = pos
    previousR = r

    #now use the current value of N to sample from the model and record how well it does, add to params for a dict output
    res = params + summary_metrics(model,N)

    #create a dict for this iteration
    resultDict = dict(zip(resNames,res))

    #add to list and write a pickle out after every iteration, so an interrupted run can resume
    results.append(resultDict)

    with open(DATAFILE,'wb+') as f:
        pickle.dump(results,f)