This notebook explores using different levels of dropout when making predictions, and tests how good the resulting predictions are.

In [None]:
import itertools
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import clear_output
from scipy.stats import pearsonr
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model,model_from_json

from mask_utils import show_image_with_masks,iou,symmetric_hausdorff_distance,mean_contour_distance,dsc
from MultiResUNet.MultiResUNet import MultiResUnet
from network_utils import gpu_memory_limit,augmentImageSequence

In [None]:
#limit how much GPU RAM can be allocated by this notebook... 8GB is 1/3 of available
#gpu_memory_limit is imported from network_utils; the argument is presumably in MB (8000 ~ 8GB) - TODO confirm
gpu_memory_limit(8000)

In [None]:
#load the train/test split, reusing the split cached on disk when one exists
DataDir = './data/pericardial/wsx_round2/'

splitDataFile = os.path.join(DataDir,'splitData.pickle')

if os.path.isfile(splitDataFile):
    #reuse the cached split so that every run works with the same train/test partition
    #NOTE: pickle.load can execute arbitrary code - only load cache files that this notebook wrote itself
    with open(splitDataFile,'rb') as splitFile:
        splitData = pickle.load(splitFile)

else:
    #load data - these files created by extract_dcm_for_wsx.ipynb
    X = np.load(os.path.join(DataDir,'X.npy'))
    Y = np.load(os.path.join(DataDir,'Y.npy')).astype('float')
    pxArea = np.load(os.path.join(DataDir,'pxSize.npy'))
    pxSpacing = np.sqrt(pxArea)

    #saved arrays were rank 3, so append a trailing axis to make them rank 4 (last dimension represents channels)
    X = X.reshape([*X.shape,1])
    Y = Y.reshape([*Y.shape,1])

    #do train/test split!
    splitData = train_test_split(X, Y, pxArea,pxSpacing, test_size=0.2,random_state=101)

    #cache the split; the context manager guarantees the file is flushed and closed
    with open(splitDataFile,'wb') as splitFile:
        pickle.dump(splitData,splitFile)

#extract individual bits (same order whether the split was loaded or freshly made)
X, X_test, Y, Y_test,pxArea,pxArea_test,pxSpacing,pxSpacing_test = splitData

del splitData #as this variable is no longer required


M = X.shape[0]
MTest = X_test.shape[0]
imShape = (1,*X.shape[1:])

In [None]:
#where the trained model lives on disk: <data/models/NAME>, with the weights stored in <data/models/NAME>.h5
modelBaseName = os.path.join('data','models','mrunet_bayesian_2020-06-15_17:46')
modelParamFile = '{}.h5'.format(modelBaseName)

#must be set by hand to match the model named above
#presumably this is the dropout rate the model was trained with - TODO confirm
originalDropoutRate = 0.15


In [None]:
#FUNCTIONS FOR DOING STOCHASTIC PREDICTIONS...

#FIXME: make these functions work on batches (arrays where M>1) instead of a single image at a time

def global_iou(predictions):

    '''IOU taken jointly over a whole stack of segmentations.

    The intersection is the element-wise minimum over axis 0 and the union is the
    element-wise maximum over axis 0; the result is the ratio of their sums.
    '''

    stacked = np.asarray(predictions)

    overlap = stacked.min(axis=0).sum()
    cover = stacked.max(axis=0).sum()

    return overlap / cover

def global_dsc(predictions):

    '''Dice coefficient taken jointly over a whole stack of segmentations.

    Computed as N * sum(element-wise minimum over axis 0) / sum(all predictions),
    where N is the size of axis 0.
    '''

    nSamples = predictions.shape[0]
    sharedMass = np.min(predictions,axis=0).sum()
    totalMass = predictions.sum()

    return (nSamples * sharedMass) / totalMass
    
def mean_pairwise_iou(predictions):

    '''Average of iou(a,b) over every unordered pair (a,b) of entries along axis 0.'''

    pairScores = []
    for first,second in itertools.combinations(predictions,2):
        pairScores.append(iou(first,second))

    return np.mean(pairScores)

def mean_pairwise_dsc(predictions):

    '''Average of dsc(a,b) over every unordered pair (a,b) of entries along axis 0.'''

    pairScores = []
    for first,second in itertools.combinations(predictions,2):
        pairScores.append(dsc(first,second))

    return np.mean(pairScores)
    
def voxel_uncertainty(predictions):

    '''voxel-wise uncertainty as defined in Roy et al (2018)

    Arguments:
        predictions {numpy array} -- stacked predictions; axis 0 indexes the samples and the last
            axis indexes the features (segmentation channels)

    Returns:
        numpy array -- -sum(p*log(p)) summed over the sample axis and then over the feature axis,
            i.e. the input shape with its first and last axes removed
    '''

    predictions = np.asarray(predictions)

    #p*log(p) evaluates to 0 * -inf = nan when a prediction is exactly 0; use its limit (0) there
    #so that a single saturated voxel cannot turn the whole sum into nan
    with np.errstate(divide='ignore',invalid='ignore'):
        pLogP = predictions*np.log(predictions)
    pLogP = np.where(predictions == 0,0.0,pLogP)

    #structure-and-voxel-wise uncertainty (compresses over the sample axis)
    feature_uncertainty = -np.sum(pLogP,axis = 0)
    #global uncertainty is the sum over the feature axis
    global_uncertainty = np.sum(feature_uncertainty,axis=-1)

    return global_uncertainty
    
def mean_std_area(predictions):

    '''the area occupied by each segmented channel. outputs two arrays: mean and standard deviation
    RETURNS ANSWERS IN PIXELS WHICH MUST BE RESCALED LATER!!!!!!

    Arguments:
        predictions {numpy array} -- stacked predictions; axis 0 indexes the samples and the last
            axis indexes the features (segmentation channels). Values above 0.5 count as segmented.

    Returns:
        mu {numpy array} -- mean over samples of the segmented pixel count, one value per feature
        sigma {numpy array} -- standard deviation over samples of the same counts
    '''
    #get the dims
    N = predictions.shape[0]
    #np.prod replaces the np.product alias (removed in NumPy 2.0); int() because the product
    #of an empty shape slice is the float 1.0, which reshape would reject
    nPixels = int(np.prod(predictions.shape[1:-1]))
    nFeatures = predictions.shape[-1]

    #reshape array so that it is (N,pixels,features) and threshold.
    masks = predictions.reshape((N,nPixels,nFeatures)) > 0.5

    #number of segmented pixels for each sample and feature
    areas = np.sum(masks,axis = 1)

    #mean and spread over samples, returning a value for each segmentation channel
    mu = np.mean(areas,axis=0)
    sigma = np.std(areas,axis=0)

    return mu,sigma

def predict_stochastic(model,N,X):

    '''draw and summarise multiple predictions from a model
    Arguments:
        model {a model, for example a Keras model, with a predict method} -- is assumed to have some stochastic component, i.e. multiple
            calls to predict on the same input are expected to give different outputs
        N {int} -- the number of sample predictions to be drawn from the stochastic model
        X {numpy array, probably float} -- assumed to be already consistent with inputs to the model. MUST ONLY BE A SINGLE IMAGE AND NOT MULTIPLE STACKED!!!!!

    Returns:
        consensus {numpy array, boolean} -- pixelwise segmentation of X (mean of the N predictions thresholded at 0.5)
        uncertainty -- output of voxel_uncertainty over the N predictions
        meanArea,stdArea -- outputs of mean_std_area over the N predictions (in pixels)
        mpDsc,gDsc -- mean pairwise and global Dice coefficient between the N predictions
        mpIou,gIou -- mean pairwise and global IOU between the N predictions
    '''

    #draw N predictions from the model over X; the samples are stacked along a new leading axis
    predictions = np.stack([model.predict(X) for _ in range(N)],axis=0)

    #NOTE: the predictions are NOT binarised here - every metric below receives the raw model
    #outputs (mean_std_area applies its own 0.5 threshold internally)
    consensus = np.mean(predictions,axis=0)>0.5

    #metrics described in Roy et al...
    uncertainty = voxel_uncertainty(predictions)

    mpDsc = mean_pairwise_dsc(predictions)
    gDsc = global_dsc(predictions)

    mpIou = mean_pairwise_iou(predictions)
    gIou = global_iou(predictions)
    meanArea,stdArea = mean_std_area(predictions)

    return consensus,uncertainty,meanArea,stdArea,mpDsc,gDsc,mpIou,gIou

Let's examine how adjusting the dropout rate impacts model performance - this can work in both directions...

In [None]:
def get_ious(dropoutRate,N):

    '''IOU of every test image when predicting with the saved weights at a given dropout rate

    Arguments:
        dropoutRate {float} -- passed to MultiResUnet as block_dropout_rate
        N {int} -- number of stochastic predictions drawn per image

    Returns:
        numpy array of length MTest -- iou between Y_test and the consensus segmentation of each test image
    '''

    model = MultiResUnet(height=X.shape[1],
                                width=X.shape[2],
                                n_channels=1,
                                layer_dropout_rate=None,
                                block_dropout_rate=dropoutRate,
                               )

    model.load_weights(modelParamFile)

    #predict one image at a time; x[np.newaxis] adds the batch axis without hard-coding the
    #image size, so this stays consistent with the model dimensions taken from X.shape above
    results = [predict_stochastic(model=model,N=N,X=x[np.newaxis]) for x in X_test]

    #only the consensus segmentation (first output of predict_stochastic) is needed here
    pred = np.array([result[0] for result in results])

    pred = pred.reshape(*X_test.shape)

    IOU = np.array([iou(Y_test[m,:,:,:], pred[m,:,:]) for m in range(MTest)])

    return IOU

#rates to sweep: no dropout, half-decade steps from 10**-3 to 10**-0.5, 0.5 and the original rate
halfDecadeRates = [10**exponent for exponent in (-3,-2.5,-2,-1.5,-1,-0.5)]
dropRates = np.array(sorted([0,0.5,originalDropoutRate] + halfDecadeRates))
nRates = len(dropRates)

#one row per dropout rate, one column per test image
ious = np.zeros((dropRates.size,MTest))

for step,rate in enumerate(dropRates,start=1):
    #progress counter, replacing the previous one
    clear_output()
    print('{}/{}'.format(step,nRates))

    ious[step-1,:] = get_ious(rate,20)

clear_output()

In [None]:
#look up the test IOU that was recorded for this model in the model history table
historyFile = os.path.join('data','models','model_history.csv')
modelHistory = pd.read_csv(historyFile,index_col=0)

#NOTE(review): modelBaseName already includes the data/models prefix at this point - confirm the csv index uses the same form
originalMeanIOU = modelHistory.loc[modelBaseName,'TestIOUMean']

In [None]:
#x positions are log10(rate); a rate of zero has no logarithm so it is pinned at -5.
#only the non-zero rates are passed to log10, which avoids the divide-by-zero warning and
#does not rely on the zero rate being the first entry
isZeroRate = dropRates == 0
plotX = np.full(dropRates.shape,-5.0)
plotX[~isZeroRate] = np.log10(dropRates[~isZeroRate])


plt.figure(figsize=(15,15))

#red dashed lines mark the recorded test IOU and the original dropout rate
plt.axhline(originalMeanIOU,c = 'r',linestyle='--')
plt.axvline(np.log10(originalDropoutRate),c='r',linestyle='--')

#mean and standard deviation of IOU across the test images at each rate
plt.errorbar(plotX,ious.mean(axis=1),ious.std(axis=1))

#readable tick labels: rounded log10 values, with the zero rate labelled explicitly
xlabels = ['no dropout' if zeroRate else '{:.2f}'.format(position)
           for position,zeroRate in zip(plotX,isZeroRate)]
plt.xticks(plotX,xlabels)


plt.title('test IOU against dropout rate')
plt.xlabel('log dropout rate')
plt.ylabel('IOU')

So, lowering the dropout rate relative to the original rate has very little effect on performance (as expected, given what dropout is usually used for). But what happens with individual examples?

In [None]:
#find the rows of ious by value rather than by hard-coded position, so the plot stays
#correct if the list of dropout rates (or the original rate) changes
noDropoutInd = int(np.argmin(dropRates))
originalRateInd = int(np.argmin(np.abs(dropRates - originalDropoutRate)))

plt.figure(figsize=(12,12))

#identity line: points on it have the same IOU with and without dropout
plt.plot([0,1],[0,1],c='k')

#one point per test image
plt.scatter(ious[noDropoutInd,:],ious[originalRateInd,:])

plt.axis('square')

plt.xlabel('iou (no dropout)')
plt.ylabel('iou (original dropout rate)')

So, there are some small changes but nothing major. Let's go on to examine how accuracy predictions vary with INCREASING dropout rate.

In [None]:
def get_metric_values(dropoutRate,N):

    '''agreement metrics for every test image when predicting with the saved weights at a given dropout rate

    Arguments:
        dropoutRate {float} -- passed to MultiResUnet as block_dropout_rate
        N {int} -- number of stochastic predictions drawn per image

    Returns:
        mpDsc,gDsc,mpIouPred,gIou {numpy arrays} -- mean pairwise Dice, global Dice, mean pairwise IOU
            and global IOU from predict_stochastic, one entry per image in X_test
    '''

    model = MultiResUnet(height=X.shape[1],
                                width=X.shape[2],
                                n_channels=1,
                                layer_dropout_rate=None,
                                block_dropout_rate=dropoutRate,
                               )

    model.load_weights(modelParamFile)

    #predict one image at a time; x[np.newaxis] adds the batch axis without hard-coding the
    #image size, so this stays consistent with the model dimensions taken from X.shape above
    results = [predict_stochastic(model=model,N=N,X=x[np.newaxis]) for x in X_test]

    #the first four outputs (consensus, uncertainty, mean/std area) are not needed here
    _,_,_,_,mpDsc,gDsc,mpIouPred,gIou = map(np.array,zip(*results))

    return mpDsc,gDsc,mpIouPred,gIou

In [None]:

#reference run: predict every test image with the original dropout rate
predictModel = MultiResUnet(height=X.shape[1],
                            width=X.shape[2],
                            n_channels=1,
                            layer_dropout_rate=None,
                            block_dropout_rate=originalDropoutRate,
                           )

predictModel.load_weights(modelParamFile)

#x[np.newaxis] adds the batch axis without hard-coding the image size, so this stays
#consistent with the model dimensions taken from X.shape above
predPred,uncertaintyPred,meanAreaPred,stdAreaPred,mpDscPred,gDscPred,mpIouPred,gIouPred = map(np.array,zip(*[predict_stochastic(model=predictModel,N=20,X=x[np.newaxis]) for x in X_test]))
del predictModel

predPred = predPred.reshape(*X_test.shape)

#all true IOU and DSC for the predictions.
IOU = [iou(Y_test[m,:,:,:], predPred[m,:,:]) for m in range(MTest)]
DSC = [dsc(Y_test[m,:,:,:], predPred[m,:,:]) for m in range(MTest)]



trues = [IOU,DSC]

metricNames = ['mean pairwise Dice coefficient',
               'global Dice coefficient',
               'mean pairwise IOU',
               'global IOU'
              ]

trueNames = ['true IOU','true DSC']

#one "metric/true" label per combination, in the same order as itertools.product(trues,<metrics>)
names = ['/'.join((m,t)) for t,m in itertools.product(trueNames,metricNames)]

nCombs = len(names)



In [None]:
#how good are the predicted metrics generated concurrently with the segmentations? This is the
#baseline against which the results from varying the rate are compared

originals = [mpDscPred,gDscPred,mpIouPred,gIouPred]

#every (true value, predicted metric) pairing, in the same order as names
originalPairs = list(itertools.product(trues,originals))

#pearson correlation and mean absolute error for each pairing
rOriginal = np.array([pearsonr(t,m)[0] for t,m in originalPairs])
maeOriginal = np.array([np.mean(np.abs(m-t)) for t,m in originalPairs])

In [None]:
#sweep from the original dropout rate upwards in steps of 0.025
#NOTE(review): np.arange with a float step may or may not include the final point due to rounding - check nRates
increaseDropRates = np.arange(originalDropoutRate,0.525,0.025)

nRates = len(increaseDropRates)

nRepeats = 5

#initialise outputs for r and mae, indexed by (combination, rate, repeat)
resultShape = (nCombs,nRates,nRepeats)
r = np.zeros(resultShape)
mae = np.zeros(resultShape)

for rateInd,rate in enumerate(increaseDropRates):

    #progress counter, replacing the previous one
    clear_output()
    print('{}/{}'.format(rateInd+1,nRates))

    for repeat in range(nRepeats):

        #fresh stochastic predictions for this rate
        mpDsc,gDsc,mpIou,gIou = get_metric_values(rate,20)

        metrics = [mpDsc,gDsc,mpIou,gIou]

        #score each predicted metric against each true value from the reference run
        pairings = itertools.product(trues,metrics)
        for combInd,(t,m) in enumerate(pairings):

            r[combInd,rateInd,repeat] = pearsonr(t,m)[0]
            mae[combInd,rateInd,repeat] = np.mean(np.abs(m-t))

clear_output()

In [None]:
#displays the last dropout rate reached by the sweep loop (sanity check of the end point)
rate

In [None]:
def _plot_metric_panel(position,values,reference,ylabel):

    '''draw one of the three stacked panels: one errorbar line per combination in names
    (mean and standard deviation over the last axis of values), with a dashed horizontal
    line of the same colour at the corresponding reference value'''

    plt.subplot(3,1,position)
    for combInd,name in enumerate(names):
        line = plt.errorbar(increaseDropRates,values[combInd].mean(axis=-1),values[combInd].std(axis=-1),label= name)
        plt.axhline(reference[combInd],linestyle='--',c=line[0].get_c())
    plt.ylabel(ylabel)


plt.figure(figsize = (15,15))

#combined score: correlation scaled down by the error
mult = (1-mae)*r
multOriginal = rOriginal*(1-maeOriginal)

_plot_metric_panel(1,r,rOriginal,'pearson r')
_plot_metric_panel(2,mae,maeOriginal,'MAE')
_plot_metric_panel(3,mult,multOriginal,'r * (1-MAE)')

#x label and legend are attached to the bottom panel only
plt.xlabel('resampled dropout rate')

plt.legend()

So, interestingly, there is no difference in predictive performance between the sample used to predict and the sample used to quantify that prediction - original mae and r are no different from that using an independent sample with the same dropout rate.

Any gains in MAE/r from increasing the dropout rate are extremely marginal, if present at all (see the error bars).