RCA (reverse classification accuracy) uses a network's output as the training target for a second classifier, which is then applied to other labelled data. This notebook attempts to use random forest segmentation as that second classifier.
THIS STUFF WILL ONLY WORK ON IMAGES, NOT VOLUMES.

In [None]:
from tensorflow.keras.models import model_from_json,clone_model

from mask_utils import iou,dsc,mean_contour_distance,symmetric_hausdorff_distance

from network_utils import augmentImageSequence, gpu_memory_limit

import numpy as np

import os

from sklearn.model_selection import train_test_split

import copy

import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier

from scipy import signal

import warnings

In [None]:
# Presumably caps the GPU memory this session may allocate (helper imported from network_utils).
# NOTE(review): the unit of 8000 is not visible here (MB?) - confirm against network_utils.
gpu_memory_limit(8000)

Load data and model, and get them sorted in the same way as in the notebooks used to train models (i.e. same train/test split, random seed etc)

In [None]:
DataDir = './data/pericardial/wsx_round2/'

# Images, masks and per-image pixel areas; these files are created by
# extract_dcm_for_wsx.ipynb.
X = np.load(os.path.join(DataDir, 'X.npy'))
Y = np.load(os.path.join(DataDir, 'Y.npy')).astype('float')
pxArea = np.load(os.path.join(DataDir, 'pxSize.npy'))
pxSpacing = np.sqrt(pxArea)

# Append a trailing axis of length 1 to act as the channel dimension
# (per the original note, the saved arrays are rank 3, so this makes them rank 4).
X = X[..., np.newaxis]
Y = Y[..., np.newaxis]

# 80/20 train/test split with a fixed seed, so that it matches the split used
# in the notebooks that trained the models.
(X_train, X_test,
 Y_train, Y_test,
 pxArea_train, pxArea_test,
 pxSpacing_train, pxSpacing_test) = train_test_split(
    X, Y, pxArea, pxSpacing,
    test_size=0.2,
    random_state=101,
)

# Optional, currently disabled: work on a 200-image subset and cache the sizes.
# X = X[:200,:,:,:]
# Y = Y[:200,:,:,:]
# M = X.shape[0]
# MTest = X_test.shape[0]

Next, we need to load a trained model which can be used for the RCA.

In [None]:
# Any trained model will do here; we just need one to experiment with.
# THIS MODEL IS NOT THE BEST ONE BUT HAS BEEN SELECTED TO GIVE A WIDE SPREAD IN IOU ON TRAIN AND TEST SETS
modelBaseName = './data/models/mrunet_2020-04-07_09:59'

# Rebuild the network from its JSON architecture description...
with open(f'{modelBaseName}.json', 'r') as json_file:
    model = model_from_json(json_file.read())

# ...then restore the trained weights into it.
model.load_weights(f'{modelBaseName}.h5')

In [None]:
def cuboid_mean(image,cuboidRadii):

    '''Return the mean of image over a box centred on every element.

    image is an array with one axis per entry of cuboidRadii (in this notebook,
    a 2D image - so the "cuboid" is really a rectangle).
    cuboidRadii is a sequence of non-negative ints, one per axis of image. The
    box edge length along an axis is 1 + 2*radius, so it is always odd and the
    centre element is unambiguous.

    Returns a float array with the same shape as image.'''

    # replicate the border values outwards so the box never runs off the image
    # (zero padding would drag the values near the edges towards 0)
    pads = [(r, r) for r in cuboidRadii]
    padded = np.pad(image, pad_width=pads, mode='edge')

    edge_lengths = [1 + 2 * r for r in cuboidRadii]

    # normalise the box so that the convolution yields a mean rather than a sum;
    # an all-ones kernel would return the box total, which grows with the radius
    kernel = np.ones(edge_lengths, dtype='float')
    kernel /= kernel.size

    # 'valid' mode trims exactly the padding added above, restoring the input shape
    return signal.convolve(padded, kernel, method='direct', mode='valid')
    
def cuboid_offset_mean_difference(image,cuboidRadii,offsets):

    '''Nonlocal feature: each element of image minus the cuboid_mean value taken
    at a position displaced by offsets.

    image is the array to process (a 2D image in this notebook).
    cuboidRadii is passed straight through to cuboid_mean.
    offsets holds one signed int per axis of image; a negative offset looks
    towards lower indices, a positive one towards higher indices.

    Returns an array with the same shape as image.'''

    # grow the image only on the side each offset points to, replicating the
    # edge values, so that the displaced positions always exist
    pads = [(abs(shift), 0) if shift < 0 else (0, shift) for shift in offsets]
    extended = np.pad(image, pad_width=pads, mode='edge')

    shifted = cuboid_mean(extended, cuboidRadii)

    # cut out the window that lines up element-for-element with the original
    # image once the displacement has been applied
    window = tuple(
        slice(-length, shift) if shift < 0 else slice(shift, length)
        for length, shift in zip(shifted.shape, offsets)
    )

    return image - shifted[window]

def image2features(image):

    '''Flatten an array of arbitrary dimension into (npixels,nfeatures).

    The last axis of image is treated as the feature axis; all leading axes are
    collapsed (in C order) into a single pixel axis.'''

    nfeatures = image.shape[-1]

    # np.prod replaces np.product, which was removed in NumPy 2.0. The int()
    # cast matters for 1-D input, where the product of an empty shape is the
    # float 1.0 and reshape would reject it.
    npx = int(np.prod(image.shape[:-1]))

    return image.reshape(npx, nfeatures)


def feature_engineer_function(nFeatures=100,maxRadius=5,maxOffset=20,random_seed = None):

    '''This function RETURNS A FUNCTION which converts an image into an array of
    (npixels,nfeatures).

    nFeatures is the number of random (radii, offsets) parameter sets to draw;
    each one becomes a feature column.
    maxRadius is the exclusive upper bound on each box radius (drawn from 0..maxRadius-1).
    maxOffset is the exclusive bound on the magnitude of each offset
    (drawn from -maxOffset+1..maxOffset-1).
    random_seed, when given, seeds a private generator so the same parameter
    sets are drawn every time; when None the global numpy random state is used,
    so np.random.seed still controls the draw.

    Note that parameter sets are drawn independently, so duplicates are possible;
    the check below only guards against asking for more features than there are
    distinct parameter sets.'''

    #calculate the maximum number of unique parameter sets that can be generated with the maximum parameters specified.
    maxParameterSets = maxRadius**2 * (maxOffset*2 - 1)**2

    assert maxParameterSets > nFeatures,'you are trying to generate more features than are mathematically possible (max is ' + str(maxParameterSets) + ')'

    # a private RandomState keeps a seeded draw reproducible without disturbing
    # (or being disturbed by) the global numpy random state
    rng = np.random if random_seed is None else np.random.RandomState(random_seed)

    radii = rng.randint(low=0,high=maxRadius,size=(nFeatures,2))
    # offsets are bounded by maxOffset (not maxRadius), matching the capacity check above
    offsets = rng.randint(low=-maxOffset+1,high=maxOffset,size=(nFeatures,2))

    def feature_Function(image):

        #only need to use the cuboid_offset_mean_difference as cuboid_mean is a special case of this (with offsets = [0,0])
        flat = image.squeeze()

        feat = np.stack([cuboid_offset_mean_difference(flat,radius,offset) for radius,offset in zip(radii,offsets)],axis=-1)

        return image2features(feat)

    return feature_Function

In [None]:
def train_rca_model(x,y):

    '''Fit a random forest that maps the engineered per-pixel features of x to
    the thresholded mask y.

    x is the image to featurise and y is the mask used as the target.
    Returns (rca_model, feature_Function): the fitted classifier and the feature
    function that must be used to prepare any other image for it.

    NOTE(review): no shape checks are performed; the caller is expected to pass
    a single image and its mask.'''

    #binarise the mask at 0.5 so that it can serve as a classification target
    targets = y > 0.5

    #a fresh, randomly parameterised feature function which can be reused later
    feature_Function = feature_engineer_function()

    #turn the image into an (npx,nfeatures) array
    features = feature_Function(x)
    print(features.shape)

    targets = image2features(targets).flatten() #FIXME this only works for cases with one output class.
    print(targets.shape)

    #hyperparameters from Valindria at al 2017 currently; n_jobs is kept low to
    #leave cores free for other users of the machine
    forest_settings = dict(n_estimators=50, max_depth=30, n_jobs=4)
    rca_model = RandomForestClassifier(**forest_settings)

    rca_model.fit(features, targets)

    return rca_model, feature_Function

In [None]:
# Quick check: fit an RCA forest on the first image, using its own mask Y[0] as the target.
# The returned (model, feature function) pair is not stored.
train_rca_model(X[0].squeeze(),Y[0])

In [None]:
def evaluate_model(model,X_val,Y_val):

    '''Score a model against a set of known masks.

    model is anything with a predict method that accepts X_val (presumably a
    model retrained on a predicted mask in order to do RCA, or the original network).
    X_val and Y_val are the images and their known masks, indexed along axis 0.

    Returns an array holding one iou value per prediction.

    NOTE(review): the shapes of X_val and Y_val are not checked against each other.'''

    predictions = model.predict(X_val)

    #score every prediction against the mask at the same index
    scores = []
    for m in range(predictions.shape[0]):
        scores.append(iou(predictions[m], Y_val[m]))

    return np.array(scores)

In [None]:
def predict_and_RCA_evaluate(model,x,X_val,Y_val):

    '''Run RCA for a single image.

    model is the segmentation network being assessed.
    x is one image with a leading batch axis of length 1, matching model.input_shape.
    X_val and Y_val are the reference images and their known masks, indexed along axis 0.

    The network's prediction for x is used as the target to fit a random forest
    on x; that forest is then used to segment every reference image and is
    scored against the known masks.

    Returns an array with one iou value per reference image.'''

    assert np.all(model.input_shape[1:] == x.shape[1:]),'image input shape and model input do not match - have you reshaped the image correctly?'
    assert x.shape[0] == 1, 'you can only do RCA on one image at a time!'

    y = model.predict(x)

    rca_model,feature_Function = train_rca_model(x,y)

    # The forest was fitted on (npixels,nfeatures) rows, so each reference image
    # must go through the same feature function before prediction, and the flat
    # per-pixel labels must be folded back into the mask's shape before scoring.
    # NOTE(review): the cast to the mask dtype assumes iou expects both inputs in
    # the same numeric form - confirm against mask_utils.iou.
    ious = np.array([
        iou(rca_model.predict(feature_Function(x_val)).reshape(y_val.shape).astype(y_val.dtype), y_val)
        for x_val,y_val in zip(X_val,Y_val)
    ])

    return ious#,mcd

First, we should look at the iou spread over the whole thing... The IOUs of all of the data we plan to use, to evaluate the *original* model

In [None]:
# IOU of the original network on every test image, scored against the known masks.
trueIOUs = evaluate_model(model,X_test,Y_test)

# Histogram of those IOUs in bins of width 0.05 across [0, 1].
plt.hist(trueIOUs,bins = np.arange(0,1.05,0.05))

Now, lets just get a single datapoint to play with... 

In [None]:
#select an example image (seeded so the same one is picked every run)
np.random.seed(7)
egInd = np.random.randint(X_test.shape[0])

#get the IOU that we want to predict...
trueIOU = trueIOUs[egInd]

#get the actual image out and shaped correctly (leading batch axis of length 1)
egX = X_test[egInd,:,:].reshape(1,*model.input_shape[1:])

#get all images EXCEPT that one, from both X and Y
mask = np.ones(X_test.shape[0],dtype=bool)
mask[egInd] = False
X_val = X_test[mask,:,:,:]
Y_val = Y_test[mask,:,:,:]

#predict_and_RCA_evaluate returns a single array of IOUs (one per validation
#image), so there is no second value to unpack
predictedIOUs = predict_and_RCA_evaluate(model,egX,X_val,Y_val)

Additionally, there are considerations about which evaluation set should be used. It seems to me that the whole point of this is to look for similarities to the training set, so the training set should be used for evaluation.

In [None]:
#get the true IOU for all of the test set
trueIOUs = evaluate_model(model,X_test,Y_test)

MTest =  X_test.shape[0]
M = X_train.shape[0]

#one row per test image, one column per training image used as RCA reference
predIOUs = np.zeros((MTest,M))

#loop over each test set example
for ind in range(MTest):

    #slice (rather than index) so the image keeps its leading batch axis of
    #length 1, and use the ind-th test image rather than a fixed example;
    #predict_and_RCA_evaluate takes no training keyword arguments
    predIOUs[ind,:] = predict_and_RCA_evaluate(model,X_test[ind:ind+1],X_train,Y_train)
    

In [None]:
from scipy.stats import pearsonr

In [None]:
plt.figure(figsize = (5,15))

#each panel summarises the per-reference predicted IOUs of a test image with a
#different statistic and plots it against that image's true IOU
summaries = [('max', np.max), ('median', np.median), ('mean', np.mean)]

for panel, (label, reducer) in enumerate(summaries, start=1):

    y = reducer(predIOUs,axis=1)
    plt.subplot(3,1,panel)
    plt.plot([0,1],[0,1],c='k') #identity line: a perfect predictor would fall on it
    plt.scatter(trueIOUs,y)
    plt.title('r = ' + f'{pearsonr(trueIOUs,y)[0]:.02}')
    plt.xlim([0,1])
    plt.ylim([0,1])
    plt.ylabel('predicted IOU (' + label + ')')

#the x label is only attached to the last (bottom) panel
plt.xlabel('true IOU')


So, that suuuuuucks. Give up.