In [None]:
import numpy as np

import os

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import initializers
from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as keras
from tensorflow.keras import callbacks
from tensorflow.keras import metrics

from scipy.stats import pearsonr

from custom_losses import binary_crossentropy_weight_balance, binary_crossentropy_weight_dict, binary_crossentropy_closeness_to_foreground,dice_coef_loss

from mask_utils import show_image_with_masks,iou,symmetric_hausdorff_distance,mean_contour_distance

from network_utils import gpu_memory_limit,augmentImageSequence

from MultiResUNet.MultiResUNet import MultiResUnet

from datetime import datetime

In [None]:
# Cap how much GPU RAM this notebook may allocate, so the card can be shared with other jobs.
# Per the original note, 8000 corresponds to 8GB, which is about 1/3 of the available GPU memory
# (presumably the argument is in MB - confirm against network_utils.gpu_memory_limit).
gpu_memory_limit(8000)

In [None]:
# Directory for keeping trained models and journaling their performance/graphs.
modelDir = os.path.join('data','models')
# makedirs also creates the parent 'data' directory when it is missing (os.mkdir would raise
# FileNotFoundError), and exist_ok=True makes the cell safe to re-run without a separate check.
os.makedirs(modelDir, exist_ok=True)

# Timestamped prefix shared by every artefact (figures, weights, json) written by this run.
# NOTE(review): the ':' in the timestamp is not a legal filename character on Windows - confirm
# this notebook only ever runs on POSIX filesystems before relying on these names elsewhere.
dateStr = datetime.now().strftime('%Y-%m-%d_%H:%M')
outputName = os.path.join(modelDir,'mrunet_' + dateStr)

In [None]:
DataDir = './data/pericardial/wsx_round2/'

# Load the arrays produced by extract_dcm_for_wsx.ipynb: images, masks and per-image pixel area.
X = np.load(os.path.join(DataDir, 'X.npy'))
Y = np.load(os.path.join(DataDir, 'Y.npy')).astype('float')
pxArea = np.load(os.path.join(DataDir, 'pxSize.npy'))
# Side length of a pixel, derived from its area.
pxSpacing = np.sqrt(pxArea)

# The saved arrays are rank 3; append a trailing axis of length 1 so the last dimension
# represents channels (rank 4).
X = np.expand_dims(X, axis=-1)
Y = np.expand_dims(Y, axis=-1)

# Train/test split. All four arrays are split together so that each image keeps its own
# mask, pixel area and pixel spacing. The seed is fixed so the split is repeatable.
splits = train_test_split(X, Y, pxArea, pxSpacing, test_size=0.2, random_state=101)
X, X_test, Y, Y_test, pxArea, pxArea_test, pxSpacing, pxSpacing_test = splits

# Number of examples in the training and test sets.
M = X.shape[0]
MTest = X_test.shape[0]

In [None]:
# Data augmentation properties.
# To make the generator do nothing except randomise the order (per the original note), set
# rotation_range, width_shift_range, height_shift_range, shear_range and zoom_range to 0
# and leave every other entry as it is below.
dataGenArgs = {
    'rotation_range': 10,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.05,
    'zoom_range': 0.1,
    'horizontal_flip': False,  # DO NOT FLIP THE IMAGES
    'vertical_flip': False,
    'fill_mode': 'nearest',
    'data_format': 'channels_last',
    'featurewise_center': False,
    'featurewise_std_normalization': False,
    'zca_whitening': False,
}

# Stop when the monitored loss has not improved for 10 epochs and roll back to the best weights.
# NOTE(review): this watches the *training* 'loss' while reduceLR below watches 'val_loss' -
# confirm the asymmetry is intended.
earlyStop = callbacks.EarlyStopping(
    monitor='loss',
    mode='min',
    min_delta=0,
    patience=10,  # be a bit patient...
    restore_best_weights=True,
)

# Multiply the learning rate by 0.3 after 5 epochs without val_loss improvement, then wait
# 5 epochs (cooldown) before resuming normal operation.
reduceLR = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=5,
    cooldown=5,
    verbose=1,
)

CALLBACKS = [earlyStop, reduceLR]

OPT = Adam(
    learning_rate=1e-2,
    beta_1=0.9,
    beta_2=0.999,
    amsgrad=False,
)

# Other hyperparameters
BATCHSIZE = 10  # THIS MATTERS A LOT

# Spatial smoothing for pixel weights (only used by the closeness-to-foreground loss option)
SIGMA = 10

Instantiate and train the model.

In [None]:
# Start from a clean Keras graph so layer/metric names (e.g. 'mean_io_u') do not pick up
# numeric suffixes when this cell is re-run.
keras.clear_session()

tf.random.set_seed(101) #FIXME!!! this is not sufficient to guarantee deterministic behaviour during fitting.

model = MultiResUnet(height=X.shape[1],width=X.shape[2],n_channels=1)


class ThresholdedMeanIoU(metrics.MeanIoU):
    """Mean IoU over {background, foreground} computed on binarised predictions.

    `metrics.MeanIoU` expects predictions that are already class ids; it does not threshold
    probabilities. Feeding it the model's continuous outputs therefore does not measure the
    IoU of the predicted mask. This subclass applies `threshold` to `y_pred` first.

    The default name is kept as 'mean_io_u' so the history keys ('mean_io_u' and
    'val_mean_io_u') stay the same as with the stock metric.
    """

    def __init__(self, num_classes=2, threshold=0.5, name='mean_io_u', **kwargs):
        super().__init__(num_classes=num_classes, name=name, **kwargs)
        self.threshold = threshold

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Binarise the probabilities, keeping the dtype the parent class receives.
        y_pred = tf.cast(y_pred > self.threshold, y_pred.dtype)
        return super().update_state(y_true, y_pred, sample_weight)

    def get_config(self):
        config = super().get_config()
        config['threshold'] = self.threshold
        return config


model.compile(optimizer = OPT, 
              loss = 'binary_crossentropy',
#               loss = binary_crossentropy_weight_balance,
#               loss = binary_crossentropy_closeness_to_foreground(sigma=SIGMA),
#               loss = dice_coef_loss,
              metrics = ['accuracy',ThresholdedMeanIoU(num_classes=2, threshold=0.5)],
             )

# Train on augmented batches. The held-out split is used as validation data, so 'val_*'
# history entries describe the test set.
fitHistory = model.fit(augmentImageSequence(X,Y,dataGenArgs,batchSize=BATCHSIZE),
                       epochs = 300,# upper bound only; earlyStop normally ends training sooner
                       steps_per_epoch= M//BATCHSIZE, # one pass over the training set per epoch
                       workers=8,
                       use_multiprocessing=True,
                       validation_data=(X_test,Y_test.astype('float')),
                       callbacks=CALLBACKS,
                       verbose=1,
                      )

Let's have a look at how fitting has proceeded.

In [None]:
# Two stacked panels sharing the epoch axis: loss on top, mean IoU underneath.
fig, (axLoss, axIoU) = plt.subplots(2, 1, figsize=(15, 10))
history = fitHistory.history

# Top panel: training vs held-out ("dev") loss. Tick labels are hidden here because the
# epoch axis is labelled on the bottom panel.
axLoss.plot(history['loss'], label='train')
axLoss.plot(history['val_loss'], label='dev')
axLoss.set_ylabel('loss')
axLoss.set_ylim([0, 1])
axLoss.legend()
axLoss.set_xticks([])

# Bottom panel: mean IoU as tracked by the Keras metric during fitting.
axIoU.plot(history['mean_io_u'], label='train')
axIoU.plot(history['val_mean_io_u'], label='dev')
axIoU.set_ylim([0, 1])
axIoU.set_ylabel('mean iou')
axIoU.set_xlabel('epoch #')

# Keep both a vector and a raster copy alongside the model.
for extension in ('.svg', '.png'):
    fig.savefig(outputName + '_loss_history' + extension)

Look at the values actually output by the model. As there is some instability at the moment, this is pretty important.

In [None]:
# Per-pixel model outputs for the held-out and the training images.
predTest = model.predict(X_test)
predTrain = model.predict(X)

# Show the actual distribution of output values over all pixels, in ten bins of width 0.1.
valueBins = np.arange(0, 1.05, 0.1)
for pixelValues, setName in ((predTrain, 'Train'), (predTest, 'Test')):
    plt.hist(pixelValues.ravel(), bins=valueBins, density=True, alpha=0.5, label=setName)

plt.legend()
plt.xlabel('value')
plt.ylabel('probability density')

Let's have a look at the distribution of IoU, Hausdorff distance and mean contour distance for each example image in the train and test sets.

In [None]:

# Loop over the example axis, calculating metrics for each image separately.
#
# The predictions are binarised at 0.5 first, so that the numbers collected here are computed
# on the same kind of input (a boolean mask) as the per-image values shown in the example
# figures further down, which pass `pred > 0.5` to the same helpers.
MASK_THRESHOLD = 0.5
maskTrain = predTrain > MASK_THRESHOLD
maskTest = predTest > MASK_THRESHOLD

# Intersection-over-union (dimensionless).
TrainIOU = [iou(Y[m,:,:,:], maskTrain[m,:,:]) for m in range(M)]
TestIOU = [iou(Y_test[m,:,:,:], maskTest[m,:,:]) for m in range(MTest)]

# Symmetric Hausdorff distance; the per-image pixel spacing is passed so the helper can
# convert to physical units (the histograms label this as mm).
TrainHD = [symmetric_hausdorff_distance(Y[m,:,:,:], maskTrain[m,:,:],pxSpacing[m]) for m in range(M)]
TestHD = [symmetric_hausdorff_distance(Y_test[m,:,:,:], maskTest[m,:,:],pxSpacing_test[m]) for m in range(MTest)]

# Mean contour distance, again scaled by the per-image pixel spacing.
TrainMCD = [mean_contour_distance(Y[m,:,:,:], maskTrain[m,:,:],pxSpacing[m]) for m in range(M)]
TestMCD = [mean_contour_distance(Y_test[m,:,:,:], maskTest[m,:,:],pxSpacing_test[m]) for m in range(MTest)]


In [None]:
# Histograms for each of the metrics, train vs test, one panel per metric.
plt.figure(figsize=(15, 5))

# (train values, test values, bin edges, x-axis label) for each panel, left to right.
histogramPanels = [
    (TrainIOU, TestIOU, np.arange(0, 1.05, 0.05), 'Intersection-over-Union'),
    (TrainHD, TestHD, np.arange(0, 125, 5), 'Hausdorff Distance (mm)'),
    (TrainMCD, TestMCD, np.arange(0, 25, 2), 'Mean Contour Distance (mm)'),
]

for panelNo, (trainValues, testValues, binEdges, axisLabel) in enumerate(histogramPanels, start=1):
    plt.subplot(1, 3, panelNo)
    plt.hist(trainValues, bins=binEdges, density=True, alpha=0.5, label='Train')
    plt.hist(testValues, bins=binEdges, density=True, alpha=0.5, label='Test')
    plt.xlabel(axisLabel)
    if panelNo == 1:
        # Only the left-most panel carries the shared y-axis label.
        plt.ylabel('probability density')

# The current axes are the right-most panel, so the single legend lands there.
plt.legend()

for extension in ('.svg', '.png'):
    plt.savefig(outputName + '_metrics_histogram' + extension)

How well do predicted **areas** of fat match? That is what the project is all about

In [None]:
plt.figure(figsize=(5, 5))

# Area per image = (sum over all pixels and the channel axis) * area of one pixel.
# Note the predicted areas sum the raw model outputs, not a thresholded mask.
# The division by 100 presumably converts mm^2 to cm^2 (the axes are labelled cm^2) -
# confirm the units of pxSize.npy.
pixelAxes = (1, 2, 3)
areasPredTrain = predTrain.sum(axis=pixelAxes) * pxArea / 100
areasTrueTrain = Y.sum(axis=pixelAxes) * pxArea / 100

areasPredTest = predTest.sum(axis=pixelAxes) * pxArea_test / 100
areasTrueTest = Y_test.sum(axis=pixelAxes) * pxArea_test / 100

plt.scatter(areasTrueTrain, areasPredTrain, label='train', alpha=0.6)
plt.scatter(areasTrueTest, areasPredTest, label='test', alpha=0.6)

# Pearson correlation between human and machine areas, on the test set only.
r, p = pearsonr(areasTrueTest, areasPredTest)
rSquaredText = f'{r**2:.03}'
pValueText = f'{p:.03}'
plt.title('for test set, R$^2$ = ' + rSquaredText + ', p = ' + pValueText)

plt.xlabel('human area (cm$^2$)')
plt.ylabel('machine area (cm$^2$)')

# Same range on both axes, from 0 to the largest area seen anywhere, so the identity line
# would run corner to corner.
allAreas = np.concatenate((areasPredTrain, areasTrueTrain, areasPredTest, areasTrueTest))
lims = [0, allAreas.max()]
plt.xlim(lims)
plt.ylim(lims)

plt.legend()

for extension in ('.svg', '.png'):
    plt.savefig(outputName + '_area_correlation' + extension)

A few examples of the training set segmentations:

In [None]:

# Show up to 25 randomly chosen training images with the manual (green) and automated (red)
# mask outlines, titled with the per-image metrics.

# Sampling is without replacement, so never ask for more examples than there are images.
negs = min(25, M)

egs = np.random.choice(range(M), negs, replace=False)

ncols = 5
# plt.subplot needs an integer grid size; np.ceil returns a float.
nrows = int(np.ceil(negs/ncols))

plt.figure(figsize = (5*ncols,5*nrows))

# Image height/width without the example and channel axes.
imShape = X.shape[1:-1]

for i in range(negs):
    
    plt.subplot(nrows,ncols,i+1)
    
    # 2-D masks: the human annotation, and the model output binarised at 0.5.
    manual,automated = Y[egs[i],:,:].reshape(imShape), predTrain[egs[i],:,:].reshape(imShape) > 0.5
    
    pxS = pxSpacing[egs[i]]

    show_image_with_masks(image = X[egs[i],:,:].reshape(imShape),
                          masks = [manual,automated],
                          maskOptions = [{'linewidth':1,'color':'g'},{'linewidth':1,'color':'r'}]
                         )
    
    plt.title('iou = ' + f'{iou(manual,automated):.03}' + '\n' + 
              'hd = ' + f'{symmetric_hausdorff_distance(manual,automated,pxS):.03}' + '\n' +
              'mcd = ' + f'{mean_contour_distance(manual,automated,pxS):.03}')
    
plt.savefig(outputName + '_train_examples.svg')
plt.savefig(outputName + '_train_examples.png')

Examples from the test set:

In [None]:

# Show up to 25 randomly chosen test images with the manual (green) and automated (red)
# mask outlines, titled with the per-image metrics.

# Sampling is without replacement, so never ask for more examples than there are images
# (the test set is only 20% of the data, so it may well hold fewer than 25).
negs = min(25, MTest)

egs = np.random.choice(range(MTest), negs, replace=False)

ncols = 5
# plt.subplot needs an integer grid size; np.ceil returns a float.
nrows = int(np.ceil(negs/ncols))

plt.figure(figsize = (5*ncols,5*nrows))

# Image height/width without the example and channel axes.
imShape = X_test.shape[1:-1]

for i in range(negs):
    
    plt.subplot(nrows,ncols,i+1)
    
    # 2-D masks: the human annotation, and the model output binarised at 0.5.
    manual,automated = Y_test[egs[i],:,:].reshape(imShape), predTest[egs[i],:,:].reshape(imShape) > 0.5
    
    pxS = pxSpacing_test[egs[i]]
    
    show_image_with_masks(image = X_test[egs[i],:,:].reshape(imShape),
                          masks = [manual,automated],
                          maskOptions = [{'linewidth':1,'color':'g'},{'linewidth':1,'color':'r'}]
                         )
    
    plt.title('iou = ' + f'{iou(manual,automated):.03}' + '\n' + 
              'hd = ' + f'{symmetric_hausdorff_distance(manual,automated,pxS):.03}' + '\n' +
              'mcd = ' + f'{mean_contour_distance(manual,automated,pxS):.03}')
    
    
plt.savefig(outputName + '_test_examples.svg')
plt.savefig(outputName + '_test_examples.png')

Now, save the model for use elsewhere, along with some performance statistics

In [None]:

# Architecture and weights are saved separately, because custom loss functions cause issues
# when loading everything back from a single .h5 file.
architecturePath = outputName + '.json'
weightsPath = outputName + '.h5'

# Serialize the model architecture to JSON.
model_json = model.to_json()
with open(architecturePath, 'w') as json_file:
    json_file.write(model_json)

# Serialize the weights to HDF5.
model.save_weights(weightsPath)

Record some metrics of model performance for future reference.

In [None]:
# Build one csv row describing this model: its filename prefix, followed by the mean and
# standard deviation of each per-image metric on the train and test sets.
# The order here fixes the column order: <name>Mean then <name>Std for each entry.
metricSeries = (
    ('TrainIOU', TrainIOU),
    ('TestIOU', TestIOU),
    ('TrainHD', TrainHD),
    ('TestHD', TestHD),
    ('TrainMCD', TrainMCD),
    ('TestMCD', TestMCD),
)

modelDetails = {'Filename': outputName}
for metricName, perImageValues in metricSeries:
    modelDetails[metricName + 'Mean'] = str(np.mean(perImageValues))
    modelDetails[metricName + 'Std'] = str(np.std(perImageValues))

# If the file containing details of past models does not exist yet, create it with a header row.
historyFile = os.path.join(modelDir,'model_history.csv')
historyExists = os.path.isfile(historyFile)
if not historyExists:

    fields = modelDetails.keys()

    with open(historyFile,'w+') as f:
        f.write(','.join(fields) + '\n')

# Now append this model's line of performance statistics.
statsLine = ','.join(modelDetails.values()) + '\n'
with open(historyFile,'a') as f:
    f.write(statsLine)
    
