In [3]:
%matplotlib inline
%autosave 60
import numpy as np
import pandas as pd
from glob import glob
import os
from sklearn.model_selection import train_test_split
import shutil
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator as kerasImageDataGenerator

Autosaving every 60 seconds


Using TensorFlow backend.


In [1]:
# DEFINE FUNCTIONAL PARAMETERS
# Integer switches (0 = off, 1 = on) that control optional notebook stages.
train_again = 0            # presumably 1 retrains the model and 0 reuses a saved one -- TODO confirm where it is read
split_and_organize = 0     # 1 to organize and split all images by label and to split train/validation, 0 if already done
print_model_graph = 1      # 1 to run the Graphviz setup cell used for plotting the model graph

# DEFINE PATHS
# Raw strings are used for the Windows-style paths so that the backslashes are
# never interpreted as escape sequences ('\i' and '\*' are invalid escapes and
# trigger a warning in a normal string literal). The runtime values are unchanged.
IMAGES_PATH = r'dataset\input\*.tif'
LABELS_PATH = r'dataset\train_labels.csv'
TRAINING_PATH = 'training'
VALIDATION_PATH = 'validation'
MODEL_CHECKPOINT_FILE = 'cancer_detector_nasnes.h5'
# Kept distinct from MODEL_CHECKPOINT_FILE so that writing the training history
# can never overwrite the saved model (the two constants used to be identical).
MODEL_HISTORY_FILE = 'training_history.pkl'
TRAINING_LOGS_FILE = 'training_logs.csv'


# DEFINE HYPER-PARAMETERS
TRAINING_BATCH_SIZE = 32   # batch size for the training and validation generators
LR_INITIAL = 0.0001        # initial learning rate handed to the optimizer
LR_DECAY = 0.00001         # learning-rate decay handed to the optimizer
TRAINING_EPOCHS = 10       # number of training epochs

## Data Generator Functions with Augmentation

In [4]:
## GENERATOR FUNCTIONS WITH DATA AUGMENTATION
# 1. Keras image data generators
# Training images are rescaled by 1/255 and randomly flipped, rotated, shifted,
# zoomed, sheared and channel-shifted.
# NOTE(review): channel_shift_range is 0.1 while rescale is 1/255; if the shift
# is applied to the unscaled pixel values it is close to a no-op -- confirm the
# intended magnitude.
augmentation_options = dict(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    shear_range=0.05,
    channel_shift_range=0.1,
)
datagen = kerasImageDataGenerator(**augmentation_options)

# Validation images are only rescaled; no augmentation options are passed.
validation_gen = kerasImageDataGenerator(rescale=1./255)

# 2. Keras flow from directory
# Both iterators share the same image size, batch size and binary label mode.
flow_options = dict(
    target_size=(96, 96),
    batch_size=TRAINING_BATCH_SIZE,
    class_mode='binary',
)
train_generator = datagen.flow_from_directory(TRAINING_PATH, **flow_options)
validation_generator = validation_gen.flow_from_directory(VALIDATION_PATH, **flow_options)

Found 153000 images belonging to 2 classes.
Found 17000 images belonging to 2 classes.


## Model Construction (NASNet Mobile)

In [6]:
from keras.applications.xception import Xception
from keras.applications.nasnet import NASNetMobile
from keras.layers import Input, Concatenate, GlobalMaxPooling2D, GlobalAveragePooling2D, Flatten, Dropout, Dense
from keras.models import Model
from keras.optimizers import Adam

# 96x96 RGB input, matching the target_size used by the data generators.
input_shape = (96, 96, 3)
inputs = Input(input_shape)

# NASNet Mobile without its classification head acts as the feature extractor.
base_model = NASNetMobile(include_top=False, input_shape=input_shape)
x = base_model(inputs)

# Three views of the same feature map: global max, global average, and the
# raw flattened activations.
out1 = GlobalMaxPooling2D()(x)
out2 = GlobalAveragePooling2D()(x)
out3 = Flatten()(x)

# Classification head: concatenate the views, apply dropout, and finish with a
# single sigmoid unit for the binary label.
merged_features = Concatenate(axis=-1)([out1, out2, out3])
dropped_features = Dropout(0.5)(merged_features)
outputs = Dense(1, activation='sigmoid')(dropped_features)

model = Model(inputs, outputs)

optimizer = Adam(lr=LR_INITIAL, decay=LR_DECAY)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 96, 96, 3)    0                                            
__________________________________________________________________________________________________
NASNet (Model)                  (None, 3, 3, 1056)   4269716     input_1[0][0]                    
__________________________________________________________________________________________________
global_max_pooling2d_1 (GlobalM (None, 1056)         0           NASNet[1][0]                     
__________________________________________________________________________________________________
global_average_pooling2d_1 (Glo (None, 1056)         0           NASNet[1][0]                     
__________________________________________________________________________________________________
flatten_1 

In [10]:
# Optional Graphviz setup for rendering the model graph (enabled by print_model_graph).
if print_model_graph == 1:
    # NOTE(review): machine-specific absolute path; it has to be adapted on any
    # other machine or environment.
    graphviz_bin_dir = 'C:\\Users\\acamb\\AppData\\Local\\Continuum\\anaconda3\\envs\\imperial36\\Library\\bin\\graphviz'

    # Append the directory only once so that re-running this cell does not keep
    # growing PATH with duplicate entries.
    if graphviz_bin_dir not in os.environ["PATH"].split(os.pathsep):
        os.environ["PATH"] += os.pathsep + graphviz_bin_dir

    import graphviz
    from IPython.display import SVG
    from keras.utils.vis_utils import model_to_dot
    # Rendering call, currently disabled:
#     display(SVG(model_to_dot(model).create(prog='dot', format='svg')))

    from keras.utils.vis_utils import plot_model
    # Alternative rendering to a PNG file, currently disabled:
#     display(plot_model(model, to_file='model_plot.png', show_shapes=True, expand_nested=True, show_layer_names=True))

## Training

In [None]:
# pickle is needed by the history dump at the end of this cell; without this
# import the cell raised NameError only after the whole training run finished.
import pickle

from livelossplot import PlotLossesKeras
from keras.callbacks import CSVLogger, ModelCheckpoint

# Callbacks
# Keep only the weights with the highest validation accuracy seen so far.
# NOTE(review): 'val_acc' is the metric key this notebook also uses when
# plotting; some Keras releases name it 'val_accuracy' -- confirm against the
# installed version.
checkpoint = ModelCheckpoint(MODEL_CHECKPOINT_FILE, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# Per-epoch metrics go to a ';'-separated CSV file that is overwritten on each run.
csv_logger = CSVLogger(TRAINING_LOGS_FILE, append=False, separator=';')

# Begin training
# One epoch covers every batch of the training generator; validation runs over
# every batch of the validation generator.
training_history = model.fit_generator(
                            train_generator,
                            steps_per_epoch=len(train_generator),
                            epochs=TRAINING_EPOCHS,
                            verbose=1,
                            validation_data=validation_generator,
                            validation_steps=len(validation_generator),
                            max_queue_size=10,
                            callbacks=[PlotLossesKeras(), checkpoint, csv_logger])

# Save Model and Training History
# NOTE(review): this pickles the whole object returned by fit_generator; if only
# the metric curves are needed, dumping `training_history.history` would be a
# smaller and more portable payload -- confirm what readers of the file expect.
with open('training_history.pkl', 'wb') as f:
    pickle.dump(training_history, f)

Epoch 1/10

In [None]:
# Plot Training History
# One figure per metric, each showing the training and validation curves
# against the 1-based epoch number.
epochs = list(range(1, len(training_history.history['loss']) + 1))

# (history key, display name) pairs. The display name feeds both the legend
# labels and the title, so the accuracy curves are no longer mislabelled as
# "Loss" in the legend.
for metric_key, metric_name in (('loss', 'Loss'), ('acc', 'Accuracy')):
    plt.plot(epochs, training_history.history[metric_key], label='Train ' + metric_name)
    plt.plot(epochs, training_history.history['val_' + metric_key], label='Validation ' + metric_name)
    plt.legend(loc='best')
    plt.title(metric_name)
    plt.xlabel('Epochs')
    plt.show()

## Validation Performance: ROC, AUC

In [None]:
# Validation ROC (TP vs FP)
from sklearn.metrics import roc_curve, auc

# Dedicated, non-augmenting iterator over the validation images.
# shuffle=False so that the predictions come back in the same order as the
# labels stored in `.classes`.
roc_validation_generator = kerasImageDataGenerator(rescale=1./255).flow_from_directory(
    VALIDATION_PATH,
    target_size=(96,96),
    batch_size=60,
    class_mode='binary',
    shuffle=False)

# Score every batch of the validation iterator exactly once.
predictions_roc = model.predict_generator(
    roc_validation_generator,
    steps=len(roc_validation_generator),
    verbose=1)

# Plot ROC Curve Validation dataset
# FP / TP hold the false-positive and true-positive rates returned by roc_curve.
FP, TP, thresh = roc_curve(roc_validation_generator.classes, predictions_roc)
AUC = auc(FP, TP)
roc_label = 'AUC = {:.3f}'.format(AUC)

plt.plot([0, 1], [0,1], 'k--')   # diagonal reference line
plt.plot(FP, TP, label=roc_label)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()