In [4]:
#adapted from https://github.com/Narasimha1997/Blood-Cell-type-identification-using-CNN-classifier

import os 
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, MaxPool2D, Flatten
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.applications import vgg16
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img 
from sklearn.metrics import auc, roc_curve, confusion_matrix, classification_report

In [5]:
batch_size = 16
img_width, img_height = 120, 160

train_dir = 'cell/data/train'
val_dir = 'cell/data/val'
test_dir = 'cell/data/test_set'
model_weights_dir = 'model_weights.h5'

#generators to upload training, validation, and test images
train_generator = ImageDataGenerator(
        rescale = 1./255,
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)


train_data = train_generator.flow_from_directory(
    shuffle = True,
    batch_size = 32,
    target_size = (img_width, img_height),
    directory = train_dir)

num_classes = len(train_data.class_indices) 

val_generator = ImageDataGenerator(
                rescale= 1/.255)

val_data = val_generator.flow_from_directory(
    shuffle = True,
    batch_size = 32,
    target_size = (img_width, img_height),
    directory = val_dir)

#create convolutional neural network to classify cell images
def model():
    model = Sequential()
    model.add(Conv2D(80, (3,3), activation = 'relu', input_shape = (img_width, img_height, 3)))
    model.add(Conv2D(64, (3,3), activation = 'relu', input_shape = (img_width, img_height, 3)))
    model.add(MaxPool2D(pool_size = (2,2)))
    
    model.add(Conv2D(64, (3,3), activation = 'relu'))
    model.add(Dropout(0.25))
    
    model.add(Conv2D(32, (3,3), activation = 'relu'))
    model.add(Dropout(0.25))

    model.add(Flatten())

    model.add(Dense(128, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation = 'softmax'))

    model.compile(loss = 'categorical_crossentropy', optimizer = 'adadelta', metrics = ['accuracy'])
    
    return model

#used to stop training if NN shows no improvement for 6 epochs
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.01, patience=6, verbose=1)
    
#checks each epoch as it runs and saves the weight file from the model with the lowest validation loss
checkpointer = ModelCheckpoint(filepath=model_weights_dir, verbose=1, save_best_only=True)

#fit model to the data
nn = model()
nn.fit_generator(train_data, steps_per_epoch = len(train_data.filenames)//batch_size, 
                 validation_data=val_data,
                 validation_steps= len(val_data.filenames) // batch_size, 
                 epochs = 30,
                 callbacks = [early_stop, checkpointer],
                 verbose = 1)

Found 4800 images belonging to 4 classes.
Found 960 images belonging to 4 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 00019: early stopping


<keras.callbacks.History at 0x7f086a4b5ba8>

In [6]:
#load test images into array for testing model predictions
all_images = []
for sub in sorted(os.listdir(test_dir)):
    for image_path in os.listdir(test_dir + '/' + sub):
        image = load_img(test_dir + '/' + sub + '/' + image_path, target_size=(img_width, img_height))
        image = img_to_array(image)
        all_images.append(image)
all_test = np.array(all_images)

#must divide image array by 255 due to 'rescale=1./255' in ImageDataGenerator; rescales all image values from
#[0-255] range to [0-1.0] range
all_test = all_test / 255

In [11]:
#generate true labels for test data
test_label_datagen = ImageDataGenerator(rescale=1./255)
    
test_label_generator = test_label_datagen.flow_from_directory(  
    test_dir,  
    target_size=(img_width, img_height),  
    batch_size=batch_size,  
    class_mode=None,  
    shuffle=False)  

test_labels = test_label_generator.classes

#use trained model to predict class of each image in the test set
all_pred = nn.predict(all_test)
all_pred_class = nn.predict_classes(all_test)

print('\n')
print(confusion_matrix(test_labels, all_pred_class))
print(classification_report(test_labels, all_pred_class))

Found 400 images belonging to 4 classes.

[[ 73   0   0  27]
 [  0 100   0   0]
 [  0   1  98   1]
 [  3   2   2  93]]
             precision    recall  f1-score   support

          0       0.96      0.73      0.83       100
          1       0.97      1.00      0.99       100
          2       0.98      0.98      0.98       100
          3       0.77      0.93      0.84       100

avg / total       0.92      0.91      0.91       400

