<h1 style="color:000000">Leukemia Classification</h1>

<h3 style="color:000000">Installing dependicies and packages</h3>

In [None]:
!pip install numpy
!pip install pandas
!pip install matplotlib
!pip install tensorflow
!pip install scikit-image
!pip install tqdm
!pip install scikit-learn
!pip install seaborn
!pip install opencv-contrib-python

<h3 style="color:000000">Importing libraries</h3>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import tqdm
import skimage.io
import glob
import random
from PIL import Image
import itertools

import cv2 as cv

import seaborn as sns

from tqdm import tqdm

from skimage.io import imread, imshow
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from skimage.transform import resize
from sklearn.utils import shuffle
from sklearn.metrics import classification_report, confusion_matrix 
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import InputLayer, Conv2D, BatchNormalization, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications.inception_v3 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.optimizers import Adam

from keras.utils import np_utils
from keras import backend as K

<h3 style="color:000000">Utilities</h3>
<h3 style="color:blue">Confusion Matrix Plotting Function</h3>

In [2]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citiation
    ---------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """


    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]


    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if normalize:
            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        else:
            plt.text(j, i, "{:,}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")


    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

<h3 style="color:000000">Data preprocessing</h3>
<h3 style="color:blue">Reading Files</h3>

<h3 style="font-weight:bold;color:red;">!!! NOTE: For test dataset, labels are not defined. So, we are not going to be able to use test dataset for testing</h3>

In [3]:
#Traning Dataset
train_dataset_0_all = glob.glob('data/training_data/fold_0/all/*.bmp')
train_dataset_0_hem = glob.glob('data/training_data/fold_0/hem/*.bmp')
train_dataset_1_all = glob.glob('data/training_data/fold_1/all/*.bmp')
train_dataset_1_hem = glob.glob('data/training_data/fold_1/hem/*.bmp')
train_dataset_2_all = glob.glob('data/training_data/fold_2/all/*.bmp')
train_dataset_2_hem = glob.glob('data/training_data/fold_2/hem/*.bmp')

#Test Dataset
test_dataset  = glob.glob('data/testing_data/C-NMC_test_final_phase_data/*.bmp')

#Validation Dataset (Images)
valid_dataset = glob.glob('data/validation_data/C-NMC_test_prelim_phase_data/*.bmp')

#Validation Dataset (.CSV)
valid_data = pd.read_csv('data/validation_data/C-NMC_test_prelim_phase_data_labels.csv')

<h3 style="color:000000">Data preprocessing cont.</h3>
<h3 style="color:blue">Collecting Images</h3>

In [4]:
ALL = []
HEM = []

ALL.extend(train_dataset_0_all)
ALL.extend(train_dataset_1_all)
ALL.extend(train_dataset_2_all)

HEM.extend(train_dataset_0_hem)
HEM.extend(train_dataset_1_hem)
HEM.extend(train_dataset_2_hem)

ALL = np.array(ALL)
HEM = np.array(HEM)

<h3 style="color:000000">Data preprocessing</h3>
<h3 style="color:blue">Load the data</h3>

In [5]:
X_train = []
y_train = []

for i in tqdm(range(0, len(ALL))):
    img = imread(ALL[i])
    img = resize(img, (229,229))
    X_train.append(img)
    y_train.append(1)
    
for i in tqdm(range(0, len(HEM))):
    img = imread(HEM[i])
    img = resize(img, (229,229))
    X_train.append(img)
    y_train.append(0)
    
#X_train = np.array(X_train)
#y_train = np.array(y_train)

#X_train.shape, y_train.shape

100%|██████████| 7272/7272 [02:26<00:00, 49.55it/s]
100%|██████████| 3389/3389 [01:08<00:00, 49.17it/s]


In [6]:
valid_data.head(10)

Unnamed: 0,Patient_ID,new_names,labels
0,UID_57_29_1_all.bmp,1.bmp,1
1,UID_57_22_2_all.bmp,2.bmp,1
2,UID_57_31_3_all.bmp,3.bmp,1
3,UID_H49_35_1_hem.bmp,4.bmp,0
4,UID_58_6_13_all.bmp,5.bmp,1
5,UID_57_8_11_all.bmp,6.bmp,1
6,UID_H49_29_2_hem.bmp,7.bmp,0
7,UID_H30_6_2_hem.bmp,8.bmp,0
8,UID_58_2_1_all.bmp,9.bmp,1
9,UID_54_35_3_all.bmp,10.bmp,1


In [7]:
X_val = []
y_val = []

for img_name in tqdm(valid_data.new_names):
    img = imread('data/validation_data/C-NMC_test_prelim_phase_data/' + img_name)
    img = resize(img,(229,229))
    X_val.append(img)
      
X_val = np.array(X_val)
y_val = valid_data.labels.values
X_val.shape, y_val.shape

100%|██████████| 1867/1867 [00:39<00:00, 47.84it/s]


((1867, 229, 229, 3), (1867,))

In [8]:
X_train = np.array(X_train, dtype="float") / 255.0

<h3 style="color:000000">Data preprocessing</h3>
<h3 style="color:blue">ONE-HOT Encoding</h3>

In [9]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_train = np_utils.to_categorical(y_train, 2)

In [10]:
(trainX, testX, trainY, testY) = train_test_split(X_train, y_train,test_size=0.20, random_state=42)

<h3 style="color:000000">Data preprocessing</h3>
<h3 style="color:blue">Parameter Selecting</h3>

In [11]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 32
IMG_SIZE = 299
EPOCHS = 100
WEIGHTS = 'imagenet'
CLASS_NAMES = ["HEM", "ALL"]

<h3 style="color:000000">Data preprocessing</h3>
<h3 style="color:blue">Data Augmentation</h3>

In [12]:
aug  = ImageDataGenerator(rotation_range=20, 
                                    zoom_range=0.15,
                                    width_shift_range=0.2, 
                                    height_shift_range=0.2, 
                                    shear_range=0.15,
                                    horizontal_flip=True, 
                                    fill_mode="nearest")
#train_datagen.fit(X_train)

In [13]:
valid_datagen = ImageDataGenerator()

valid_datagen.fit(X_val)

<h3 style="color:000000">Convolutional Neural Network</h3>
<h3 style="color:blue">XCEPTION</h3>

<img src="./images/1*t6qfo9ucYza_lbLfg5-p_w.png">

<h3 style="color:000000">XCEPTION</h3>
<h3 style="color:blue">Model Implementation</h3>

In [14]:
models = tf.keras.models
layers = tf.keras.layers
#initializers = tf.contrib.keras.initializers
#regularizers = tf.contrib.keras.regularizers

In [15]:
def conv_block(x, filters, block_num, conv_num, strides=(1,1)):
    name = 'block{}_conv{}_'.format(block_num, conv_num)

    # conv-BN-relu
    x = layers.Conv2D(filters, (3,3), strides=(2,2), use_bias=False, name=name)(x)
    x = layers.BatchNormalization(name=name+'bn')(x)
    x = layers.Activation('relu', name=name+'act')(x)

    return x

In [16]:
def separable_conv_block(x, filters, block_num, conv_num, pre_activation=None):
    name = 'block{}_sepconv{}_'.format(block_num, conv_num)

    if pre_activation is True:
        x = layers.Activation('relu', name=name+'act')(x)

    # (relu)-sepconv-BN-(relu)
    x = layers.SeparableConv2D(filters, (3,3), padding='same', use_bias=False, name=name)(x)
    x = layers.BatchNormalization(name=name+'bn')(x)

    if pre_activation is False:
        x = layers.Activation('relu', name=name+'act')(x)


    return x

In [17]:
def middle_flow_block(x, filters, block_num):

    # middle flow

    residual = x

    x = separable_conv_block(x, filters, block_num=block_num, conv_num='1', pre_activation=True)
    x = separable_conv_block(x, filters, block_num=block_num, conv_num='2', pre_activation=True)
    x = separable_conv_block(x, filters, block_num=block_num, conv_num='3', pre_activation=True)

    return layers.add([x, residual])

In [18]:
def xception_block(x, filters, block_num, pre_activation=True):
    block = 'block{}_'.format(block_num)
    filter_conv1, filter_conv2 = filters

    # residual conv branch
    residual = layers.Conv2D(filter_conv2, (1, 1), strides=(2, 2),padding='same', use_bias=False)(x)
    residual = layers.BatchNormalization()(residual)

    # separable conv block
    x = separable_conv_block(x, filter_conv1, block_num=block_num, conv_num='1', pre_activation=pre_activation)
    x = separable_conv_block(x, filter_conv2, block_num=block_num, conv_num='2', pre_activation=True)

    # downsampling and merging
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name=block+'pool')(x)

    return layers.add([x, residual])

In [19]:
def Xception(input_shape=(299,299,3), classes=2):
    """Instantiates the Xception architecture.
    """

    img_input = layers.Input(shape=input_shape)

    #===========ENTRY FLOW==============
    #Block 1
    x = conv_block(img_input, 32, block_num='1', conv_num='1', strides=(2,2))
    x = conv_block(x, 64, block_num='1', conv_num='2')

    #Block 2
    x = xception_block(x, (128, 128), '2', pre_activation=False)

    #Block 3
    x = xception_block(x, (256, 256), '3')

    #Block 4
    x = xception_block(x, (728, 728), '4')


    #===========MIDDLE FLOW===============
    for i in range(8):
        block_num = str(5+i)
        x = middle_flow_block(x, 728, block_num)

    #========EXIT FLOW============
    #Block 13
    x = xception_block(x, (728, 1024), '13') # second conv is different

    # Block 14
    x = separable_conv_block(x, 1536, block_num='14', conv_num='1', pre_activation=False)
    x = separable_conv_block(x, 2048, block_num='14', conv_num='2', pre_activation=False)

    # logistic regression
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(classes, activation='softmax', name='predictions')(x)

    # Create model.
    model = models.Model(inputs=img_input, outputs=x, name='xception')
    return model

In [20]:
model = Xception()
model.summary()

Model: "xception"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 299, 299, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1_ (Conv2D)          (None, 149, 149, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 149, 149, 32) 128         block1_conv1_[0][0]              
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 149, 149, 32) 0           block1_conv1_bn[0][0]            
___________________________________________________________________________________________

<h3 style="color:000000">XCEPTION</h3>
<h3 style="color:blue">Callbacks</h3>

In [21]:
callbacks = [
    tf.keras.callbacks.ModelCheckpoint("model.h5", verbose = 1, save_best_only=True),
    tf.keras.callbacks.EarlyStopping(verbose = 1, patience=10, restore_best_weights=True)
]

<h3 style="color:000000">XCEPTION</h3>
<h3 style="color:blue">Compiler</h3>

In [22]:
model.compile(optimizer = 'adam', 
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

<h3 style="color:000000">XCEPTION</h3>
<h3 style="color:blue">Training</h3>

In [None]:
H = model.fit(
    aug.flow(trainX, trainY, batch_size=BATCH_SIZE),
    validation_data=(testX, testY), steps_per_epoch=len(trainX) // BATCH_SIZE,
    epochs=EPOCHS)

In [None]:
model.save('xception-model')

In [None]:
val_accuracy = np.mean(H.history['val_accuracy'])
print("\n%s: %.2f%%" % ('val_accuracy', val_accuracy*100))


accuracy = np.mean(H.history['accuracy'])
print("\n%s: %.2f%%" % ('accuracy', accuracy*100))

In [None]:
history_df = pd.DataFrame(H.history)

plt.figure(figsize=(14, 4))
plt.subplot(1,2,1)
sns.scatterplot(data=history_df[['accuracy','val_accuracy']], palette=['#E2485A', '#679B8B']);
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1,2,2)
sns.scatterplot(data=history_df[['loss','val_loss']], palette=['#E2485A', '#679B8B'])
plt.title('Training and Validation Loss')

plt.show()

In [None]:
model = tf.keras.models.load_model('xception-model')

In [None]:
y_test_arg=np.argmax(testY,axis=1)
Y_pred = np.argmax(model.predict(testX),axis=1)
cm = confusion_matrix(y_test_arg, Y_pred)
plot_confusion_matrix(cm=cm, normalize=True,target_names=CLASS_NAMES)

In [None]:
print(classification_report(y_test_arg, Y_pred, target_names=CLASS_NAMES))

In [None]:
#new_model = tf.keras.models.load_model('vgg19-model/model.h5',custom_objects={'KerasLayer':hub.KerasLayer})
#new_model.summary()

In [None]:
test_img_path = random.choice(valid_dataset)

test_img = cv.imread(test_img_path)

img = cv.imread(test_img_path)
img = cv.resize(img, (128,128))
img = img.astype("float") / 255.0
img = img_to_array(img)
img = np.expand_dims(img, axis=0)

preds = model.predict(img)[0]

j = np.argmax(preds)
label = CLASS_NAMES[j]

bmp_f = test_img_path.split(os.sep)[-1]

label = "{}: {:.2f}%".format(label, preds[j])


plt.title(label)
plt.imshow(test_img)

print(valid_data[valid_data["new_names"] == bmp_f].Patient_ID.values[0])