In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive; a later
# cell copies the dataset archive from the MyDrive folder under this mount
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# copy (not move) the dataset archive from the mounted Drive into /content/
# so that later cells can open it through a short local path
!cp /content/drive/MyDrive/Test_workspace/datasets.zip /content/

In [3]:
# extract the dataset archive into the current working directory
import zipfile
import os

# the context manager closes the archive handle as soon as extraction
# finishes (or fails), instead of leaving it open for the kernel's lifetime
with zipfile.ZipFile('datasets.zip', 'r') as archive:
  archive.extractall(os.getcwd())

In [4]:
# build the class Config used for storing paths
import os

class Config:
  """Dataset directory paths shared by the cells of this notebook."""

  # root directory that contains the dataset splits
  BASE_PATH = "/content/datasets/idc"

  # the training, validation, and testing directories, each one derived
  # from the base path
  TRAIN_PATH = BASE_PATH + os.path.sep + "training"
  VAL_PATH = BASE_PATH + os.path.sep + "validation"
  TEST_PATH = BASE_PATH + os.path.sep + "testing"

In [5]:
"""
The model to be built is a CNN named CancerNet. This network performs the operations below:
1. Use 3x3 filters
2. Stack these filters on top of each other
3. Perform max-pooling
4. Use depthwise separable convolutions (more efficient than standard convolutions, and they also take up less memory)
"""

# import required libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, \
SeparableConv2D, MaxPooling2D, Activation, Flatten, Dropout, Dense
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.models import load_model

# declare a class with static methods to create the network
class CancerNet:
  """Builder for the CancerNet image classifier.

  The network stacks 3x3 separable convolutions (each followed by ReLU and
  batch normalization), closes every stage with max-pooling and dropout, and
  ends with a fully-connected head and a softmax output layer.
  """

  @staticmethod
  def build(width, height, depth, class_num):
    """Assemble the CancerNet layers into a Sequential model.

    Args:
      width: width of the input images.
      height: height of the input images.
      depth: number of channels of the input images.
      class_num: number of units of the final softmax layer.

    Returns:
      The Sequential model with every layer added. The model is not
      compiled here.
    """
    # the backend's image data format decides both the input layout and the
    # axis that batch normalization runs over; channels-last is the fallback,
    # e.g. (batch, height, width, channel)
    if K.image_data_format() == "channels_first":
      input_dims, bn_axis = (depth, height, width), 1
    else:
      input_dims, bn_axis = (height, width, depth), -1

    net = Sequential()

    # Convolutional stages, written as (filters, number of conv units).
    # One conv unit is: separable conv -> relu -> batch normalization.
    # Every stage ends with one max-pooling layer and one dropout layer.
    stage_plan = ((32, 1), (64, 2), (128, 3))
    # only the very first layer of the network declares the input shape
    conv_extras = {"input_shape": input_dims}
    for filters, units in stage_plan:
      for _ in range(units):
        net.add(SeparableConv2D(filters, (3, 3), padding="same", **conv_extras))
        net.add(Activation("relu"))
        net.add(BatchNormalization(axis=bn_axis))
        conv_extras = {}
      net.add(MaxPooling2D(pool_size=(2, 2)))
      net.add(Dropout(0.25))

    # flatten the multi-dimensional feature maps into one dimension: the
    # usual transition between Conv layers and Dense (fully-connected) layers
    net.add(Flatten())
    net.add(Dense(256))
    net.add(Activation("relu"))
    net.add(BatchNormalization())
    net.add(Dropout(0.5))

    # output layer: one softmax probability per category
    net.add(Dense(class_num))
    net.add(Activation("softmax"))

    return net

In [6]:
"""
This part prepares the data used to train and evaluate the model
"""
# uncomment the matplotlib.use("Agg") line below to select the
# non-interactive Agg backend; with it, figures are not shown on screen
import matplotlib
#matplotlib.use("Agg")

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers import Adagrad
from tensorflow.python.keras.utils import np_utils
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# NOTE: no separate config module is imported here; the dataset paths come
# from the Config class defined in an earlier cell
from imutils import paths 
import matplotlib.pyplot as plt
import numpy as np
import os

EPOCHS_NUM = 40  # the number of training epochs
INIT_LR = 1e-2  # the initial learning rate
BATCH_SIZE = 32  # batch size

# the file paths of all training samples
train_paths = list(paths.list_images(Config.TRAIN_PATH))
# the number of samples in the training set
train_len = len(train_paths)
# the number of samples in the validation set
valid_len = len(list(paths.list_images(Config.VAL_PATH)))
# the number of samples in the testing set
test_len = len(list(paths.list_images(Config.TEST_PATH)))

# get the labels of all training samples (the name of the parent folder,
# 0 or 1) and put them all in a list
train_labels = [int(path.split(os.path.sep)[-2]) for path in train_paths]
# convert the single-digit train labels to one-hot encoding
train_labels = np_utils.to_categorical(train_labels)
# sum the one-hot rows to count the number of samples of each category
class_totals = train_labels.sum(axis=0)
# calculate the class weights for the imbalanced classification,
# e.g. if class_totals is [15, 13], class_weight is [15/15, 15/13]:
# the category with fewer samples gets the larger weight.
# Another way to calculate it is [1/15, 1/13].
# reference link: https://keras.io/examples/structured_data/imbalanced_classification/
# how to resolve imbalanced issues: https://blog.csdn.net/weixin_39668479/article/details/112395317
class_weight = class_totals.max()/class_totals

# random data augmentation (plus rescaling to [0, 1]) for the training set
train_aug = ImageDataGenerator(
    rescale = 1/255.0,
    rotation_range = 20,
    zoom_range = 0.05,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.05,   # shear: keep the x or y coordinate unchanged and shift the coordinate on the other axis
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = "nearest")

# the validation set is only rescaled; no random augmentation is applied
val_aug = ImageDataGenerator(rescale = 1/255.0)

# the test set is only rescaled; no random augmentation is applied
test_aug = ImageDataGenerator(rescale = 1/255.0)

# batches of training images with real-time data augmentation
train_gen = train_aug.flow_from_directory(
    Config.TRAIN_PATH,
    class_mode="categorical",
    target_size=(48,48),  # the original size of each image is 50 x 50
    color_mode="rgb",
    shuffle=True,
    batch_size=BATCH_SIZE)

# batches of validation images. They must come from val_aug (rescale only):
# building them from train_aug would randomly distort the validation images,
# which makes val_loss / val_accuracy noisy and not comparable across epochs
val_gen = val_aug.flow_from_directory(
    Config.VAL_PATH,
    class_mode="categorical",
    target_size=(48,48),  # the original size of each image is 50 x 50
    color_mode="rgb",
    shuffle=False,
    batch_size=BATCH_SIZE)

# batches of test images (rescale only); shuffle stays off so that the
# prediction order matches test_gen.classes
test_gen = test_aug.flow_from_directory(
    Config.TEST_PATH,
    class_mode="categorical",
    target_size=(48,48),  # the original size of each image is 50 x 50
    color_mode="rgb",
    shuffle=False,
    batch_size=BATCH_SIZE)

Found 199818 images belonging to 2 classes.
Found 22201 images belonging to 2 classes.
Found 55505 images belonging to 2 classes.


In [None]:
"""
Model training with the adaptive gradients optimizer
"""
# Build the network, passing the input size of the network and the
# number of categories
model = CancerNet.build(width=48, height=48, depth=3, class_num=2)
# Build the Adagrad optimizer with the initial learning rate
# and the decay of the learning rate
opt = Adagrad(learning_rate=INIT_LR, decay=INIT_LR/EPOCHS_NUM)
# Compile the whole model
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

# reference link:https://keras.io/examples/structured_data/imbalanced_classification/
# Keras takes the class weights as a {class index: weight} mapping. The
# mapping gets its own name instead of rebinding `class_weight`, so the
# global keeps a single type and this cell does not feed on its own output
# when it is re-run.
class_weight_map = {0: class_weight[0], 1: class_weight[1]}
# Train the model; Model.fit consumes the generators directly
training_records = model.fit(
          train_gen,
          steps_per_epoch = train_len//BATCH_SIZE,
          validation_data = val_gen,
          validation_steps = valid_len//BATCH_SIZE,
          class_weight = class_weight_map,
          epochs = EPOCHS_NUM)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
"""
Model training with the Adam optimizer
"""
# Build the network, passing the input size of the network and the
# number of categories
model = CancerNet.build(width=48, height=48, depth=3, class_num=2)
# Build the Adam optimizer
opt = Adam(learning_rate=INIT_LR)
# Compile the whole model
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
# Create the callback that lowers the learning rate when val_loss stalls
# factor: the learning rate is multiplied by this value on each reduction
# patience: epochs without improvement to wait before reducing
# min_lr: the lower bound of the learning rate
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=1, min_lr=0.0001)
# Set the checkpoint to save the model whenever val_loss improves; the
# epoch number is embedded in the file name
checkpoint = ModelCheckpoint('model-{epoch:03d}.h5', monitor='val_loss', verbose=0,
               save_best_only=True, mode='auto')

# reference link:https://keras.io/examples/structured_data/imbalanced_classification/
# Keras takes the class weights as a {class index: weight} mapping. The
# mapping gets its own name instead of rebinding `class_weight`, so the
# global keeps a single type and this cell does not feed on its own output
# when it is re-run.
class_weight_map = {0: class_weight[0], 1: class_weight[1]}
# Train the model; Model.fit consumes the generators directly
training_records = model.fit(
    train_gen,
    steps_per_epoch = train_len//BATCH_SIZE,
    validation_data = val_gen,
    validation_steps = valid_len//BATCH_SIZE,
    class_weight = class_weight_map,
    epochs = EPOCHS_NUM,
    callbacks = [reduce_lr, checkpoint])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
"""
Model evaluation
part1: drawing the loss and accuracy curves recorded during the training process
The convergence of the model trained with Adagrad optimizer is stable, but has the slow convergence speed.
The convergence of the model trained with Adam optimizer is not stable, but has the high convergence speed.
"""
# history of the most recent model.fit call
M = training_records
# take the number of epochs from the recorded history itself, so the x axis
# always matches the curves (even if training ran fewer than EPOCHS_NUM epochs)
N = len(M.history["loss"])
epoch_axis = np.arange(0, N)
plt.style.use("ggplot")
plt.figure()
plt.plot(epoch_axis, M.history["loss"], label="train_loss")
plt.plot(epoch_axis, M.history["val_loss"], label="val_loss")
plt.plot(epoch_axis, M.history["accuracy"], label="train_acc")
plt.plot(epoch_axis, M.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy on the IDC Dataset")
plt.xlabel("Epoch No.")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
# save BEFORE showing: once plt.show() has rendered the figure it is no longer
# the current figure, so a savefig issued afterwards writes an empty image
plt.savefig('plot.png')
plt.show()

In [9]:
"""
Model evaluation
part2: take the model with the highest validation accuracy to make predictions: a model saved at 4th epoch with the training
accuracy 0.8468 and validation accuracy 0.8682.
"""
model = load_model('model-004.h5')
print("Now evaluating the model")
test_gen.reset() # restart the iterator from its first batch before predicting
# Model.predict consumes the generator directly (predict_generator is
# deprecated). len(test_gen) is the number of batches that covers every test
# sample exactly once; the former (test_len//BATCH_SIZE)+1 asks for one batch
# too many whenever test_len is an exact multiple of BATCH_SIZE, which makes
# the predictions longer than test_gen.classes.
pred_probs = model.predict(test_gen, steps=len(test_gen))
# each prediction row holds one probability per class; keep the index of the
# largest probability of each row as the predicted class
pred_indices = np.argmax(pred_probs, axis=1)

# sklearn classification_report
# usage: sklearn.metrics.classification_report(y_true, y_pred, *, 
# labels=None, target_names=None, sample_weight=None, digits=2, 
# output_dict=False, zero_division='warn')
print(classification_report(test_gen.classes, pred_indices, 
              target_names=test_gen.class_indices.keys()))
# calculate the confusion matrix (rows: true class, columns: predicted class)
cm_matrix = confusion_matrix(test_gen.classes, pred_indices)
# the total number of evaluated test samples
total = cm_matrix.sum()
# calculate the accuracy, specificity, sensitivity, and f1 score
# NOTE(review): the formulas below treat class 0 as the "positive" class:
# sensitivity is the recall of class 0 and specificity is the recall of
# class 1. If class 1 is meant to be the positive class, sensitivity and
# specificity are swapped here -- confirm the intended convention.
accuracy = (cm_matrix[0, 0] + cm_matrix[1, 1])/total
specificity = cm_matrix[1, 1]/(cm_matrix[1, 0] + cm_matrix[1, 1])
sensitivity = cm_matrix[0, 0]/(cm_matrix[0, 0] + cm_matrix[0, 1]) # recall
recall = sensitivity
precision = cm_matrix[0, 0]/(cm_matrix[0, 0] + cm_matrix[1, 0])
f1_score = (2 * precision * recall)/(precision + recall)
# print all results of evaluation indices
print(cm_matrix)
print(f'Accuracy: {accuracy}')
print(f'Specificity: {specificity}')
print(f'Sensitivity: {sensitivity}')
print(f'F1 score: {f1_score}')

Now evaluating the model


  # Remove the CWD from sys.path while we load stuff.


              precision    recall  f1-score   support

           0       0.88      0.93      0.91     39736
           1       0.80      0.68      0.73     15769

    accuracy                           0.86     55505
   macro avg       0.84      0.81      0.82     55505
weighted avg       0.86      0.86      0.86     55505

[[37012  2724]
 [ 5034 10735]]
Accuracy: 0.8602288082154761
Specificity: 0.6807660599911218
Sensitivity: 0.931447553855446
F1 score: 0.9051380499376391
