# Import packages

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt
import random
from tensorflow.keras.callbacks import Callback,TensorBoard,EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications.densenet import DenseNet201 , preprocess_input as dense_pi
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adadelta,RMSprop,Adamax,SGD
from tensorflow.keras.models import Sequential,Model,save_model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D,Input,BatchNormalization,Conv2D,Dropout
import cv2  # for image processing
from sklearn.model_selection import train_test_split
import scipy.io
from collections import defaultdict
import tqdm
import json
import os

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


# Reproducable results

In [3]:
seed = 1352
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# Process Images and Create ImageGenerator

In [4]:
# Image processing functions
def random_crop(img, random_crop_size):
    """Take as input an image, and a scalar crop size and
      returns a random square crop, def random_crop(img, random_crop_size):

    """
    # Note: image_data_format is 'channel_last'
    assert img.shape[2] == 3
    height, width = img.shape[0], img.shape[1]
    dy, dx = random_crop_size
    x = np.random.randint(0, width - dx + 1)
    y = np.random.randint(0, height - dy + 1)
    return img[y:(y+dy), x:(x+dx), :]


def crop_generator(batches, crop_length =299):
    """Take as input a Keras ImageGen (Iterator) and generate random
    crops from the image batches generated by the original iterator. def crop_generator(batches, crop_length):

    """
    while True:
        batch_x, batch_y = next(batches)
        batch_crops = np.zeros((batch_x.shape[0], crop_length, crop_length, 3))
        for i in range(batch_x.shape[0]):
            batch_crops[i] = random_crop(batch_x[i], (crop_length, crop_length))
        yield (batch_crops, batch_y)

        

def create_gen(preprocessing_func):
    """Take as input a preprocessing function and returns a Keras ImageGen (Iterator) and generate random
    crops from the image batches generated by the original iterator.def create_gen(preprocessing_func):

    """
    data_generator = ImageDataGenerator(preprocessing_function=preprocessing_func,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='reflect')
    return data_generator


def get_crops():
    incep_resnet_gen = create_gen(dense_pi)
    resnet_train_batches = incep_resnet_gen.flow_from_directory(
            train_dir,
            target_size=(img_size1, img_size2),
            batch_size=BATCH_SIZE,
            class_mode='categorical')
    resnet_validation_batches = incep_resnet_gen.flow_from_directory(
            validation_dir,
            target_size=(img_size1, img_size2),
            batch_size = BATCH_SIZE,
            class_mode='categorical')
    tr_crops = crop_generator(resnet_train_batches,crop_length =crop_length)
    val_crops = crop_generator(resnet_validation_batches,crop_length =crop_length)
    test_batches = ImageDataGenerator(preprocessing_function=dense_pi).flow_from_directory(
            test_dir,
            target_size=(img_size1, img_size2),
            batch_size = BATCH_SIZE,
            class_mode='categorical')
    test_crops = crop_generator(test_batches,crop_length=crop_length)
    return tr_crops, val_crops, test_crops

In [None]:
class DataGen(ImageDataGenerator):
    
    def __init__():
        
    
    # Image processing functions
    def random_crop(self, img, random_crop_size):
        """Take as input an image, and a scalar crop size and
          returns a random square crop, def random_crop(img, random_crop_size):

        """
        # Note: image_data_format is 'channel_last'
        assert img.shape[2] == 3
        height, width = img.shape[0], img.shape[1]
        dy, dx = random_crop_size
        x = np.random.randint(0, width - dx + 1)
        y = np.random.randint(0, height - dy + 1)
        return img[y:(y+dy), x:(x+dx), :]


    def crop_generator(self, batches, crop_length =299):
        """Take as input a Keras ImageGen (Iterator) and generate random
        crops from the image batches generated by the original iterator. def crop_generator(batches, crop_length):

        """
        while True:
            batch_x, batch_y = next(batches)
            batch_crops = np.zeros((batch_x.shape[0], crop_length, crop_length, 3))
            for i in range(batch_x.shape[0]):
                batch_crops[i] = random_crop(batch_x[i], (crop_length, crop_length))
            yield (batch_crops, batch_y)



    def create_gen(self, preprocessing_func):
        """Take as input a preprocessing function and returns a Keras ImageGen (Iterator) and generate random
        crops from the image batches generated by the original iterator.def create_gen(preprocessing_func):

        """
        data_generator = ImageDataGenerator(preprocessing_function=preprocessing_func,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            vertical_flip=True,
            fill_mode='reflect')
        return data_generator


    def get_crops(self):
        incep_resnet_gen = create_gen(dense_pi)
        resnet_train_batches = incep_resnet_gen.flow_from_directory(
                train_dir,
                target_size=(img_size1, img_size2),
                batch_size=BATCH_SIZE,
                class_mode='categorical')
        resnet_validation_batches = incep_resnet_gen.flow_from_directory(
                validation_dir,
                target_size=(img_size1, img_size2),
                batch_size = BATCH_SIZE,
                class_mode='categorical')
        tr_crops = crop_generator(resnet_train_batches,crop_length =crop_length)
        val_crops = crop_generator(resnet_validation_batches,crop_length =crop_length)
        test_batches = ImageDataGenerator(preprocessing_function=dense_pi).flow_from_directory(
                test_dir,
                target_size=(img_size1, img_size2),
                batch_size = BATCH_SIZE,
                class_mode='categorical')
        test_crops = crop_generator(test_batches,crop_length=crop_length)
        return tr_crops, val_crops, test_crops

In [5]:
# paths settings
root_path = os.path.expanduser("~/repos/datasets/iciar/dataset/")
train_dir      = os.path.join(root_path,'train'     )
validation_dir = os.path.join(root_path,'validation')
test_dir       = os.path.join(root_path,'test'      )
output_base_dir    = os.path.expanduser("~/repos/genetic_programming/paper/iciar/outputs/")

# image settings 
img_size1 = 600
img_size2 = 800
crop_length = 600


# training params
BATCH_SIZE = 4

# get the crops generators
train_crops, val_crops,test_crops = get_crops()

Found 376 images belonging to 4 classes.
Found 12 images belonging to 4 classes.
Found 12 images belonging to 4 classes.


# Create the model

In [6]:
def create_model(model_name,weights="imagenet",pooling = 'avg',img_size = 224):
    """Take as input the following
def create_model(model_name,weights="imagenet",pooling = 'avg',img_size = 224):
and returns a model that has the pretrined model untrainable
    """
    num_classes = 4
    model = Sequential()
    model.add(model_name(include_top=False,pooling = 'avg', weights=weights,input_shape = (img_size,img_size,3)))
   # model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))

    model.layers[0].trainable = False
    return model

def wrap_model(base_model , use_imagenet=True, img_size = 299):
    # load pre-trained model graph, don't add final layer
    model = base_model(include_top=False, input_shape=(img_size, img_size, 3),
                                          weights= "imagenet" )
    # add global pooling just like in InceptionV3
    new_output = tf.keras.layers.GlobalAveragePooling2D()(model.output)
    # add new dense layer for our labels
    new_output = tf.keras.layers.GaussianDropout(0.2)(new_output)
    new_output = tf.keras.layers.Dense(4, activation='softmax')(new_output)
    model = tf.keras.Model(model.inputs, new_output)
    return model

def zip_layers():
    '''
    Returns c which is a zip of layer names and its corresponding numbers
    layer name and layers num
    '''
    model_len = len(model.layers)
    layer_num = []
    layer_name = []
    for i,layer in enumerate(model.layers):
        layer_num.append(i)
        layer_name.append(layer)
    c = list(zip(layer_num,layer_name))
    return c,layer_num,layer_name



In [7]:
base_model = tf.keras.applications.DenseNet201
model = wrap_model(base_model = base_model, img_size=crop_length)

In [8]:
last_layer_num = 6
for layer in model.layers:
    layer.trainable = True
    if isinstance(layer, keras.layers.BatchNormalization):
        # we do aggressive exponential smoothing of batch norm
        # parameters to faster adjust to our new dataset
        layer.momentum = 0.9    
        # fix deep layers (fine-tuning only last 50)
for layer in model.layers[:-(last_layer_num)]:
    # fix all but batch norm layers, because we neeed to update moving averages for a new dataset!
    if not isinstance(layer, keras.layers.BatchNormalization):
        layer.trainable = False

# Callbacks

## LRS

In [9]:
class LossLearningRateScheduler(keras.callbacks.History):
    """
    A learning rate scheduler that relies on changes in loss function
    value to dictate whether learning rate is decayed or not.
    LossLearningRateScheduler has the following properties:
    base_lr: the starting learning rate
    lookback_epochs: the number of epochs in the past to compare with the loss function at the current epoch to determine if progress 
    is being made.
    decay_threshold / decay_multiple: if loss function has not improved by a factor of decay_threshold * lookback_epochs, then 
    decay_multiple will be applied to the learning rate.
    spike_epochs: list of the epoch numbers where you want to spike the learning rate.
    spike_multiple: the multiple applied to the current learning rate for a spike.
    """

    def __init__(self, base_lr, lookback_epochs, spike_epochs = None, spike_multiple = 10, decay_threshold = 0.002, decay_multiple = 0.5, loss_type = 'val_loss'):

        super(LossLearningRateScheduler, self).__init__()

        self.base_lr = base_lr
        self.lookback_epochs = lookback_epochs
        self.spike_epochs = spike_epochs
        self.spike_multiple = spike_multiple
        self.decay_threshold = decay_threshold
        self.decay_multiple = decay_multiple
        self.loss_type = loss_type


    def on_epoch_begin(self, epoch, logs=None):

        if len(self.epoch) > self.lookback_epochs:

            current_lr = K.get_value(self.model.optimizer.lr)

            target_loss = self.history[self.loss_type]

            loss_diff =  target_loss[-int(self.lookback_epochs)] - target_loss[-1]

            if loss_diff <= np.abs(target_loss[-1]) * (self.decay_threshold * self.lookback_epochs):

                print(' '.join(('Changing learning rate from', str(current_lr), 'to', str(current_lr * self.decay_multiple))))
                K.set_value(self.model.optimizer.lr, current_lr * self.decay_multiple)
                current_lr = current_lr * self.decay_multiple

            else:

                print(' '.join(('Learning rate:', str(current_lr))))

            if self.spike_epochs is not None and len(self.epoch) in self.spike_epochs:
                print(' '.join(('Spiking learning rate from', str(current_lr), 'to', str(current_lr * self.spike_multiple))))
                K.set_value(self.model.optimizer.lr, current_lr * self.spike_multiple)

        else:

            print(' '.join(('Setting learning rate to', str(self.base_lr))))
            K.set_value(self.model.optimizer.lr, self.base_lr)


        return K.get_value(self.model.optimizer.lr)

In [10]:
lr = 1e-2
lrs = LossLearningRateScheduler(lr, lookback_epochs = 3)

## Model Checkpoint

In [11]:

checkpoint_filepath = 'outputs/{}'.format(base_model.__name__)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

## tqdm

In [12]:
import tensorflow_addons as tfa
tqdm_callback = tfa.callbacks.TQDMProgressBar()

 The versions of TensorFlow you are currently using is 2.5.0-rc2 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


## Early Stopping

In [13]:
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=10, verbose=0, mode='auto')

# Training

## Loss Function and Compile model

In [14]:
model.compile(
    loss='categorical_crossentropy',  # we train 102-way classification
    optimizer=Adamax(lr=lr),  # we can take big lr here because we fixed first layers
    metrics=['accuracy']  # report accuracy during training
) 



In [15]:
import time
ti = time.time()
hist=(model.fit(
            train_crops, 
            steps_per_epoch=376 // BATCH_SIZE,
            epochs=2000,
            validation_data=val_crops, 
            validation_steps=10,
            callbacks=[checkpoint,
                       lrs,
                       tqdm_callback,
                       es],
            verbose=0
        ))
tf = time.time()



Training:   0%|           0/2000 ETA: ?s,  ?epochs/s

Setting learning rate to 0.01
Epoch 1/2000


0/94           ETA: ?s - 

Setting learning rate to 0.01
Epoch 2/2000


0/94           ETA: ?s - 

Setting learning rate to 0.01
Epoch 3/2000


0/94           ETA: ?s - 

Setting learning rate to 0.01
Epoch 4/2000


0/94           ETA: ?s - 

Changing learning rate from 0.01 to 0.004999999888241291
Epoch 5/2000


0/94           ETA: ?s - 

Learning rate: 0.005
Epoch 6/2000


0/94           ETA: ?s - 

Learning rate: 0.005
Epoch 7/2000


0/94           ETA: ?s - 

Changing learning rate from 0.005 to 0.0024999999441206455
Epoch 8/2000


0/94           ETA: ?s - 

Changing learning rate from 0.0025 to 0.0012499999720603228
Epoch 9/2000


0/94           ETA: ?s - 

Learning rate: 0.00125
Epoch 10/2000


0/94           ETA: ?s - 

Learning rate: 0.00125
Epoch 11/2000


0/94           ETA: ?s - 

Learning rate: 0.00125
Epoch 12/2000


0/94           ETA: ?s - 

Changing learning rate from 0.00125 to 0.0006249999860301614
Epoch 13/2000


0/94           ETA: ?s - 

Learning rate: 0.000625
Epoch 14/2000


0/94           ETA: ?s - 

Learning rate: 0.000625
Epoch 15/2000


0/94           ETA: ?s - 

Changing learning rate from 0.000625 to 0.0003124999930150807
Epoch 16/2000


0/94           ETA: ?s - 

Learning rate: 0.0003125
Epoch 17/2000


0/94           ETA: ?s - 

Learning rate: 0.0003125
Epoch 18/2000


0/94           ETA: ?s - 

Changing learning rate from 0.0003125 to 0.00015624999650754035
Epoch 19/2000


0/94           ETA: ?s - 

Changing learning rate from 0.00015625 to 7.812499825377017e-05
Epoch 20/2000


0/94           ETA: ?s - 

Learning rate: 7.8125e-05
Epoch 21/2000


0/94           ETA: ?s - 

Changing learning rate from 7.8125e-05 to 3.9062499126885086e-05
Epoch 22/2000


0/94           ETA: ?s - 

Changing learning rate from 3.90625e-05 to 1.9531249563442543e-05
Epoch 23/2000


0/94           ETA: ?s - 

Changing learning rate from 1.953125e-05 to 9.765624781721272e-06
Epoch 24/2000


0/94           ETA: ?s - 

Changing learning rate from 9.765625e-06 to 4.882812390860636e-06
Epoch 25/2000


0/94           ETA: ?s - 

Learning rate: 4.8828124e-06
Epoch 26/2000


0/94           ETA: ?s - 

Changing learning rate from 4.8828124e-06 to 2.441406195430318e-06
Epoch 27/2000


0/94           ETA: ?s - 

Changing learning rate from 2.4414062e-06 to 1.220703097715159e-06
Epoch 28/2000


0/94           ETA: ?s - 

Learning rate: 1.2207031e-06
Epoch 29/2000


0/94           ETA: ?s - 

Changing learning rate from 1.2207031e-06 to 6.103515488575795e-07
Epoch 30/2000


0/94           ETA: ?s - 

Changing learning rate from 6.1035155e-07 to 3.0517577442878974e-07
Epoch 31/2000


0/94           ETA: ?s - 

Learning rate: 3.0517577e-07
Epoch 32/2000


0/94           ETA: ?s - 

Learning rate: 3.0517577e-07
Epoch 33/2000


0/94           ETA: ?s - 

Changing learning rate from 3.0517577e-07 to 1.5258788721439487e-07
Epoch 34/2000


0/94           ETA: ?s - 

Changing learning rate from 1.5258789e-07 to 7.629394360719743e-08
Epoch 35/2000


0/94           ETA: ?s - 

In [16]:
print(f'Training {base_model.__name__!r} executed in {(tf-ti):.4f}s with {len(hist.history["val_loss"])} epochs')

Training 'DenseNet201' executed in 1343.9386s with 35 epochs


# Evaluation

# load the checkpoint

In [15]:
model.load_weights(checkpoint_filepath)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f579269d3a0>

In [27]:
from sklearn.metrics import (roc_auc_score,accuracy_score,average_precision_score,log_loss,
                             precision_recall_fscore_support,average_precision_score,coverage_error,
                             label_ranking_average_precision_score,label_ranking_loss,
                             classification_report,confusion_matrix)

In [28]:
def print_metrics(gt,model1_labels_prob,model1_labels):
    incep_res_cm = confusion_matrix(gt,model1_labels.argmax(axis=1))
    plot_cm(incep_res_cm)
    gt_cat = to_categorical(gt)
    print('Classification Report')
    target_names = ['Benign', 'InSitu', 'Invasive', 'Normal']
    print(classification_report(gt_cat, model1_labels, target_names=target_names))
    print('roc_auc_score for predictions')
    print(roc_auc_score(to_categorical(gt), model1_labels_prob))
    print('roc_auc_score for labels')
    print(roc_auc_score(to_categorical(gt), model1_labels))
    print('accuracy_score for labels')
    print(accuracy_score(to_categorical(gt), model1_labels))
    print('average_precision_score for predictions')

    print(average_precision_score(to_categorical(gt), model1_labels_prob))
    print('average_precision_score for labels')
    print(average_precision_score(to_categorical(gt), model1_labels))
    print('log_loss for labels')
    print(log_loss(to_categorical(gt), model1_labels))
    print('precision_recall_fscore_support for labels')
    print(precision_recall_fscore_support(to_categorical(gt), model1_labels))
    print('average_precision_score for labels')
    print(average_precision_score(to_categorical(gt), model1_labels))
    print('average_precision_score for predictions')
    print(average_precision_score(to_categorical(gt), model1_labels_prob))
    print('coverage_error for labels')
    print(coverage_error(to_categorical(gt), model1_labels))
    print('coverage_error for predictions')
    print(coverage_error(to_categorical(gt), model1_labels_prob))
    print('label_ranking_average_precision_score for labels')
    print(label_ranking_average_precision_score(to_categorical(gt), model1_labels))
    print('label_ranking_average_precision_score for predictions')
    print(label_ranking_average_precision_score(to_categorical(gt), model1_labels_prob))
    print('label_ranking_loss for labels')
    print(label_ranking_loss(to_categorical(gt), model1_labels))

## Prepare the test batches and save them to a file

### save the test data

In [33]:
model_labels.argmax(axis=1)

array([0, 3, 0, 2, 2, 1, 3, 1, 1, 1, 0, 3, 2, 2, 2, 1, 3, 3, 3, 1, 1, 3,
       0, 1, 3, 1, 3, 1, 2, 1, 3, 0, 1, 3, 2, 2, 1, 3, 3, 1])

In [34]:
gt

['Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal',
 'Benign',
 'InSitu',
 'Invasive',
 'Normal']

In [19]:
res = model.evaluate(test_crops, steps=120)
res



[0.3120424449443817, 0.856249988079071]