In [None]:
import numpy as np
import tensorflow as tf
from tqdm import tqdm

Data loading and augmentation

In [None]:
# Dataset locations handed to DataGenerator.load_train_data / load_test_data below.
TRAIN_DIR = '/content/birds_species/train'
TEST_DIR = '/content/birds_species/test'

# Batch size and target image size used when building the data iterators.
BATCH_SIZE = 32
IMG_SIZE = (224, 224)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

class DataGenerator(object):
    """Build Keras directory iterators for training, validation and testing.

    Three ``ImageDataGenerator`` objects are kept:

    * ``train_datagen`` applies random augmentation (shear, zoom, rotation,
      shifts, brightness, horizontal/vertical flips) and reserves
      ``validation_split`` of the data.
    * ``val_datagen`` reserves the same ``validation_split`` but applies no
      augmentation, so validation metrics are measured on unmodified images.
    * ``test_datagen`` applies no transformation at all.

    Args:
        size: Target size every image is resized to.
        color_mode: Passed through to ``flow_from_directory``.
        batch_size: Number of images per batch.
        class_mode: Passed through to ``flow_from_directory``.
        validation_split: Fraction of the training directory held out for
            validation.
    """

    def __init__(
        self, size=(224, 224),
        color_mode='rgb',
        batch_size=32,
        class_mode='binary',
        validation_split=0.15
    ):
        self.size = size
        self.color_mode = color_mode
        self.batch_size = batch_size
        self.class_mode = class_mode
        self.train_datagen = ImageDataGenerator(
            shear_range=0.2,
            zoom_range=0.2,
            rotation_range=20,
            height_shift_range=0.1,
            width_shift_range=0.1,
            brightness_range=[0.5, 1.5],
            horizontal_flip=True,
            vertical_flip=True,
            validation_split=validation_split)
        # Same split fraction as the training generator, but no augmentation:
        # validation loss drives ReduceLROnPlateau / EarlyStopping and must not
        # be perturbed by random image distortions.
        self.val_datagen = ImageDataGenerator(
            validation_split=validation_split)
        self.test_datagen = ImageDataGenerator()

    def _flow(self, datagen, dir, **extra):
        """Create a directory iterator from ``datagen`` with the shared settings."""
        return datagen.flow_from_directory(
            dir,
            target_size=self.size,
            color_mode=self.color_mode,
            batch_size=self.batch_size,
            class_mode=self.class_mode,
            **extra)

    def load_train_data(self, dir):
        """Return ``(train_generator, validation_generator)`` for ``dir``.

        The training iterator is augmented; the validation iterator reads the
        held-out subset of the same directory without augmentation.
        """
        train_generator = self._flow(
            self.train_datagen, dir, subset='training')
        validation_generator = self._flow(
            self.val_datagen, dir, subset='validation')

        return train_generator, validation_generator

    def load_test_data(self, dir):
        """Return a non-shuffled, non-augmented iterator over ``dir``."""
        # shuffle=False keeps batches in the iterator's own file order.
        return self._flow(self.test_datagen, dir, shuffle=False)


# Build the train/validation/test iterators; class_mode and validation_split
# are left at DataGenerator's defaults ('binary' and 0.15).
datagen = DataGenerator(size=IMG_SIZE, batch_size=BATCH_SIZE)
train_generator, validation_generator = datagen.load_train_data(TRAIN_DIR)
test_generator = datagen.load_test_data(TEST_DIR)

Model definition

In [1]:
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Layer whose activations are used for the Grad-CAM visualisation.
# NOTE: the ResNet cell further down rebinds LAST_LAYER; re-run this cell
# before the qualitative evaluation when the VGG model is the one in use.
LAST_LAYER = 'block5_conv3'

class VGGNet(object):
    """Factory for a binary classifier built on an ImageNet-pretrained VGG16.

    Calling ``VGGNet(input_shape)`` returns a Keras ``Model`` (not a ``VGGNet``
    instance): ``__new__`` builds and returns the network directly, so no
    ``__init__`` is ever run.

    The model applies ``preprocess_input`` to the raw input, runs the VGG16
    convolutional base, then global average pooling, dropout (0.2) and a single
    sigmoid unit.
    """

    # Only layers of the last convolutional block stay trainable. Selecting
    # them by name instead of by position keeps the choice correct no matter
    # how many preprocessing entries precede the VGG layers in
    # ``base_model.layers``.
    TRAINABLE_PREFIX = 'block5_'

    def __new__(cls, input_shape):
        inputs = Input(shape=input_shape)
        processed_input = preprocess_input(inputs)

        base_model = VGG16(
            input_tensor=processed_input,
            include_top=False,
            weights='imagenet')
        # Fine-tune last conv block; freeze everything before it.
        for layer in base_model.layers:
            layer.trainable = layer.name.startswith(cls.TRAINABLE_PREFIX)

        x = GlobalAveragePooling2D()(base_model.output)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        return Model(inputs, outputs)

In [2]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Layer whose activations are used for the Grad-CAM visualisation.
# NOTE: this rebinds the LAST_LAYER value assigned in the VGG cell above, so
# after running this cell the name only matches the ResNet model.
LAST_LAYER = 'conv5_block3_3_conv'

class ResNet(object):
    """Factory for a binary classifier built on an ImageNet-pretrained ResNet50.

    Calling ``ResNet(input_shape)`` returns a Keras ``Model`` (not a ``ResNet``
    instance): ``__new__`` builds and returns the network directly, so no
    ``__init__`` is ever run.

    The model applies ``preprocess_input`` to the raw input, runs the ResNet50
    convolutional base, then global average pooling, dropout (0.2) and a single
    sigmoid unit.
    """

    # Only layers of the conv5_x stage stay trainable. Selecting them by name
    # instead of by position keeps the choice correct no matter how many
    # preprocessing entries precede the ResNet layers in ``base_model.layers``.
    TRAINABLE_PREFIX = 'conv5_'

    def __new__(cls, input_shape):
        inputs = Input(shape=input_shape)
        processed_input = preprocess_input(inputs)

        base_model = ResNet50(
            input_tensor=processed_input,
            include_top=False,
            weights='imagenet')
        # Fine-tune conv5_x onwards; freeze everything before it.
        for layer in base_model.layers:
            layer.trainable = layer.name.startswith(cls.TRAINABLE_PREFIX)

        x = GlobalAveragePooling2D()(base_model.output)
        x = Dropout(0.2)(x)
        outputs = Dense(1, activation='sigmoid')(x)

        return Model(inputs, outputs)

In [5]:
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.utils.class_weight import compute_class_weight

class MyNet(object):
    """Training wrapper around one of the notebook's binary classifiers.

    Args:
        model: ``'VGG'`` or ``'ResNet'``; selects which network is built.
        input_shape: Shape of a single input image, forwarded to the network
            factory.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """

    def __init__(self, model='VGG', input_shape=(224,224,3)):
        if model == 'VGG':
            self.model = VGGNet(input_shape)
        elif model == 'ResNet':
            self.model = ResNet(input_shape)
        else:
            # Fail immediately: continuing would leave ``self.model`` unset and
            # only surface later as an AttributeError.
            raise ValueError(
                'Model not found. Available models = [VGG, ResNet] '
                '(got {!r})'.format(model))

    def train(self, train_generator, validation_generator, optimizer, learning_rate, epochs):
        """Compile the model and fit it on the given iterators.

        Class weights are derived from ``train_generator.classes`` so that
        classes are balanced in the loss. Returns the object returned by
        ``model.fit``.
        """
        self._compile(optimizer, learning_rate)
        history = self.model.fit(
            train_generator,
            epochs=epochs,
            steps_per_epoch=len(train_generator),
            validation_data=validation_generator,
            validation_steps=len(validation_generator),
            callbacks=self._callbacks(),
            class_weight=self._class_weight(train_generator))

        return history

    def decode_predictions(self, predictions):
        """Threshold sigmoid outputs at 0.5 and return a flat int8 label array."""
        return (predictions > 0.5).flatten().astype(np.int8)

    def _compile(self, optimizer='sgd', learning_rate=0.0005):
        """Compile with SGD (momentum 0.9) or Adam and binary cross-entropy.

        ``optimizer`` is matched case-insensitively; ``'sgd'`` selects SGD and
        any other value selects Adam.
        """
        if str(optimizer).lower() == 'sgd':
            optimizer = SGD(learning_rate=learning_rate, momentum=0.9)
        else:
            optimizer = Adam(learning_rate=learning_rate)
        self.model.compile(
            optimizer=optimizer,
            loss='binary_crossentropy',
            metrics=['accuracy'])

    def _callbacks(self):
        """Return the LR-reduction and early-stopping callbacks (both watch val_loss)."""
        reduce_lr = ReduceLROnPlateau(
            monitor='val_loss', factor=0.1, patience=3, verbose=1)
        early_stop = EarlyStopping(
            monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

        return [reduce_lr, early_stop]

    def _class_weight(self, generator):
        """Return ``{class_index: weight}`` using sklearn's 'balanced' heuristic."""
        classes = np.unique(generator.classes)
        weights = compute_class_weight(
            class_weight='balanced',
            classes=classes,
            y=generator.classes)

        # Plain Python scalars instead of numpy scalars as keys/values.
        return {int(c): float(w) for c, w in zip(classes, weights)}

Model training

In [None]:
# Build the ResNet variant; the input shape is read from the training iterator.
net = MyNet('ResNet', train_generator.image_shape)
net.model.summary()

In [None]:
# Upper bound on epochs; MyNet.train also installs an EarlyStopping callback.
EPOCHS = 25

# Settings used for the VGG variant (uncomment these and comment out the
# ResNet block below when net was built with MyNet('VGG', ...)):
# LR = 0.0005
# OPTIMIZER = 'sgd'

# Settings used for the ResNet variant.
# 'Adam' is not matched by the 'sgd' branch of MyNet._compile, so the Adam
# optimizer is used.
LR = 0.001
OPTIMIZER = 'Adam'

# H is the object returned by model.fit; train_plot reads H.history.
H = net.train(train_generator, validation_generator, OPTIMIZER, LR, EPOCHS)

In [None]:
import matplotlib.pyplot as plt

def train_plot(H, figsize=(10, 5)):
    """Plot training/validation accuracy and loss side by side.

    Args:
        H: Object with a ``history`` dict containing the keys ``'accuracy'``,
            ``'val_accuracy'``, ``'loss'`` and ``'val_loss'`` (one value per
            epoch), e.g. the return value of ``MyNet.train``.
        figsize: Size of the whole figure, forwarded to ``plt.subplots``.
    """
    history = H.history
    epochs = list(range(1, len(history['accuracy']) + 1))
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=figsize)

    acc_ax.plot(epochs, history['accuracy'], label='Training')
    acc_ax.plot(epochs, history['val_accuracy'], label='Validation')
    acc_ax.legend(loc='lower right')
    acc_ax.set_xlabel('Epochs')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_ylim(0, 1.0)
    acc_ax.set_title('Training and Validation Accuracy')

    loss_ax.plot(epochs, history['loss'], label='Training')
    loss_ax.plot(epochs, history['val_loss'], label='Validation')
    loss_ax.legend(loc='upper right')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    # Cross-entropy is not bounded by 1, so only pin the lower limit; a fixed
    # upper limit of 1.0 would cut off high-loss epochs.
    loss_ax.set_ylim(bottom=0)
    loss_ax.set_title('Training and Validation Loss')

    fig.tight_layout()
    plt.show()

train_plot(H)

Quantitative Evaluation

In [6]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

def plot_confusion_matrix(y_true, y_pred, classes=None, normalize=False, k=3):
    """Display the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Optional iterable of class names (list, tuple, dict keys, ...)
            used to label rows and columns; ignored when ``None`` or empty.
        normalize: If true, every row is divided by its sum. Rows that sum to
            zero are shown as zeros instead of NaN.
        k: Figure size in inches per matrix row/column.
    """
    cm = confusion_matrix(y_true, y_pred)
    n_rows, n_cols = cm.shape
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # ``where`` skips empty rows so they stay at the zeros supplied in ``out``.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype='float'),
            where=row_totals != 0)
    df = pd.DataFrame(cm)
    # Materialise the labels so any iterable (e.g. ``dict.keys()``) is accepted
    # and the emptiness check is unambiguous.
    labels = list(classes) if classes is not None else []
    if labels:
        df.columns = labels
        df.index = labels
    # figsize is (width, height): width follows the columns, height the rows.
    fig, ax = plt.subplots(figsize=(k * n_cols, k * n_rows))
    sns.heatmap(df, annot=True, cmap='YlGnBu', ax=ax)
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    plt.show()

In [None]:
from time import time

# Ground-truth labels and class-name lookups taken from the test iterator.
y_true = test_generator.classes
classes = test_generator.class_indices.keys()
# Inverse of class_indices: integer label -> class name.
label2class = {v:k for k, v in test_generator.class_indices.items()}

# Wall-clock time of a full prediction pass over the test iterator.
s = time()
predictions = net.model.predict(test_generator)
print('Inference time: {}'.format(time() - s))

# Threshold the sigmoid outputs into 0/1 labels.
y_pred = net.decode_predictions(predictions)

# NOTE(review): comparing y_pred with y_true element-wise presumably relies on
# the test iterator having been created with shuffle=False — confirm.
score = accuracy_score(y_true, y_pred)
print('Test Accuracy = ', score)

plot_confusion_matrix(y_true, y_pred, classes, normalize=True)

Qualitative Evaluation

In [None]:
from tensorflow.keras import Model
from skimage.transform import resize
import matplotlib.pyplot as plt

def extract_images(generator):
    """Collect every image of one pass over ``generator`` into a uint8 array.

    ``generator`` yields ``(images, labels)`` batches and may loop forever, so
    iteration stops after the batch whose position reaches
    ``len(generator) - 1``.
    """
    last_batch = len(generator) - 1
    collected = []
    for batch_no, batch in enumerate(generator):
        images, _labels = batch
        collected.extend(images)
        if batch_no >= last_batch:
            break

    return np.array(collected, dtype=np.uint8)

def gallery(net, collection, layer_name, indices=None, labels2class=None, nrows=1, ncols=4, k=8):
    """
    Display a random nrows x ncols gallery from collection.

    Each shown image is overlaid with its Grad-CAM heatmap and titled with the
    label predicted by ``net``.

    Args:
        net: Object exposing ``model`` (with ``predict``) and
            ``decode_predictions``.
        collection: Indexable collection of images.
        layer_name: Name of the layer passed to ``make_gradcam_heatmap``.
        indices: Positions in ``collection`` to sample from. ``None`` means the
            whole collection; an integer ``n`` means ``range(n)``.
        labels2class: Optional mapping from predicted label to display name.
        nrows, ncols: Grid dimensions.
        k: Size in inches of one grid cell.

    Images are drawn without replacement, so no image appears twice. When fewer
    candidates than grid cells are available the remaining cells are left
    blank; when there are no candidates nothing is plotted.
    """
    if indices is None:
        candidates = np.arange(len(collection))
    elif np.ndim(indices) == 0:
        candidates = np.arange(int(indices))
    else:
        candidates = np.asarray(indices).ravel()

    if candidates.size == 0:
        # e.g. no misclassified images: sampling from an empty set would raise.
        print('No images to display.')
        return

    rng = np.random.default_rng()
    n_shown = min(nrows * ncols, candidates.size)
    # One draw for the whole grid so ``replace=False`` actually prevents repeats.
    chosen = rng.choice(candidates, size=n_shown, replace=False)

    # figsize is (width, height): width follows the columns, height the rows.
    fig, axs = plt.subplots(nrows, ncols, figsize=(k*ncols, k*nrows), squeeze=False)
    cells = axs.ravel()
    for ax, i in zip(cells, chosen):
        img = collection[i]

        array = np.expand_dims(img, axis=0)
        preds = net.model.predict(array)
        label = net.decode_predictions(preds)[0]

        heatmap = make_gradcam_heatmap(array, net.model, layer_name)
        # Upsample to this image's own height/width rather than a global size.
        upsample = resize(heatmap, img.shape[:2], preserve_range=True)

        ax.imshow(img)
        ax.imshow(upsample, alpha=0.4, cmap='jet')

        title = label
        if labels2class is not None:
            title = labels2class[title]
        ax.set_title(title)

    # Hide grid cells that received no image.
    for ax in cells[n_shown:]:
        ax.axis('off')

    fig.tight_layout()
    plt.show()

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for a single-image batch.

    Args:
        img_array: Batch containing the image to explain; only the first
            element's feature map is used for the heatmap.
        model: Keras model containing a layer named ``last_conv_layer_name``.
        last_conv_layer_name: Name of the convolutional layer whose activations
            are weighted to form the heatmap.
        pred_index: Index of the output unit to explain. When ``None``:
            for a model with a single output unit (sigmoid), the predicted
            class is explained — the unit itself if its value is > 0.5,
            otherwise ``1 - value``; for multi-unit outputs the arg-max unit
            is used.

    Returns:
        2-D numpy array with non-negative values, scaled so its maximum is 1
        (all zeros if no location has positive evidence).
    """
    # Model mapping the input image to the activations of the chosen conv
    # layer as well as the output predictions.
    grad_model = Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output])

    # Gradient of the explained score with respect to the conv activations.
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None and preds.shape[-1] == 1:
            # Single sigmoid unit: arg-max would always be 0 and the map would
            # show evidence for the positive class even when the negative
            # class was predicted. Explain the class that was predicted.
            positive_score = preds[:, 0]
            if float(positive_score[0]) > 0.5:
                class_channel = positive_score
            else:
                class_channel = 1.0 - positive_score
        else:
            if pred_index is None:
                pred_index = tf.argmax(preds[0])
            class_channel = preds[:, pred_index]

    grads = tape.gradient(class_channel, last_conv_layer_output)

    # Mean gradient per feature-map channel: "how important is this channel".
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each channel by its importance and sum over channels.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only, then scale to [0, 1]. divide_no_nan yields
    # zeros instead of NaN/inf when the clipped map is entirely zero.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

In [None]:
# Materialise all test images as one uint8 array so they can be indexed.
test_images = extract_images(test_generator)

# LAST_LAYER holds whichever value was assigned last (VGG or ResNet cell) and
# must name a layer of the model inside `net`.
print('Correctly classified images and their predicted labels:')
correct_indices = np.argwhere(y_true == y_pred).flatten()
gallery(net, test_images, LAST_LAYER, correct_indices, label2class)

print('Incorrectly classified images and their predicted labels:')
wrong_indices = np.argwhere(y_true != y_pred).flatten()
gallery(net, test_images, LAST_LAYER, wrong_indices, label2class)