# Transfer Learning

This notebook shows how to apply transfer learning on image datasets

### Import all the needed modules

In [None]:
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import (
    roc_auc_score, 
    accuracy_score, 
    confusion_matrix, 
    classification_report, 
    precision_recall_curve,
)

# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set()

### Define paths for the data location and the place to save the model

Source of the data is https://www.kaggle.com/datasets/shaunthesheep/microsoft-catsvsdogs-dataset

A smaller version of the dataset is also provided with the code in the `data` directory.
The provided data contains only a small amount of training data, to demonstrate the impact of transfer learning.

In [None]:
# Locations of the training and test images.
TRAIN_DIR = '../data/catdog/train'
TEST_DIR = '../data/catdog/test'

# Size every image is resized to, and the number of images per training batch.
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# Checkpoint targets: the best model seen so far, and one checkpoint per epoch
# (the `{epoch:03d}` placeholder is filled in by the checkpoint callback).
BEST_DIR = 'transfer/checkpoints/vgg_16_best.ckpt'
CKPT_DIR = 'transfer/checkpoints/vgg_16_ckpts_{epoch:03d}.ckpt'

### Load the images and add augmentations

The training and validation sets receive several random augmentations, while the test set is loaded without any augmentation. Pixel normalization is applied inside the model through the VGG16 `preprocess_input` step.

In [None]:
# Labels are emitted as a single 0/1 value per image.
class_mode = 'binary'
# NOTE(review): Keras documents `classes` as a list of sub-directory names;
# confirm the dict values are honoured rather than just the key order.
classes = {'cat': 0, 'dog': 1}

# Augmenting generator shared by the training and validation subsets;
# 20% of the images under TRAIN_DIR are held out for validation.
train_idg = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=360,
    zoom_range=0.2,
    fill_mode='reflect',
    validation_split=0.2,
)
# The test generator applies no augmentation.
test_idg = tf.keras.preprocessing.image.ImageDataGenerator()

# Both subsets are read from TRAIN_DIR with identical settings; only `subset`
# differs. The training iterator is created first, then the validation one.
train_gen, valid_gen = (
    train_idg.flow_from_directory(
        directory=TRAIN_DIR,
        target_size=IMAGE_SIZE,
        classes=classes,
        class_mode=class_mode,
        batch_size=BATCH_SIZE,
        seed=0,
        subset=subset,
    )
    for subset in ('training', 'validation')
)

# Test images are served one at a time in a fixed (unshuffled) order.
test_gen = test_idg.flow_from_directory(
    directory=TEST_DIR,
    target_size=IMAGE_SIZE,
    classes=classes,
    class_mode=class_mode,
    batch_size=1,
    shuffle=False,
)

In [None]:
def view_generator_data(gen, n):
    """Draw ``n`` batches from ``gen`` and stack them into two arrays.

    Args:
        gen: Iterator yielding ``(X_batch, y_batch)`` pairs of arrays.
        n: Number of batches to draw with ``next``.

    Returns:
        A tuple ``(X, y)`` where each element is the concatenation along
        axis 0 of the corresponding batch arrays.
    """
    image_batches = []
    label_batches = []
    for _ in range(n):
        batch_X, batch_y = next(gen)
        image_batches.append(batch_X)
        label_batches.append(batch_y)
    stacked_X = np.concatenate(image_batches, axis=0)
    stacked_y = np.concatenate(label_batches, axis=0)
    return stacked_X, stacked_y

def view_images(X, y, n=None):
    """Display the first ``n`` images of ``X`` together with their labels.

    Args:
        X: Array of images indexed along axis 0; values are cast to
            ``uint8`` before plotting.
        y: Sequence of labels aligned with ``X``.
        n: Number of images to show. ``None`` (the default) shows all of
            them; values larger than ``len(X)`` are clamped so the call
            never indexes past the end of the array.
    """
    # `n or len(X)` would treat n=0 as "show everything"; test for None explicitly.
    count = len(X) if n is None else min(n, len(X))
    pixels = X.astype('uint8')
    for i in range(count):
        plt.imshow(pixels[i])
        # grid(None) *toggles* grid visibility; request "off" explicitly so the
        # result does not depend on the current style or earlier calls.
        plt.grid(False)
        plt.show()
        print('Label: ', y[i])

In [None]:
# Pull a single batch for a visual sanity check, then rewind the iterator.
train_X, train_y = view_generator_data(gen=train_gen, n=1)
train_gen.reset()

In [None]:
# Show every image of the sampled batch with its label.
view_images(X=train_X, y=train_y)

### Create a base model and add additional layers

Load the Keras VGG16 model without its top (classification) layers and add the layers needed to solve the binary classification problem. All but the last eight layers of the VGG16 base are frozen, since the model is only being fine-tuned.

In [None]:
# Model input: images of IMAGE_SIZE with 3 channels, passed through the
# VGG16 preprocessing step before reaching the convolutional base.
img_input = tf.keras.layers.Input(shape=(*IMAGE_SIZE, 3))
preprocessed_input = tf.keras.applications.vgg16.preprocess_input(img_input)

# ImageNet-pretrained VGG16 without its top classification layers.
base_model = tf.keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=preprocessed_input,
)

# Freeze every layer of the base except the last eight.
for layer in base_model.layers[:-8]:
    layer.trainable = False

# New head: global average pooling -> 1024-unit ReLU layer -> single sigmoid output.
x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
x = tf.keras.layers.Dense(units=1024, activation='relu')(x)
predictions = tf.keras.layers.Dense(units=1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=img_input, outputs=predictions)
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
    loss='binary_crossentropy',
)

### Train the Keras model

Several callbacks are added to save the model during training and to stop training early.

In [None]:
# Make sure the directories that will receive the checkpoints exist.
for ckpt_path in (CKPT_DIR, BEST_DIR):
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

# Callbacks: early stopping with a patience of 30 epochs, a checkpoint written
# to the per-epoch path, and a checkpoint that keeps only the best model.
es = tf.keras.callbacks.EarlyStopping(patience=30)
mc = tf.keras.callbacks.ModelCheckpoint(filepath=CKPT_DIR)
bm = tf.keras.callbacks.ModelCheckpoint(filepath=BEST_DIR, save_best_only=True)

# NOTE(review): steps_per_epoch=1 and validation_steps=1 mean every epoch
# trains and validates on a single batch -- confirm this is intended.
hst = model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=50,
    steps_per_epoch=1,
    validation_steps=1,
    max_queue_size=1,
    callbacks=[es, mc, bm],
)

In [None]:
# List the saved checkpoints. The directory is derived from CKPT_DIR so it
# cannot drift from the configuration, and no POSIX-only `ls` is needed.
sorted(os.listdir(os.path.dirname(CKPT_DIR)))

### Load the test data from the Python generators

Since the Keras utilities create generators, the generators must be iterated to materialize the data as arrays.

In [None]:
# Draw exactly one pass over the test set. len(test_gen) is the number of
# batches per epoch, so this stays correct for any test batch size.
test_X, test_y = view_generator_data(test_gen, len(test_gen))

In [None]:
# Show every loaded test image with its label.
view_images(X=test_X, y=test_y)

### Predict the result using the best model

The prediction threshold is chosen at the point where the precision and recall curves, computed on the training data, intersect.

In [None]:
# Restore the model written by the save_best_only checkpoint callback.
best_model = tf.keras.models.load_model(filepath=BEST_DIR)

In [None]:
# Rewind the iterator and draw exactly one epoch of training batches.
# `train_gen.n` is the number of *samples*; using it as the batch count would
# pull BATCH_SIZE times too much data, since the iterator cycles indefinitely.
train_gen.reset()
train_X, train_y = view_generator_data(train_gen, len(train_gen))
train_y_pred = best_model.predict(train_X)

In [None]:
# Precision (pr) and recall (r) at each candidate threshold (thr) on the
# training predictions; pr and r have one more element than thr.
pr, r, thr = precision_recall_curve(train_y, train_y_pred)

In [None]:
# Precision and recall as functions of the decision threshold. The final
# element of each array has no matching threshold and is dropped.
plt.plot(thr, pr[:-1], label='precision')
plt.plot(thr, r[:-1], label='recall')
plt.xlabel('threshold')
plt.ylabel('score')
plt.title('Precision and recall on the training data')
plt.legend()
plt.show()

In [None]:
# Pick the threshold at which precision and recall are closest to each other
# (the final precision/recall entry has no associated threshold).
pr_r_gap = np.abs(pr - r)[:-1]
pr_r_idx = pr_r_gap.argmin()
threshold = thr[pr_r_idx]
threshold

In [None]:
# Score the held-out test images with the best checkpointed model.
test_y_pred = best_model.predict(x=test_X)

In [None]:
# Threshold-independent ranking quality of the test scores.
roc_auc_score(y_true=test_y, y_score=test_y_pred)

In [None]:
# Only the predicted scores are thresholded. The ground-truth labels are
# already 0/1, so they are cast to bool instead of being compared against the
# tuned threshold (which would mislabel every sample if it reached 1.0).
accuracy_score(test_y.astype(bool), test_y_pred > threshold)

In [None]:
# Only the predicted scores are thresholded. The ground-truth labels are
# already 0/1, so they are cast to bool instead of being compared against the
# tuned threshold (which would mislabel every sample if it reached 1.0).
print(classification_report(test_y.astype(bool), test_y_pred > threshold))

In [None]:
# Only the predicted scores are thresholded; the 0/1 ground-truth labels are
# cast to bool rather than compared against the tuned threshold.
# fmt='d' prints the cell counts as plain integers; the default format
# switches to scientific notation once a count reaches three digits.
sns.heatmap(
    confusion_matrix(test_y.astype(bool), test_y_pred > threshold),
    annot=True,
    fmt='d',
)
plt.ylabel('True')
plt.xlabel('Pred')
plt.show()