## Transfer Learning on Cats-Dogs Classification - Feature Extraction

#### Features are extracted from a MobileNet-V2 model pre-trained on ImageNet data, then passed through a new classification head to classify cats vs. dogs.
Adapted from https://www.tensorflow.org/tutorials/images/transfer_learning

### HPC and Data Science Summer Institute
Mai H. Nguyen, UC San Diego

-----

In [None]:
import tensorflow as tf
from tensorflow.compat.v1.keras import backend as K
from tensorflow.keras import applications
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img

from sklearn.metrics import classification_report 
import matplotlib.pyplot as plt
import numpy as np
import random
import os
import time
import datetime

In [None]:
print("Tensorflow version:",tf.__version__)
!python --version

In [None]:
# Show the GPU devices visible to TensorFlow
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

In [None]:
# Run the nvidia-smi command-line tool in a shell and show its output in the notebook
!nvidia-smi

In [None]:
# Set logging level: only show TensorFlow messages of ERROR severity
tf_logging = tf.compat.v1.logging
tf_logging.set_verbosity(tf_logging.ERROR)

In [None]:
# Seed shared by every random number generator used in this notebook
seed = 1234

# A single call sets the Python, NumPy, and TensorFlow seeds; it stands in for
# separate random.seed(seed), np.random.seed(seed), and tf.random.set_seed(seed) calls.
tf.keras.utils.set_random_seed(seed)

# Hash randomization is not touched here (PYTHONHASHSEED is left unset).

# Potential randomness from CUDNN: ask for deterministic ops
os.environ.update({
    'TF_DETERMINISTIC_OPS': '1',
    'TF_CUDNN_DETERMINISTIC': '1',
})

### Set image location and dimensions

In [None]:
import os

from os.path import expanduser
HOME = expanduser("~")

# Set data_path:  Data is in home directory, under 'data/catsVsDogs'
data_path = os.path.join(HOME, 'data', 'catsVsDogs')
print (data_path)

# Location of images: one sub-directory per data split
train_data_dir = os.path.join(data_path, 'train')
val_data_dir   = os.path.join(data_path, 'val')
test_data_dir  = os.path.join(data_path, 'test')

print ('Train path:' + train_data_dir)
print ('Validation path:' + val_data_dir)
print ('Test path:' + test_data_dir)

In [None]:
# Image dimensions:  224 x 224
img_width, img_height = 224, 224

# Keras image utilities take sizes as (height, width); the order does not
# change the value here because the images are square.
IMG_SIZE = (img_height, img_width)
# Append the channel axis (3 = RGB) to get the model's input shape
IMG_SHAPE = IMG_SIZE + (3,)

### Prepare data

In [None]:
# Set batch size to 16
BATCH_SIZE = 16

# Data augmentation setup
# MobileNetV2's preprocess_input scales pixel values from [0, 255] to [-1, 1].
rescale = tf.keras.applications.mobilenet_v2.preprocess_input
# Only the training data is augmented (shear, zoom, horizontal flip);
# validation and test images are just rescaled.
train_datagen      = ImageDataGenerator(shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True, preprocessing_function = rescale)
validation_datagen = ImageDataGenerator(preprocessing_function = rescale)
test_datagen       = ImageDataGenerator(preprocessing_function = rescale)

# Set up generator to read images found in subfolders of training data directory,
# and indefinitely generate batches of image data (scaled).  This is for training data.
train_generator = train_datagen.flow_from_directory(train_data_dir,target_size=IMG_SIZE,
                                              batch_size = BATCH_SIZE, class_mode='binary', 
                                              shuffle = True, seed = seed)           

# Set up generator to generate batches of validation data for model
validation_generator = validation_datagen.flow_from_directory(val_data_dir,target_size=IMG_SIZE,
                                                   batch_size = BATCH_SIZE,class_mode='binary',
                                                   shuffle = False, seed = seed)
# Set up generator to generate batches of test data for model.
# shuffle = False keeps the batches in the same order as test_generator.classes,
# so predictions can be compared with the ground-truth labels.
test_generator = test_datagen.flow_from_directory(test_data_dir,target_size=IMG_SIZE,
                                                   batch_size = BATCH_SIZE,class_mode='binary',
                                                   shuffle = False, seed = seed)

### Load pre-trained model

In [None]:
# Load pre-trained model's ImageNet weights, not including the fully connected layers
base_model = applications.MobileNetV2(include_top = False, weights = 'imagenet', input_shape=IMG_SHAPE)

# Freeze all weights of pre-trained model so that only a new head gets trained.
# In TF2, a BatchNormalization layer with trainable = False also runs in inference mode.
# See https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization
# NOTE: assigning a `training` attribute on the model does not select the mode;
# inference mode is chosen through the `training` argument when the model is called.
base_model.trainable = False

print ('Base model loaded')

# Uncomment to look at pre-trained model's architecture
# base_model.summary()

### Create top model

In [None]:
# Assemble the classifier: pre-trained base -> global average pooling ->
# dropout -> one sigmoid unit (binary output).
# IMG_SHAPE is the same shape the base model was built with.
img_inputs = tf.keras.Input(shape=IMG_SHAPE)

# training=False runs the base model (including its BatchNormalization layers)
# in inference mode on every call, regardless of the layers' trainable flags.
base_outputs = base_model(img_inputs, training=False)

# Collapse each feature map to a single value per channel
GlobalAveragePooler2D = GlobalAveragePooling2D()
GlobalAveragePooler2D_outputs = GlobalAveragePooler2D(base_outputs)

# Regularize the new head
Dropper = Dropout(0.2)
DroppedOut_outputs = Dropper(GlobalAveragePooler2D_outputs)

# Single sigmoid unit: output is a score in (0, 1)
DenseLayer = Dense(1, activation='sigmoid')
outputs = DenseLayer(DroppedOut_outputs)

model = tf.keras.Model(inputs=img_inputs, outputs=outputs)

# Get model summary
model.summary()

### Train top model

In [None]:
# Configure training: Adam optimizer with a small learning rate, binary
# cross-entropy loss, and accuracy as the reported metric.
adam_optimizer = optimizers.Adam(learning_rate=0.0001)
bce_loss = losses.BinaryCrossentropy()
model.compile(optimizer=adam_optimizer, loss=bce_loss, metrics=['accuracy'])

In [None]:
%%time 

# Train top model for 5 epochs
# ==> YOUR CODE HERE

train_history = model.fit(train_generator,validation_data=validation_generator, epochs=EPOCHS)

In [None]:
# Save the trained model (model.save() stores the architecture together with the weights)
# Use model.save()
# ==> YOUR CODE HERE

In [None]:
# Plot train and validation history: loss in the left panel, accuracy in the right
history = train_history.history
fig, axs = plt.subplots(1, 2, figsize=(20, 5))

# (metric key, panel title, legend labels) for each panel, left to right
panels = [
    ('loss', "Train, Val loss history", ["Train Loss", "Val Loss"]),
    ('accuracy', "Train, Val Accuracy history", ["Train Accuracy", "Val Accuracy"]),
]

for ax, (metric, title, legend_labels) in zip(axs, panels):
    ax.plot(history[metric])            # training curve
    ax.plot(history['val_' + metric])   # validation curve
    ax.set_title(title)
    ax.set_xlabel("Epoch")
    ax.legend(legend_labels)

### Evaluation and Inference

In [None]:
# Get train data accuracy
# (train_generator applies random augmentation, so this is measured on augmented images)
_, train_accuracy = model.evaluate(train_generator)
print("Train data accuracy:", train_accuracy)

# Get test data accuracy
_, test_accuracy = model.evaluate(test_generator)
print("Test data accuracy:", test_accuracy)

In [None]:
# Get predicted value and the ground truth value of test data
# Model outputs above 0.5 become class 1, all others class 0
test_scores = model.predict(test_generator)
pred = (test_scores > 0.5).astype("int32")
# NOTE(review): assumes test_generator does not shuffle, so that .classes is in
# the same order as the predictions - confirm where test_generator is created
true = test_generator.classes

In [None]:
# Get evaluation metrics for test data
# NOTE(review): label names assume class 0 is 'cats' and class 1 is 'dogs' -
# confirm against test_generator.class_indices
class_names = ['cats', 'dogs']
report = classification_report(y_true=true, y_pred=pred, target_names=class_names, digits=4)
print(report)

### Perform inference on test images

In [None]:
def image_loader(img_file):
    """Load an image file, display it, and return it as a one-image batch.

    Args:
        img_file: Path of the image file to load.

    Returns:
        Array of shape (1, img_height, img_width, channels) with pixel
        values scaled to [-1, 1] by the MobileNetV2 preprocessing function.
    """
    # Keras expects target_size as (height, width)
    img = load_img(img_file, target_size=(img_height, img_width))
    plt.imshow(img)
    plt.show()
    # Use the same preprocessing function as the data generators
    # (scales pixels from [0, 255] to [-1, 1]) rather than repeating the formula
    img = tf.keras.applications.mobilenet_v2.preprocess_input(img_to_array(img))
    # Add a leading batch axis: model input is (1, height, width, channels)
    return np.expand_dims(img, axis=0)

In [None]:
# Show one cat image from the test set and print the model's output for it
test_image = f'{data_path}/test/cats/cat.1070.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img)
print(np.round(img_y_pred, decimals=5))

In [None]:
# Show one dog image from the test set and print the model's output for it
test_image = f'{data_path}/test/dogs/dog.1233.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img)
print(np.round(img_y_pred, decimals=5))

In [None]:
test_image = data_path + '/test/cats/cat.1080.jpg'

# Load the image (without this, the image from the previous cell would be predicted again)
img = image_loader(test_image)

img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/dogs/dog.1132.jpg'
img = image_loader(test_image)

# Get the model's prediction on image (without this, the previous cell's prediction would be printed)
img_y_pred = model.predict(img)

print(np.round(img_y_pred,5))

In [None]:
# Perform inference on dog image 1311
test_image = data_path + '/test/dogs/dog.1311.jpg'

img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))