# CNN Fine Tuning for Cats-Dogs Classification
## CIML Summer Institute, UC San Diego
Fine-tune VGG16 top layer (Conv block 5) and fully connected layers to classify cats vs. dogs. 
Adapted from tensorflow tutorials (https://github.com/tensorflow/docs/blob/master/site/en/tutorials/images/transfer_learning.ipynb)

------


### Setup

In [None]:
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report
from tensorflow.compat.v1.keras import backend as K
from tensorflow.keras import applications
from tensorflow.keras import losses
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dropout, Flatten, Dense, experimental
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

In [None]:
# Report the TensorFlow version, the Python version and the GPUs visible to TensorFlow
print (tf.__version__)
!python --version
print(tf.config.list_physical_devices('GPU'))
# IPython shell escape: run nvidia-smi to show the GPU status
!nvidia-smi

In [None]:
# Set logging level: TensorFlow's (compat.v1) logger verbosity is set to ERROR
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [None]:
# One seed shared by every random number generator used in this notebook
seed = 1234

# Environment switches:
#   PYTHONHASHSEED          - the value 0 disables hash randomization
#   TF_DETERMINISTIC_OPS    - potential randomness from CUDNN
#   TF_CUDNN_DETERMINISTIC  - potential randomness from CUDNN
# NOTE(review): PYTHONHASHSEED is presumably only read at interpreter start-up,
# so setting it here may not affect the running kernel - confirm.
os.environ.update({
    'PYTHONHASHSEED': '0',
    'TF_DETERMINISTIC_OPS': '1',
    'TF_CUDNN_DETERMINISTIC': '1',
})

# Seed, in turn: numpy, Python's built-in generator, the tf global seed and
# the tensorflow graph-level (compat.v1) seed
for seed_fn in (np.random.seed,
                random.seed,
                tf.random.set_seed,
                tf.compat.v1.random.set_random_seed):
    seed_fn(seed)

### Set image dimensions, location of images

In [None]:
# Number of images per batch
BATCH_SIZE = 16

# Directories holding the train / validation / test images
train_data_dir = 'data/catsVsDogs/train'
validation_data_dir = 'data/catsVsDogs/val'
test_data_dir = 'data/catsVsDogs/test'

# Image width and height, the (width, height) pair, and that pair followed by a 3
# (presumably the number of colour channels - confirm)
img_width = 150
img_height = 150
IMG_SIZE = (img_width, img_height)
IMG_SHAPE = (*IMG_SIZE, 3)

### Prepare data

In [None]:
# Data setup
train_datagen = ImageDataGenerator(rescale = 1. / 255, shear_range = 0.2,
                                   zoom_range = 0.2, horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1. / 255)
validation_datagen = ImageDataGenerator(rescale = 1. / 255)

# Set up generator to read images found in subfolders of training data directory,
# and indefinitely generate batches of image data (scaled).  This is for training data.
train_generator = train_datagen.flow_from_directory((train_data_dir,target_size=<<FILL-IN>>, # img size we defined above
                                                     batch_size = <<FILL-IN>> , class_mode='binary', # batch size we defined above
                                                     shuffle = True, seed = <<FILL-IN>>)   # seed we defined above                     

# Set up generator to generate batched of validation data for model
validation_generator = validation_datagen.flow_from_directory(<<FILL-IN>>,target_size=<<FILL-IN>>,
                                                              batch_size = <<TFILL-INODO>>,class_mode=<<FILL-IN>>,
                                                              shuffle = <<FILL-IN>>, seed = <<FILL-IN>>) # no need to shuffle val data
# Set up generator to generate batched of test data for model
test_generator = test_datagen.flow_from_directory(<<FILL-IN>>,target_size=<<FILL-IN>>,
                                                  batch_size = <<FILL-IN>>,class_mode=<<FILL-IN>>',
                                                  shuffle = <<FILL-IN>>, seed = <<FILL-IN>>) # no need to shuffle val data

### Load model from feature extraction
Load model saved from feature extraction.

Weights in the last convolutional block (Conv block 5) and the top model will be adjusted.  All other weights are frozen.

In [None]:
# Load the model saved from the feature extraction step (replace the placeholder with where it was saved)
model = tf.keras.models.load_model(<<FILL-IN>>)

In [None]:
# Freeze all weights of VGG16 model except for conv block5
for layers in model.layers[0].layers[:15]:
    layers.trainable = <<FILL-IN>>
model.summary()

### Fine tune model

In [None]:
EPOCHS = 20

# Compile model with Adam optimizer with very slow learning rate,
# Binary Cross-Entropy loss function and Accuracy metric 
model.compile(optimizer=<<FILL-IN>>, #use Adam optimizer with 0.000005 learning rate https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam
              loss= <<FILL-IN>>, #use default Binary Cross-entropy loss https://www.tensorflow.org/api_docs/python/tf/keras/losses/BinaryCrossentropy
              metrics=[<<FILL-IN>>]) #use accuracy metrics; this will be printed out during training in each epochs https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Accuracy

# Early Stopping to avoid overfitting and ModelCheckpoint to save the best model
checkpoint_path = 'tmp/checkpoint'
callbacks = [EarlyStopping(monitor=<<FILL-IN>>,patience=3,min_delta=0.001, #use val_loss to monitor during training https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/EarlyStopping
                           mode=<<FILL-IN>>), # we want to minimize val_loss
             ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss',
                             mode = 'min', save_best_only = True, 
                             save_weights_only=True)]

In [None]:
%% time

train_history = <<FILL-IN>> ## train the model

In [None]:
# Load the best model that was saved using ModelCheckpoint
# (the checkpoint was written with save_weights_only=True, so weights are loaded into the existing model)
model.load_weights(checkpoint_path)

In [None]:
# Plot train and validation history: loss on the left panel, accuracy on the right
fig, axs = plt.subplots(1, 2, figsize=(20, 5))

# (train key, validation key, panel title, legend labels) for each panel
panels = (
    ('loss', 'val_loss', "Train, Val loss history", ["Train Loss", "Val Loss"]),
    ('accuracy', 'val_accuracy', "Train, Val Accuracy history", ["Train Accuracy", "Val Accuracy"]),
)

for ax, (train_key, val_key, title, labels) in zip(axs, panels):
    ax.plot(train_history.history[train_key])   # training curve
    ax.plot(train_history.history[val_key])     # validation curve
    ax.set_title(title)
    ax.set_xlabel("Epoch")
    ax.legend(labels)

### Evaluate model

In [None]:
# Evaluate on the training generator; the first returned value is discarded and
# the second is reported as accuracy (presumably [loss, accuracy] - confirm against compile())
_, train_accuracy = model.evaluate(train_generator)
print("Train data accuracy:", train_accuracy)

# Same evaluation for the test data (replace the placeholder)
_, test_accuracy =  <<FILL-IN>> # evaluate test data
print("Test data accuracy:", test_accuracy)

In [None]:
# Get predicted value and the ground truth value of test data.
# Sequential.predict_classes is no longer available in recent TensorFlow
# releases; thresholding the predicted probabilities at 0.5 is the replacement
# for a binary classifier. Assumes the model ends in a single sigmoid output -
# TODO confirm.
pred = (model.predict(test_generator) > 0.5).astype("int32")
# Ground-truth labels; they line up with pred only if test_generator does not shuffle
true = test_generator.classes

In [None]:
# Classification report comparing ground truth (true) with predictions (pred)
# NOTE(review): target_names assumes label 0 = cats and label 1 = dogs - confirm with test_generator.class_indices
print(classification_report(y_true= true, y_pred = pred, target_names=['cats', 'dogs']))

### Perform inference on test images

In [None]:
def image_loader(img_file):
    """Load one image file, display it, and return it as a single-image batch.

    Args:
        img_file: Image file to load (passed straight to ``load_img``).

    Returns:
        The image as an array, loaded with target size
        ``(img_width, img_height)``, with values divided by 255 and a new
        leading axis of length 1 added for the batch dimension.
    """
    img = load_img(img_file, target_size = (img_width, img_height))
    # Show the image so the prediction can be checked by eye
    plt.imshow(img)
    plt.show()
    # Same 1/255 rescaling that the data generators apply
    img = img_to_array(img) / 255
    img = np.expand_dims(img, axis = 0) #model input is (1,150,150,3)
    return img

In [None]:
# Load (and display) one cat image from the test directory
img = image_loader('data/catsVsDogs/test/cats/cat.1070.jpg')
# Replace the placeholder with the model's prediction for img
img_y_pred = <<FILL-IN>>
print(img_y_pred)

In [None]:
img = image_loader(<<FILL-IN>>) # choose your favorite dog image
img_y_pred = image_loader(img)
print(<<FILL-IN>>)

In [None]:
img = image_loader(<<FILL-IN>>) # choose your favorite image