## CNN Transfer Learning on Cats-Dogs Classification - Fine Tune

#### Finetune top layers and classification layers to classify cats vs. dogs.
Adapted from https://www.tensorflow.org/tutorials/images/transfer_learning

-----

In [1]:
!pip install tensorflow -q


In [2]:
!pip install scikit-learn -q


### Setup

In [3]:
!pip install matplotlib -q

In [4]:
!pip install numpy -q


In [5]:
# Set logging level
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  

import tensorflow as tf
tf.get_logger().setLevel('ERROR')

In [6]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import applications
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img

from sklearn.metrics import classification_report 
import matplotlib.pyplot as plt
import numpy as np
import random
import os
import time

# Verify the TensorFlow version
print("TensorFlow version:", tf.__version__)


TensorFlow version: 2.13.0


In [7]:
print (tf.__version__)
!python --version

2.13.0
Python 3.11.5


In [8]:
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [7]:
!nvidia-smi

Thu Jul 11 19:32:48 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.76                 Driver Version: 550.76         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L40                     Off |   00000000:CA:00.0 Off |                    0 |
| N/A   35C    P0             93W /  300W |   43868MiB /  46068MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA L40                     Off |   00

In [8]:
# Set random generator seed
seed = 1234

# Set Python seed, NumPy seed, and TensorFlow seed
tf.keras.utils.set_random_seed(seed)

# Potential randomness from CUDNN
# os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['TF_CUDNN_DETERMINISTIC']= '1'

### Set image location and dimensions

In [12]:
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')


Downloading data from https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
Found 2000 files belonging to 2 classes.


In [13]:
PATH

'/home/jovyan/.keras/datasets/cats_and_dogs_filtered'

In [15]:
import shutil

In [16]:
shutil.unpack_archive(PATH, extract_dir='/home/jovyan/data')

ReadError: Unknown archive format '/home/jovyan/.keras/datasets/cats_and_dogs_filtered'

In [21]:
import subprocess 
import os 
# Define the URL and the target directory 
url = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip' 
target_dir = '/home/jovyan/data' 
# Ensure the target directory exists os.makedirs(target_dir, exist_ok=True) 
# Download the zip file directly to the target directory 
zip_path = os.path.join(target_dir, 'cats_and_dogs_filtered.zip') 
subprocess.run(['wget', '-O', zip_path, url], check=True) 
# Unzip the file 
subprocess.run(['unzip', zip_path, '-d', target_dir], check=True) 
# Optionally, remove the zip file after extraction 
os.remove(zip_path)

--2024-07-11 19:49:12--  https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 172.253.62.207, 142.251.167.207, 142.251.179.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.253.62.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68606236 (65M) [application/zip]
Saving to: ‘/home/jovyan/data/cats_and_dogs_filtered.zip’

     0K .......... .......... .......... .......... ..........  0%  380K 2m56s
    50K .......... .......... .......... .......... ..........  0%  742K 2m13s
   100K .......... .......... .......... .......... ..........  0% 1.44M 1m44s
   150K .......... .......... .......... .......... ..........  0% 1.86M 86s
   200K .......... .......... .......... .......... ..........  0% 1.96M 76s
   250K .......... .......... .......... .......... ..........  0% 2.64M 67s
   300K .......... .......... .......... .......... ..........  0% 3.13M 61s

Archive:  /home/jovyan/data/cats_and_dogs_filtered.zip
   creating: /home/jovyan/data/cats_and_dogs_filtered/
  inflating: /home/jovyan/data/cats_and_dogs_filtered/vectorize.py  
   creating: /home/jovyan/data/cats_and_dogs_filtered/validation/
   creating: /home/jovyan/data/cats_and_dogs_filtered/train/
   creating: /home/jovyan/data/cats_and_dogs_filtered/validation/dogs/
  inflating: /home/jovyan/data/cats_and_dogs_filtered/validation/dogs/dog.2127.jpg  
  inflating: /home/jovyan/data/cats_and_dogs_filtered/validation/dogs/dog.2126.jpg  
  inflating: /home/jovyan/data/cats_and_dogs_filtered/validation/dogs/dog.2125.jpg  
  inflating: /home/jovyan/data/cats_and_dogs_filtered/validation/dogs/dog.2124.jpg  
  inflating: /home/jovyan/data/cats_and_dogs_filtered/validation/dogs/dog.2123.jpg  
  inflating: /home/jovyan/data/cats_and_dogs_filtered/validation/dogs/dog.2122.jpg  
  inflating: /home/jovyan/data/cats_and_dogs_filtered/validation/dogs/dog.2121.jpg  
  inflating: /home/jovyan/da

In [23]:
import os

# Define the home directory and data path
HOME = os.path.expanduser("~")
data_path = os.path.join(HOME, 'data', 'cats_and_dogs_filtered')

# Print the data path
print(f"Data path: {data_path}")

# Define the directories for train, validation, and test data
train_data_dir = os.path.join(data_path, 'train')
val_data_dir = os.path.join(data_path, 'validation')
test_data_dir = os.path.join(data_path, 'test')

# Print the paths
print(f"Train path: {train_data_dir}")
print(f"Validation path: {val_data_dir}")
print(f"Test path: {test_data_dir}")

Data path: /home/jovyan/data/cats_and_dogs_filtered
Train path: /home/jovyan/data/cats_and_dogs_filtered/train
Validation path: /home/jovyan/data/cats_and_dogs_filtered/validation
Test path: /home/jovyan/data/cats_and_dogs_filtered/test


In [24]:
# Image dimensions
img_width, img_height = 224, 224 
IMG_SIZE = (img_width,img_height)
IMG_SHAPE = IMG_SIZE + (3,)

# Print image shape
print(IMG_SHAPE)

(224, 224, 3)


### Prepare data

In [25]:
# Batch size
BATCH_SIZE = 16

# Data setup
rescale = tf.keras.applications.mobilenet_v2.preprocess_input
train_datagen      = ImageDataGenerator(shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True, preprocessing_function = rescale)
validation_datagen = ImageDataGenerator(preprocessing_function = rescale)
test_datagen       = ImageDataGenerator(preprocessing_function = rescale)

# Set up generator to read images found in subfolders of training data directory,
# and indefinitely generate batches of image data (scaled).  This is for training data.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=IMG_SIZE,
    batch_size = BATCH_SIZE,
    class_mode='binary', 
    shuffle = True,
    seed = seed)           

# Set up generator to generate batched of validation data for model
validation_generator = validation_datagen.flow_from_directory(
    val_data_dir,
    target_size=IMG_SIZE,
    batch_size = BATCH_SIZE,
    class_mode='binary',
    shuffle = False, 
    seed = seed)

# Set up generator to generate batched of test data for model
test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=IMG_SIZE,
    batch_size = BATCH_SIZE,
    class_mode='binary',
    shuffle = False,
    seed = seed)

Found 2000 images belonging to 2 classes.
Found 1001 images belonging to 2 classes.


FileNotFoundError: [Errno 2] No such file or directory: '/home/jovyan/data/cats_and_dogs_filtered/test'

### Load model from feature extraction
Load model saved from feature extraction.  Weights in last blocks and top model will be adjusted.  All other weights are frozen.

In [None]:
model = tf.keras.models.load_model('models/features_model')

In [None]:
# Uncomment to list layers in model
# print("Number of layers in the base model: ", len(model.layers[1].layers))
# list(enumerate(model.layers[1].layers))

In [None]:
## Freeze all weights of model up to Block 14
model.trainable = True
fine_tune_start = 116
for layer in model.layers[1].layers[:fine_tune_start]:
    layer.trainable = False
    
model.summary()

### Fine tune model

In [None]:
EPOCHS = 20

# Compile model with very slow learning rate
model.compile(
    optimizer = optimizers.Adam(learning_rate = 0.00001),
    loss = losses.BinaryCrossentropy(),
    metrics = ['accuracy'])

# Perform early stopping to avoid overfitting and ModelCheckpoint to save the best model
checkpoint_path = 'tmp/checkpoint'
callbacks = [
    EarlyStopping(
        monitor = 'val_loss', 
        patience = 3, 
        min_delta = 0.001, 
        mode = 'min'),
    ModelCheckpoint(
        filepath=checkpoint_path, 
        monitor = 'val_loss', 
        mode = 'min',          
        save_best_only = True, 
        save_weights_only = True)]

In [None]:
%%time

train_history = model.fit(
    train_generator,
    epochs = EPOCHS, 
    validation_data = validation_generator, 
    callbacks = callbacks)

In [None]:
# Load the best model that was saved using ModelCheckpoint
model.load_weights(checkpoint_path)

In [None]:
# Save weights from finetuning
model.save("models/finetune_model")

In [None]:
# Plot train and validation loss
fig, axs = plt.subplots(1,2, figsize= (20,5))
axs[0].plot(train_history.history['loss'])
axs[0].plot(train_history.history['val_loss'])
axs[0].set_title("Train, Val loss history")
axs[0].set_xlabel("Epoch")
axs[0].legend(["Train Loss","Val Loss"])

# Plot train and validation accuracy
axs[1].plot(train_history.history['accuracy'])
axs[1].plot(train_history.history['val_accuracy'])
axs[1].set_title("Train, Val Accuracy history")
axs[1].set_xlabel("Epoch")
axs[1].legend(["Train Accuracy","Val Accuracy"])

### Evaluate model

In [None]:
_, train_accuracy = model.evaluate(train_generator)
print("Train data accuracy:", train_accuracy)

_, test_accuracy = model.evaluate(test_generator)
print("Test data accuracy:", test_accuracy)

In [None]:
# Get predicted value and the ground truth value of test data
pred = (model.predict(test_generator) > 0.5).astype("int32")
true = test_generator.classes

In [None]:
# Get evaluation metrics for test data
print(classification_report(y_true= true, y_pred = pred, target_names=['cats', 'dogs'], digits=4))

### Perform inference on test images

In [None]:
def image_loader(img_file):
    """load individual images"""
    img = load_img(img_file, target_size = (img_width, img_height))
    imgplot = plt.imshow(img)
    plt.show()
    img = (img_to_array(img)/127.5)-1.0
    img = np.expand_dims(img, axis = 0) #model input is (1,150,150,3)
    return img

In [None]:
test_image = data_path + '/test/cats/cat.1070.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/dogs/dog.1233.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/cats/cat.1080.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/dogs/dog.1132.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/dogs/dog.1311.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/cats/cat.1338.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/cats/cat.1342.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(img_y_pred)

In [None]:
test_image = data_path + '/test/cats/cat.1180.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/cats/cat.1048.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/dogs/dog.1342.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))

In [None]:
test_image = data_path + '/test/dogs/dog.1308.jpg'
img = image_loader(test_image)
img_y_pred = model.predict(img) 
print(np.round(img_y_pred,5))