# Lab 3


## 1. Obtain the dataset

The dataset (https://www.kaggle.com/datasets/biaiscience/dogs-vs-cats, with Open Data license) contains images of dogs and cats, divided into training (1000 dogs, 1000 cats), validation (500 dogs, 500 cats), and test (1000 dogs, 1000 cats) sets. Each image is resized to 180×180 when it is loaded.


In [None]:
import math

from IPython.core.pylabtools import figsize
from keras.utils import image_dataset_from_directory
import pathlib

# Root folder that holds the "train", "validation" and "test" sub-directories.
dataset_dir = pathlib.Path("./data/kaggle_dogs_vs_cats_small")

# Loader options shared by all three splits; only the shuffling differs.
_common_loader_options = dict(
    image_size=(180, 180),
    batch_size=32,
    label_mode="binary",
)

# Only the training split is shuffled; validation and test are loaded with
# shuffle=False.
train_dataset = image_dataset_from_directory(
    dataset_dir / "train", shuffle=True, **_common_loader_options
)

validation_dataset = image_dataset_from_directory(
    dataset_dir / "validation", shuffle=False, **_common_loader_options
)

test_dataset = image_dataset_from_directory(
    dataset_dir / "test", shuffle=False, **_common_loader_options
)

## 2. EDA


### 2.1 Class distribution


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Class names reported by the training dataset; the rest of the notebook
# indexes this list with integer label values (class_names[int(label)]).
class_names = train_dataset.class_names


# Calculate the class distribution for each subset
def get_class_counts(dataset, names=None):
    """Count how many samples of each class a batched dataset contains.

    Args:
        dataset: Iterable of ``(images, labels)`` batches. Only the labels
            are read; each label value is used as an index into ``names``.
        names: Sequence of class names indexed by label value. Defaults to
            the notebook-level ``class_names``.

    Returns:
        A dict mapping every class name to its sample count as a Python
        ``int``. Classes that never occur are reported with a count of 0
        instead of being left out.

    Raises:
        IndexError: If a label value is not a valid index into ``names``.
    """
    names = class_names if names is None else names

    # Start every class at zero so a class missing from a split still shows
    # up in the result (and does not become NaN in a DataFrame).
    counts = {name: 0 for name in names}

    for _, labels in dataset:
        values, occurrences = np.unique(np.asarray(labels), return_counts=True)
        for value, occurrence in zip(values, occurrences):
            counts[names[int(value)]] += int(occurrence)

    return counts


# Count the samples per class in each split (train, validation, test order).
train_counts, validation_counts, test_counts = (
    get_class_counts(split)
    for split in (train_dataset, validation_dataset, test_dataset)
)

# One row per class, one column per split.
counts_df = pd.DataFrame(
    dict(Train=train_counts, Validation=validation_counts, Test=test_counts),
    index=class_names,
)

print("Class distribution for each subset:")
counts_df

In [None]:
# Plot the class distribution.
# DataFrame.plot() opens its own figure, so the size is passed to it directly;
# a separate plt.figure() call would only leave an empty extra figure behind.
counts_df.plot(kind="bar", figsize=(6, 6))
plt.title("Class distribution")
plt.show()

### 2.2 Display sample images


In [None]:
from matplotlib import pyplot as plt

# Preview the first twelve images of one training batch in a 3x4 grid, each
# titled with its class name.
plt.figure(figsize=(12, 10))
for images, labels in train_dataset.take(1):
    for i in range(12):
        plt.subplot(3, 4, i + 1)
        pixels = images[i].numpy().astype("uint8")
        title = class_names[int(labels[i])]
        plt.imshow(pixels)
        plt.title(title)
        plt.axis("off")

### 2.3 Image size and color channels


In [None]:
# Report the spatial size and the channel count read from the shape of one
# training batch.
for images, labels in train_dataset.take(1):
    # take(1) yields a single batch, so this body runs at most once.
    print(f"Image size: {images.shape[1:3]}")
    print(f"Image color channels: {images.shape[3]}")

### 2.4 Data augmentation


In [None]:
from keras import layers
import keras

# Data augmentation pipeline: random horizontal flip, random rotation
# (factor 0.1) and random zoom (factor 0.2), applied in that order.
_augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
]
data_augmentation = keras.Sequential(_augmentation_layers)

# Display nine augmented variants of the first image of one training batch.
# The pipeline is re-applied to the batch for every grid cell.
plt.figure(figsize=(10, 10))
for images, _ in train_dataset.take(1):
    for cell in range(1, 10):
        augmented_images = data_augmentation(images)
        first_augmented = augmented_images[0].numpy().astype("uint8")
        plt.subplot(3, 3, cell)
        plt.imshow(first_augmented)
        plt.axis("off")

## 3. Training vanilla CNN model


Define a vanilla CNN model with data augmentation and dropout.


In [None]:
def create_vanilla_cnn_model():
    """Build and compile the baseline CNN binary classifier.

    The network takes 180x180 inputs with 3 channels, applies the shared
    ``data_augmentation`` pipeline, rescales pixel values by 1/255, and then
    runs four Conv2D + MaxPooling2D stages (32, 64, 128, 256 filters), one
    more 256-filter Conv2D, a 256-unit dense layer, dropout (0.5) and a
    single sigmoid output unit.

    Returns:
        A ``keras.Model`` compiled with binary cross-entropy loss, the
        "rmsprop" optimizer and the accuracy metric.
    """
    inputs = keras.Input(shape=(180, 180, 3))
    features = data_augmentation(inputs)
    features = layers.Rescaling(1.0 / 255)(features)

    # Convolution stages that are each followed by 2x2 max pooling.
    for filters in (32, 64, 128, 256):
        features = layers.Conv2D(
            filters=filters, kernel_size=3, activation="relu"
        )(features)
        features = layers.MaxPooling2D(pool_size=2)(features)

    # The last convolution is not pooled; it feeds the dense head directly.
    features = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(features)
    features = layers.Flatten()(features)

    features = layers.Dense(256, activation="relu")(features)
    features = layers.Dropout(0.5)(features)
    outputs = layers.Dense(1, activation="sigmoid")(features)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss="binary_crossentropy",
        optimizer="rmsprop",
        metrics=["accuracy"],
    )
    return model

In [None]:
# Build the baseline CNN and print its layer-by-layer summary.
vanilla_cnn_model = create_vanilla_cnn_model()
vanilla_cnn_model.summary()

Train the vanilla CNN model.

In [None]:
# Checkpoint callback: with save_best_only=True the file is only overwritten
# when the monitored validation loss improves.
vanilla_callbacks = [
    keras.callbacks.ModelCheckpoint(
        monitor="val_loss",
        save_best_only=True,
        filepath="./models/vanilla_cnn_model.h5",
    )
]

# Train for 100 epochs, validating on the validation split after each epoch.
vanilla_history = vanilla_cnn_model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=100,
    callbacks=vanilla_callbacks,
)

In [None]:
def plot_history(history):
    """Plot training and validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute maps "accuracy",
            "val_accuracy", "loss" and "val_loss" to per-epoch sequences
            (in this notebook, the value returned by ``Model.fit``).
    """
    metrics = history.history
    accuracy, val_accuracy = metrics["accuracy"], metrics["val_accuracy"]
    loss, val_loss = metrics["loss"], metrics["val_loss"]
    # Epochs are numbered from 1 on the x-axis.
    epochs_range = range(1, len(accuracy) + 1)

    def draw_panel(position, train_values, val_values, train_label, val_label, ylabel, title):
        # Training values are drawn as dots, validation values as a line.
        plt.subplot(1, 2, position)
        plt.plot(epochs_range, train_values, "bo", label=train_label)
        plt.plot(epochs_range, val_values, "b", label=val_label)
        plt.xlabel("Epochs")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend(loc="upper left")

    plt.figure(figsize=(16, 6))
    draw_panel(
        1, accuracy, val_accuracy,
        "Training accuracy", "Validation accuracy",
        "Accuracy", "Training and validation accuracy",
    )
    draw_panel(
        2, loss, val_loss,
        "Training loss", "Validation loss",
        "Loss", "Training and validation loss",
    )
    plt.show()


# Plot the accuracy and loss curves recorded while training the vanilla CNN.
plot_history(vanilla_history)

## 4. Fine-tuning VGG16 model


Instantiate the VGG16 model with pre-trained ImageNet weights and without its top layers.


In [None]:
# Load VGG16 with ImageNet weights; include_top=False leaves out the
# network's top layers so that a custom head can be attached instead.
conv_base = keras.applications.vgg16.VGG16(weights="imagenet", include_top=False)

# Print the layer-by-layer summary of the convolutional base.
conv_base.summary()

Define a new model using VGG16 as the base, freezing all layers except the top four layers, and add a custom head for fine-tuning. Additionally, use data augmentation.


In [None]:
def create_finetuned_model(conv_base):
    """Build and compile a binary classifier on top of a convolutional base.

    Note that ``conv_base`` is modified in place: it is made trainable and
    then every layer except its last four is frozen.

    Args:
        conv_base: Convolutional base model (here: VGG16 without its top
            layers) that is called on the preprocessed 180x180x3 input.

    Returns:
        A ``keras.Model`` that chains the shared ``data_augmentation``
        pipeline, VGG16 input preprocessing, ``conv_base``, a 256-unit dense
        layer, dropout (0.5) and a sigmoid output unit. It is compiled with
        binary cross-entropy loss, RMSprop (learning rate 1e-5) and the
        accuracy metric.
    """
    # Unfreeze the base as a whole, then re-freeze all but its last 4 layers.
    conv_base.trainable = True
    for frozen_layer in conv_base.layers[:-4]:
        frozen_layer.trainable = False

    inputs = keras.Input(shape=(180, 180, 3))
    augmented = data_augmentation(inputs)
    preprocessed = keras.applications.vgg16.preprocess_input(augmented)
    features = conv_base(preprocessed)

    # Custom classification head.
    features = layers.Flatten()(features)
    features = layers.Dense(256, activation="relu")(features)
    features = layers.Dropout(0.5)(features)
    outputs = layers.Dense(1, activation="sigmoid")(features)

    model = keras.Model(inputs=inputs, outputs=outputs)

    optimizer = keras.optimizers.RMSprop(learning_rate=1e-5)
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model

In [None]:
# Build the fine-tuning model on top of the VGG16 base and print its summary.
finetuned_model = create_finetuned_model(conv_base)
finetuned_model.summary()

Train the fine-tuned VGG16 model.

In [None]:
# Checkpoint callback: with save_best_only=True the file is only overwritten
# when the monitored validation loss improves.
finetuned_callbacks = [
    keras.callbacks.ModelCheckpoint(
        monitor="val_loss",
        save_best_only=True,
        filepath="./models/finetuned_vgg16_model.h5",
    )
]

# Train for 50 epochs, validating on the validation split after each epoch.
finetuned_history = finetuned_model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=50,
    callbacks=finetuned_callbacks,
)

In [None]:
# Plot the accuracy and loss curves recorded while fine-tuning VGG16.
plot_history(finetuned_history)

## 5. Evaluate the models


Define functions to evaluate the models and display the results.

In [None]:
import keras
from sklearn.metrics import classification_report, precision_recall_curve, auc
import tensorflow as tf


def get_predictions(model, dataset):
    """Run ``model`` over ``dataset`` and collect labels, scores and classes.

    Args:
        model: Object exposing ``predict_on_batch(images)`` (e.g. a Keras
            model) that returns one score per sample of the batch.
        dataset: Iterable of ``(images, labels)`` batches.

    Returns:
        A tuple ``(y_true, y_pred, y_pred_bin)`` of flat 1-D NumPy arrays in
        dataset iteration order: the labels cast to ``int``, the raw scores,
        and the scores thresholded at 0.5 (strictly greater than 0.5 -> 1).
        All three arrays are empty when the dataset yields no batches.
    """
    true_batches = []
    score_batches = []

    for images, y_true in dataset:
        # predict() is designed for whole datasets and is not meant to be
        # called once per batch inside a loop; predict_on_batch() scores
        # exactly one batch.
        scores = model.predict_on_batch(images)
        score_batches.append(np.asarray(scores).ravel())
        true_batches.append(np.asarray(y_true).ravel())

    if not score_batches:
        # np.concatenate() rejects an empty list, so handle this explicitly.
        return np.array([], dtype=int), np.array([]), np.array([], dtype=int)

    y_true_all = np.concatenate(true_batches).astype(int)
    y_pred_all = np.concatenate(score_batches)
    y_pred_bin_all = (y_pred_all > 0.5).astype(int)

    return y_true_all, y_pred_all, y_pred_bin_all


def calc_confusion_matrix(y_true, y_pred_bin):
    """Count the four cells of a binary confusion matrix.

    Class 1 is treated as the positive class and class 0 as the negative
    class. The counts are computed directly with NumPy so that inputs which
    contain only one of the two classes still yield all four values.

    Args:
        y_true: Array-like of ground-truth labels (expected to be 0 or 1).
        y_pred_bin: Array-like of predicted labels (expected to be 0 or 1),
            with the same number of elements as ``y_true``.

    Returns:
        A tuple ``(TP, TN, FP, FN)`` of Python ints.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    actual = np.asarray(y_true).astype(int).ravel()
    predicted = np.asarray(y_pred_bin).astype(int).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred_bin must have the same length, "
            f"got {actual.size} and {predicted.size}"
        )

    actual_pos, actual_neg = actual == 1, actual == 0
    predicted_pos, predicted_neg = predicted == 1, predicted == 0

    TP = int(np.count_nonzero(actual_pos & predicted_pos))
    TN = int(np.count_nonzero(actual_neg & predicted_neg))
    FP = int(np.count_nonzero(actual_neg & predicted_pos))
    FN = int(np.count_nonzero(actual_pos & predicted_neg))

    return TP, TN, FP, FN


def plot_precision_recall_curve(y_true, y_pred):
    """Plot the precision-recall curve and show its area under the curve.

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Predicted scores for the positive class (not thresholded).
    """
    precision, recall, _ = precision_recall_curve(y_true, y_pred)
    pr_auc = auc(recall, precision)

    plt.figure(figsize=(8, 6))
    plt.plot(recall, precision, marker=".", label=f"AUC: {pr_auc:.3f}")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Precision-Recall Curve")
    # The AUC value lives in the line's label, which is only rendered when a
    # legend is drawn.
    plt.legend()
    plt.show()


def evaluate_model(model_path, dataset):
    """Load a saved model, score it on ``dataset`` and build a text report.

    Args:
        model_path: Path of the saved model, passed to
            ``keras.models.load_model``.
        dataset: Batched ``(images, labels)`` dataset to evaluate on.

    Returns:
        A tuple ``(result, y_true, y_pred, y_pred_bin)``: the report string
        (the classification report followed by a TP/FP/FN/TN table) and the
        three arrays returned by ``get_predictions``.
    """
    loaded_model = keras.models.load_model(model_path)
    y_true, y_pred, y_pred_bin = get_predictions(loaded_model, dataset)
    TP, TN, FP, FN = calc_confusion_matrix(y_true, y_pred_bin)

    report = classification_report(y_true, y_pred_bin, target_names=class_names)
    rule = "-" * 25

    # The leading and the third entry are empty so that the joined text
    # starts with a newline and has a blank line after the report.
    result = "\n".join(
        [
            "",
            report,
            "",
            "Confusion matrix:",
            rule,
            f"| TP: {TP:5} | FP: {FP:5} |",
            rule,
            f"| FN: {FN:5} | TN: {TN:5} |",
            rule,
        ]
    )

    return result, y_true, y_pred, y_pred_bin


def show_misclassified_images(y_true, y_pred_bin, dataset, max_num=5):
    """Display a random selection of misclassified images.

    Positions where ``y_true`` and ``y_pred_bin`` differ are used as indices
    into the unbatched ``dataset``, so the dataset must iterate in the same
    order as when the predictions were made (i.e. it must not reshuffle).

    Args:
        y_true: 1-D array of ground-truth integer labels.
        y_pred_bin: 1-D array of predicted integer labels, aligned with
            ``y_true``.
        dataset: Batched ``(images, labels)`` dataset the predictions were
            computed on; must provide ``unbatch().as_numpy_iterator()``.
        max_num: Maximum number of images to display.
    """
    y_true = np.asarray(y_true)
    y_pred_bin = np.asarray(y_pred_bin)
    errors = np.where(y_true != y_pred_bin)[0]

    max_cols = 5
    num_images = min(len(errors), max_num)
    if num_images <= 0:
        # A grid with zero rows cannot be created, so there is nothing to draw.
        print("No misclassified images to display.")
        return

    rows = (num_images + max_cols - 1) // max_cols

    selected_errors = np.random.choice(errors, num_images, replace=False)

    # Walk the dataset once and keep only the selected samples, instead of
    # materialising the whole dataset again for every displayed image.
    wanted = set(selected_errors.tolist())
    samples = {}
    for position, sample in enumerate(dataset.unbatch().as_numpy_iterator()):
        if position in wanted:
            samples[position] = sample
            if len(samples) == len(wanted):
                break

    fig, axes = plt.subplots(rows, max_cols, figsize=(15, 4 * rows))
    axes = axes.flatten()

    for ax, idx in zip(axes, selected_errors):
        image, label = samples[int(idx)]
        # The label may arrive as a one-element array; take its single value.
        true_name = class_names[int(np.ravel(label)[0])]
        prediction = class_names[int(y_pred_bin[idx])]

        ax.imshow(image.astype("uint8"))
        ax.set_title(f"True: {true_name}     Pred: {prediction}")
        ax.axis("off")

    # Hide the remaining axes
    for ax in axes[num_images:]:
        ax.axis("off")

    plt.suptitle("Misclassified images")
    plt.tight_layout()
    plt.show()

### 5.1 Vanilla CNN model

In [None]:
# Evaluate the vanilla CNN checkpoint saved during training on the test set.
vanilla_report, y_true_vanilla, y_pred_vanilla, y_pred_bin_vanilla = evaluate_model(
    "./models/vanilla_cnn_model.h5", test_dataset
)

print(f"\n\nVanilla CNN model:\n{vanilla_report}")

In [None]:
# Show up to 10 randomly chosen test images that the vanilla CNN misclassified.
show_misclassified_images(y_true_vanilla, y_pred_bin_vanilla, test_dataset, 10)

### 5.2 Fine-tuned VGG16 model

In [None]:
# Evaluate the fine-tuned VGG16 checkpoint saved during training on the test set.
finetuned_report, y_true_finetuned, y_pred_finetuned, y_pred_bin_finetuned = evaluate_model(
    "./models/finetuned_vgg16_model.h5", test_dataset
)

print(f"\n\nFine-tuned VGG16 model:\n{finetuned_report}")

In [None]:
# Show up to 10 randomly chosen test images that the fine-tuned VGG16 model
# misclassified.
show_misclassified_images(y_true_finetuned, y_pred_bin_finetuned, test_dataset, 10)

## 6. Conclusion
