<p align="left">
  <a href="https://colab.research.google.com/github/fernandoarcevega/AI_Workshop/blob/main/Part_3/06_Lab/Lab_Dogs_vs_Cats.ipynb" target="_parent">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" width="200">
  </a>
</p>

In [None]:
###############################################
# Author 1: Wilfrido Gómez-Flores (CINVESTAV) #
# Author 2: Fernando Arce-Vega (CIO)          #
# e-mail 1: wgomez@cinvestav.mx               #
# e-mail 2: farce@cio.mx                      #
# Date:     nov/11/2025                       #
# Subject:  Dogs vs. Cats with CNN            #
###############################################

# Classifying `Dogs vs. Cats` Images Using a `CNN`
In this exercise, we will implement a `CNN` to classify images from the [`Dogs vs. Cats`](https://www.kaggle.com/c/dogs-vs-cats/data) dataset. We will also apply data augmentation and transfer learning.

The `Dogs vs. Cats` database consists of `25,000` training images of dogs and cats. The goal is to train a classifier to distinguish between these two types of pets. For this example, we will use `2,000` images to train our model, `1,000` for validation, and `2,000` for testing.

In [None]:
# Before running: switch the Colab runtime to GPU (Runtime > Change runtime type > GPU)

In [None]:
# Check GPU details: runs the nvidia-smi shell command so the GPU attached
# to this runtime can be confirmed before training
!nvidia-smi

In [None]:
# Libraries
import numpy as np                                                    # Numerical array operations
import matplotlib.pyplot as plt                                       # Data plotting/visualization
import tensorflow as tf                                               # Machine learning
from tensorflow.keras.preprocessing.image import ImageDataGenerator   # Batch generator
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay  # Model evaluation
import os                                                             # Interaction with the operating system

In [None]:
# Install gdown
!pip install -q gdown

In [None]:
# Download dogs_vs_cats.zip dataset
!gdown --id 1zRFN_O3chFutGP7_4__z8rWuGJulc4BW --output dogs_vs_cats.zip

In [None]:
# Unzip dogs_vs_cats.zip file
!unzip '/content/dogs_vs_cats.zip' -d '/content/'

In [None]:
# Remove dogs_vs_cats.zip file
!rm '/content/dogs_vs_cats.zip'

In [None]:
# Paths
dataset = 'dogs-vs-cats'
path = f'/content/{dataset}/'

# One sub-directory per split. The trailing slash is kept because later cells
# build file paths by appending directly to these strings.
train_dir, val_dir, test_dir = (
    path + split + '/' for split in ('training', 'validation', 'testing')
)

In [None]:
# Data visualization: random grid of training images titled with their class
ncols = 10
nrows = 10

pictures = []
clas = []

# Collect every image path together with the name of its class folder.
# Each class directory is listed once; entries of train_dir that are not
# directories are skipped instead of crashing os.listdir.
folders = sorted(os.listdir(train_dir))
for folder in folders:
  folder_path = os.path.join(train_dir, folder)
  if not os.path.isdir(folder_path):
    continue
  names = os.listdir(folder_path)
  pictures += [os.path.join(folder_path, name) for name in names]
  clas += [folder] * len(names)

# Shuffle data (paths and labels share the same permutation so they stay aligned)
ind = np.random.permutation(len(clas))
pictures = np.array(pictures)[ind]
clas = np.array(clas)[ind]

# Create figure (a bare figure: no background axes is created or has to be hidden)
fig = plt.figure(figsize=(20, 20))

print('Training images\n')

# Never index past the number of images actually found
n_show = min(ncols * nrows, len(pictures))

for i in range(n_show):

  ax = fig.add_subplot(nrows, ncols, i + 1)
  ax.imshow(plt.imread(pictures[i]), cmap='gray')
  ax.axis('off')

  # The title is the folder name without its last character
  # (assumes plural folder names such as 'cats' / 'dogs' — TODO confirm)
  ax.set_title(str(clas[i][:-1]))

plt.show()

In [None]:
# Batch generation
data_augmentation = True

# Every split is rescaled from [0, 255] to [0, 1]
rescale = 1.0 / 255.0

# Data augmentation (random transforms, applied to the training split only)
if data_augmentation:
  # rotation_range is expressed in degrees, so the previous value of 0.2 produced
  # practically no rotation; 20 gives a visible random rotation of up to +/-20 degrees.
  train_datagen = ImageDataGenerator(rescale=rescale,
                                     zoom_range=0.2,
                                     rotation_range=20,
                                     shear_range=0.2,
                                     horizontal_flip=True)
# Without data augmentation
else:
  train_datagen = ImageDataGenerator(rescale=rescale)

# Validation and test images are only rescaled, never augmented
val_datagen = ImageDataGenerator(rescale=rescale)
test_datagen = ImageDataGenerator(rescale=rescale)

batch_size = 32

# Settings shared by the three directory iterators
flow_options = dict(
    target_size=(224, 224),
    batch_size=batch_size,
    color_mode='rgb',
    class_mode='binary')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

In [None]:
# Visualization of the data augmentation
def plottingImages(images, cols=5):
    """Show a sequence of images on a grid.

    Parameters
    ----------
    images : sequence of array-like
        Images accepted by `imshow`; they are drawn in order, row by row.
    cols : int, optional
        Number of grid columns (default 5).

    The number of rows is rounded up, so a count that is not a multiple of
    `cols` (or is smaller than `cols`) still gets a slot for every image.
    Nothing is drawn for an empty sequence.
    """
    n_images = len(images)
    if n_images == 0:
        return

    # Ceiling division: a partially filled last row still needs its own row
    rows = -(-n_images // cols)

    # A bare figure avoids creating an extra full-size axes behind the grid
    fig = plt.figure(figsize=(20, 20))

    for c, img in enumerate(images):
        ax = fig.add_subplot(rows, cols, c + 1)
        ax.imshow(img, cmap='gray')
        ax.axis('off')

    fig.tight_layout()
    plt.show()

# Request batch 0 repeatedly and keep its first image each time; the training
# generator re-applies its random transforms on every request
# (presumably to the same source image — verify against the generator's shuffling)
n_examples = 20
augmented_images = [train_generator[0][0][0] for _ in range(n_examples)]

plottingImages(augmented_images)

In [None]:
# TO_DO: Model (store it in a variable named `model`; later cells call model.predict and model.evaluate)


In [None]:
# TO_DO: Model description


In [None]:
# TO_DO: Compile model (include metrics=['accuracy']; the callbacks and plots below read 'accuracy' / 'val_accuracy')


In [None]:
# Callbacks
model_path = '/content/model.keras'

# All three callbacks watch the same validation metric
monitored_metric = 'val_accuracy'

# Stop after 6 epochs without improvement and restore the best weights seen
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor=monitored_metric,
    patience=6,
    restore_best_weights=True)

# Multiply the learning rate by 0.1 after 3 stagnant epochs, never going below 1e-6
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.1,
    patience=3,
    min_lr=1e-6,
    verbose=1)

# Write the model to model_path only when the monitored metric improves
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_path,
    monitor=monitored_metric,
    save_best_only=True)

In [None]:
# TO_DO: Train model
# NOTE: the plotting cell below reads `history.history`, so the object returned
# by the training call must be stored in a variable named `history`.
# The callbacks defined above (early_stop, reduce_lr, model_checkpoint) are
# presumably meant to be passed to that call — confirm with the lab guide.
epochs = 20


In [None]:
# Training and validation graphs
training_acc = history.history['accuracy']
validation_acc = history.history['val_accuracy']

training_loss = history.history['loss']
validation_loss = history.history['val_loss']

# One x position per recorded epoch
epocas = np.arange(len(training_loss))

# (figure title, y-axis label, legend suffix, training series, validation series)
# The loss curve is labelled generically as 'Loss': the loss function is chosen
# in the compile step, so calling it MSE (or 'MSR') could be wrong.
curves = [
    ('ACC', 'Acc', 'Acc', training_acc, validation_acc),
    ('Loss', 'Loss', 'Loss', training_loss, validation_loss),
]

for curve_title, curve_ylabel, curve_name, train_values, val_values in curves:
  fig, ax = plt.subplots(figsize=(10, 4))
  ax.plot(epocas, train_values, color='blue', label='Training ' + curve_name)
  ax.plot(epocas, val_values, color='green', label='Validation ' + curve_name)
  ax.set_title(curve_title)
  ax.set_xlabel('Epochs')
  ax.set_ylabel(curve_ylabel)
  ax.legend()
  plt.show()

In [None]:
# Model prediction on a single batch drawn from the test generator
batch = next(test_generator)
images, targets = batch[0], batch[1]

# Round each predicted score to 0 or 1, then flatten to one value per image
# (the reshape requires exactly one output value per image)
results = np.round(model.predict(images, verbose=0))
results = results.reshape(len(results))

# Indexing classes: invert class_indices so a numeric label looks up its class name
dict_classes = {index: name for name, index in test_generator.class_indices.items()}
print(dict_classes)

In [None]:
# Data visualization: predicted class for each test image, misclassifications in red
ncols = 5
nrows = 6

# Create figure (a bare figure: no background axes is created or has to be hidden)
fig = plt.figure(figsize=(20, 20))

print('Testing images\n')

# A batch can hold fewer images than the grid has cells; never index past it
n_show = min(ncols * nrows, len(images))

for i in range(n_show):

  ax = fig.add_subplot(nrows, ncols, i + 1)
  ax.imshow(images[i], cmap='gray')
  ax.axis('off')

  # Predictions are stored as floats; cast to int to match the integer keys of dict_classes.
  # The last character of the class name is dropped
  # (assumes plural folder names such as 'cats' / 'dogs' — TODO confirm)
  label = str(dict_classes[int(results[i])])[:-1]

  # Only misclassified images get an explicit (red) title colour
  title_style = {'color': 'red'} if targets[i] != results[i] else {}
  ax.set_title(label, **title_style)

plt.show()

In [None]:
# Load model: read the file stored at model_path (the path handed to the
# ModelCheckpoint callback) and rebind `model` to it
filepath = model_path
model = tf.keras.models.load_model(filepath=filepath)

In [None]:
# Model performance: evaluate the model on each split, in training/validation/test order
split_generators = (train_generator, val_generator, test_generator)
split_scores = [model.evaluate(generator, verbose=1) for generator in split_generators]

# Each evaluation is unpacked into a (loss, accuracy) pair
(loss_training, acc_training), \
    (loss_validation, acc_validation), \
    (loss_testing, acc_testing) = split_scores

In [None]:
# Print performance: loss and accuracy per split, with a blank line between splits
performance_report = (
    ('Training', loss_training, acc_training),
    ('Validation', loss_validation, acc_validation),
    ('Test', loss_testing, acc_testing),
)

for position, (split_name, split_loss, split_acc) in enumerate(performance_report):
  if position > 0:
    print()
  # Labels are padded to 16 characters so the values line up in one column
  print((split_name + ' loss:').ljust(16), split_loss)
  print((split_name + ' acc:').ljust(16), split_acc)

In [None]:
# Confusion matrix over the complete test set, using the model currently bound to `model`.
# Predictions are collected batch by batch so each one stays paired with the labels
# delivered in the same batch, whatever order the generator serves the images in.
class_names = [name for name, _ in sorted(test_generator.class_indices.items(),
                                          key=lambda item: item[1])]

y_true = []
y_pred = []

for batch_index in range(len(test_generator)):
  batch_images, batch_targets = test_generator[batch_index]
  batch_scores = model.predict(batch_images, verbose=0)

  y_true.append(np.asarray(batch_targets).astype(int))
  # Same rounding rule as the single-batch prediction cell
  # (requires one output value per image)
  y_pred.append(np.round(batch_scores).reshape(len(batch_scores)).astype(int))

y_true = np.concatenate(y_true)
y_pred = np.concatenate(y_pred)

# Passing explicit labels keeps the matrix square even if a class is never predicted
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot()
plt.show()