<a href="https://colab.research.google.com/github/asukul/DS201/blob/master/TensorFlow_Cat_vs_Dog_Classifier_(Colab)%202025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1 (from tensorflow)
  Downloading tensorflow_io_gcs_filesystem-0.37.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting wheel<1.0,>=0.23.0 (from astunparse>=1.6.0->tensorflow

In [None]:
# -*- coding: utf-8 -*-
"""
Cat vs Dog Image Classifier using TensorFlow in Colab.

This notebook demonstrates:
1. Loading the cats_vs_dogs dataset from TFDS.
2. Building and training a basic CNN.
3. Visualizing results (plots, TensorBoard) and identifying overfitting.
4. Applying data augmentation and dropout.
5. Training an improved model and visualizing its results.
"""

# @title # 1. Setup and Imports
# Import necessary libraries
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
import os
import datetime

# Helper function to plot accuracy and loss curves
def plot_history(history, title_prefix=""):
    """Draw training-vs-validation curves for accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        title_prefix: Text placed in front of each subplot title.
    """
    recorded = history.history
    # Read every series before any figure is created.
    train_acc = recorded['accuracy']
    valid_acc = recorded['val_accuracy']
    train_loss = recorded['loss']
    valid_loss = recorded['val_loss']
    epoch_axis = range(1, len(train_acc) + 1)

    # (subplot position, metric name, training series, validation series)
    panels = (
        (1, 'Accuracy', train_acc, valid_acc),
        (2, 'Loss', train_loss, valid_loss),
    )

    plt.figure(figsize=(14, 5))
    for position, metric_name, train_series, valid_series in panels:
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, train_series, 'bo-', label=f'Training {metric_name}')
        plt.plot(epoch_axis, valid_series, 'ro-', label=f'Validation {metric_name}')
        plt.title(f'{title_prefix}Training and Validation {metric_name}')
        plt.xlabel('Epochs')
        plt.ylabel(metric_name)
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.show()

# Helper function to display sample predictions
def show_sample_predictions(dataset, model, class_names, num_samples=16):
    """Show images from the first batch with their true and predicted labels.

    Runs ``model.predict`` on the first batch of ``dataset`` and draws up to
    ``num_samples`` images in a square grid. Each title lists the true class,
    the predicted class (model output thresholded at 0.5) and the confidence
    in the predicted class. Titles are green when the prediction matches the
    true label and red otherwise.

    Args:
        dataset: Batched dataset supporting ``take(1)`` and yielding
            ``(images, labels)`` pairs with integer labels.
        model: Model whose ``predict`` returns one probability per image,
            shaped ``(batch,)`` or ``(batch, 1)`` (binary sigmoid output).
        class_names: Sequence mapping a label index to its display name.
        num_samples: Maximum number of images to display.
    """
    plt.figure(figsize=(10, 10))
    # Take one batch from the dataset
    for images, labels in dataset.take(1):
        pixels = np.asarray(images)
        true_ids = np.asarray(labels)
        shown = min(num_samples, pixels.shape[0])
        if shown <= 0:
            # Nothing to draw (empty batch or non-positive num_samples).
            break

        # Stay in NumPy: tf.squeeze would return a tf.Tensor, which does not
        # offer the .astype() used for thresholding below.
        probabilities = np.asarray(model.predict(images))
        if probabilities.shape[-1] == 1:
            probabilities = np.squeeze(probabilities, axis=-1)

        # Convert probabilities to labels (0 or 1) based on threshold 0.5
        predicted_ids = (probabilities > 0.5).astype(int)

        # Float images scaled to [0, 1] (as produced by format_example) must be
        # shown as floats; casting them to uint8 would truncate every pixel
        # to 0 and render black squares. Other images are shown as 0-255 uint8.
        if np.issubdtype(pixels.dtype, np.floating) and pixels.max() <= 1.0:
            pixels = np.clip(pixels, 0.0, 1.0)
        else:
            pixels = np.clip(pixels, 0, 255).astype("uint8")

        # Keep the 4x4 layout for up to 16 images, grow the grid beyond that.
        grid = max(4, int(np.ceil(np.sqrt(shown))))

        for i in range(shown):
            plt.subplot(grid, grid, i + 1)
            plt.imshow(pixels[i])
            true_label = class_names[int(true_ids[i])]
            pred_label = class_names[int(predicted_ids[i])]
            probability = float(probabilities[i])
            # Confidence refers to the predicted class, not always class 1.
            confidence = probability if predicted_ids[i] == 1 else 1 - probability

            plt.title(f"True: {true_label}\nPred: {pred_label}\nConf: {confidence:.2f}",
                      color=("green" if pred_label == true_label else "red"))
            plt.axis("off")
    plt.tight_layout()
    plt.show()

print("TensorFlow Version:", tf.__version__)

# @title # 2. Load and Prepare the Dataset

# Load the cats_vs_dogs dataset.
# Only the 'train' split is requested here; it is sliced into training,
# validation and test subsets by percentage.
(raw_train, raw_validation, raw_test), metadata = tfds.load(
    'cats_vs_dogs',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'], # 80% train, 10% val, 10% test
    with_info=True,
    as_supervised=True, # Returns (image, label) tuples
)

# cardinality() returns a scalar tensor; .numpy() unwraps it so the count is
# printed as a plain integer instead of a "tf.Tensor(...)" repr.
print("Raw Training Examples:", tf.data.experimental.cardinality(raw_train).numpy())
print("Raw Validation Examples:", tf.data.experimental.cardinality(raw_validation).numpy())
print("Raw Test Examples:", tf.data.experimental.cardinality(raw_test).numpy())

# Class names indexed by integer label. Read from the dataset metadata loaded
# above so they always match the label encoding TFDS uses
# (for cats_vs_dogs this is expected to be 0: cat, 1: dog).
CLASS_NAMES = list(metadata.features['label'].names)

# Define image size and batch size
IMG_SIZE = 160 # All images will be resized to 160x160
BATCH_SIZE = 32

# Preprocessing function: resize and normalize images
def format_example(image, label):
    """Convert an image to float32, divide by 255 and resize it.

    The image is resized to IMG_SIZE x IMG_SIZE after scaling; the label is
    returned unchanged alongside it.
    """
    as_float = tf.cast(image, tf.float32)
    scaled = as_float / 255.0  # 0-255 pixel values become 0-1
    resized = tf.image.resize(scaled, (IMG_SIZE, IMG_SIZE))
    return resized, label

# AUTOTUNE lets tf.data pick parallelism and prefetch buffer sizes at runtime.
# It is defined before the map() calls so they can use it too.
AUTOTUNE = tf.data.AUTOTUNE

# Apply preprocessing to datasets.
# num_parallel_calls lets map() decode/resize several images concurrently
# instead of one at a time; element order is expected to stay the same because
# tf.data maps are deterministic unless configured otherwise.
train_dataset = raw_train.map(format_example, num_parallel_calls=AUTOTUNE)
validation_dataset = raw_validation.map(format_example, num_parallel_calls=AUTOTUNE)
test_dataset = raw_test.map(format_example, num_parallel_calls=AUTOTUNE)

# Shuffle (training only) and batch the datasets; prefetch overlaps input
# preparation with model execution.
train_batches = train_dataset.shuffle(1000).batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
validation_batches = validation_dataset.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
test_batches = test_dataset.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

# Verify the shape of the data by pulling a single batch.
image_batch, label_batch = next(iter(train_batches.take(1)))
print("Batch shape:", image_batch.shape, label_batch.shape)

# @title # 3. Build the Initial CNN Model
# Baseline network: three Conv2D + MaxPooling2D stages feeding a dense head.

# Convolutional stages with 32, 64 and 128 filters respectively.
initial_conv_stages = []
for stage_filters in (32, 64, 128):
    initial_conv_stages.append(
        tf.keras.layers.Conv2D(stage_filters, (3, 3), activation='relu'))
    initial_conv_stages.append(tf.keras.layers.MaxPooling2D((2, 2)))

initial_model = tf.keras.models.Sequential([
    # Explicit input shape: IMG_SIZE x IMG_SIZE RGB images
    tf.keras.layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
    *initial_conv_stages,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # Single sigmoid unit: binary classification output
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Compile with Adam and binary cross-entropy (labels are 0/1)
initial_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
initial_model.compile(
    optimizer=initial_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

initial_model.summary()

# @title # 4. Train the Initial Model
# TensorBoard run directory: logs/initial_fit/<timestamp of this run>
initial_run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir_initial = os.path.join("logs", "initial_fit", initial_run_stamp)
tensorboard_callback_initial = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir_initial,
    histogram_freq=1,
)

# Number of passes over the training data; 15 is a starting point, tune as needed
EPOCHS_INITIAL = 15

print("\n--- Training Initial Model ---")
initial_callbacks = [tensorboard_callback_initial]
history_initial = initial_model.fit(
    train_batches,
    epochs=EPOCHS_INITIAL,
    validation_data=validation_batches,
    callbacks=initial_callbacks,
)
print("--- Initial Model Training Complete ---")

# @title # 5. Evaluate the Initial Model

print("\n--- Evaluating Initial Model ---")
# Accuracy/loss curves recorded during training
plot_history(history_initial, title_prefix="Initial Model: ")

# Visual spot check on images from the test batches
print("Sample Predictions from Initial Model (on Test Set):")
show_sample_predictions(test_batches, initial_model, CLASS_NAMES)

# Loss and accuracy over the test batches
initial_test_metrics = initial_model.evaluate(test_batches)
loss_initial, accuracy_initial = initial_test_metrics
print(f"\nInitial Model Test Loss: {loss_initial:.4f}")
print(f"Initial Model Test Accuracy: {accuracy_initial:.4f}")

# @markdown ---
# @markdown ### TensorBoard for Initial Model
# @markdown To view TensorBoard logs for the initial run, execute the following commands in a **separate Colab cell**:
# @markdown ```
# @markdown %load_ext tensorboard
# @markdown %tensorboard --logdir logs/initial_fit
# @markdown ```
# @markdown ---

# @title # 6. Addressing Overfitting
# Console marker separating the initial-model output from the improved-model
# section; the explanation itself lives in the markdown comments below.
print("\n--- Addressing Overfitting ---")
# @markdown Looking at the 'Initial Model: Training and Validation Accuracy/Loss' plots above, we can observe:
# @markdown * **Training Accuracy:** Steadily increases and likely reaches a high value.
# @markdown * **Validation Accuracy:** Increases initially but then plateaus or even decreases, diverging from the training accuracy.
# @markdown * **Training Loss:** Steadily decreases.
# @markdown * **Validation Loss:** Decreases initially but then starts to increase, diverging from the training loss.
# @markdown
# @markdown This divergence is a classic sign of **overfitting**. The model is learning the training data *too* well, including its noise and specific patterns, and fails to generalize to new, unseen data (the validation set).
# @markdown
# @markdown **Techniques to Reduce Overfitting:**
# @markdown 1.  **Data Augmentation:** Artificially increase the diversity of the training data by applying random transformations (rotation, zoom, flip, etc.) to the existing images. This helps the model learn more robust features.
# @markdown 2.  **Dropout:** Randomly set a fraction of input units to 0 during training at each update step. This prevents units from co-adapting too much and forces the network to learn more redundant representations.
# @markdown 3.  **(Optional) Transfer Learning:** Use a pre-trained model (like MobileNetV2, ResNet) trained on a large dataset (like ImageNet) and fine-tune it on our specific task. This leverages learned features and often leads to better performance with less data. (We will focus on Augmentation and Dropout here).

# @title # 7. Implement Data Augmentation and Build Improved Model

# Random image transformations; these layers are active only during training.
# NOTE(review): inputs here are scaled to [0, 1]; confirm that RandomContrast
# in the installed Keras version does not assume a 0-255 pixel range.
augmentation_steps = [
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.2),
    tf.keras.layers.RandomContrast(0.2),
    # Add more augmentations if needed
]
data_augmentation = tf.keras.Sequential(augmentation_steps)

# Same convolutional stages as the initial model: 32, 64 and 128 filters.
improved_conv_stages = []
for stage_filters in (32, 64, 128):
    improved_conv_stages.append(
        tf.keras.layers.Conv2D(stage_filters, (3, 3), activation='relu'))
    improved_conv_stages.append(tf.keras.layers.MaxPooling2D((2, 2)))

# Improved model = augmentation + conv stages + dropout-regularized dense head
improved_model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
    data_augmentation,  # augmentation runs first, on the raw model input
    *improved_conv_stages,
    tf.keras.layers.Dropout(0.3),  # 30% dropout ahead of the dense layers
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),  # second 30% dropout before the output
    tf.keras.layers.Dense(1, activation='sigmoid'),  # binary output
])

# A lower learning rate than the initial model's 0.001 is used here; this is
# often beneficial together with augmentation/dropout.
improved_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
improved_model.compile(
    optimizer=improved_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

improved_model.summary()

# @title # 8. Train the Improved Model
# TensorBoard run directory: logs/improved_fit/<timestamp of this run>
improved_run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir_improved = os.path.join("logs", "improved_fit", improved_run_stamp)
tensorboard_callback_improved = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir_improved,
    histogram_freq=1,
)

# More epochs than the initial run, since augmentation makes training harder;
# adjust as needed.
EPOCHS_IMPROVED = 25

print("\n--- Training Improved Model (with Augmentation & Dropout) ---")
improved_callbacks = [tensorboard_callback_improved]
history_improved = improved_model.fit(
    train_batches,  # augmentation happens inside the model, on the fly
    epochs=EPOCHS_IMPROVED,
    validation_data=validation_batches,
    callbacks=improved_callbacks,
)
print("--- Improved Model Training Complete ---")

# @title # 9. Evaluate the Improved Model

print("\n--- Evaluating Improved Model ---")
# Accuracy/loss curves recorded during training
plot_history(history_improved, title_prefix="Improved Model: ")

# Visual spot check on images from the test batches
print("Sample Predictions from Improved Model (on Test Set):")
show_sample_predictions(test_batches, improved_model, CLASS_NAMES)

# Loss and accuracy over the test batches
improved_test_metrics = improved_model.evaluate(test_batches)
loss_improved, accuracy_improved = improved_test_metrics
print(f"\nImproved Model Test Loss: {loss_improved:.4f}")
print(f"Improved Model Test Accuracy: {accuracy_improved:.4f}")

# Side-by-side test accuracy of both models
print("\n--- Comparison ---")
print(f"Initial Model Test Accuracy: {accuracy_initial:.4f}")
print(f"Improved Model Test Accuracy: {accuracy_improved:.4f}")

# @markdown ---
# @markdown ### TensorBoard for Improved Model
# @markdown To view TensorBoard logs for the improved run and compare with the initial run, execute the following commands in a **separate Colab cell**:
# @markdown ```
# @markdown %load_ext tensorboard
# @markdown %tensorboard --logdir logs
# @markdown ```
# @markdown This will load logs from both the `initial_fit` and `improved_fit` directories, allowing direct comparison in the TensorBoard UI.
# @markdown ---

# @title # 10. Conclusion
# @markdown We trained an initial CNN for cat vs. dog classification and observed overfitting.
# @markdown By applying **Data Augmentation** and **Dropout**, we created an improved model.
# @markdown Comparing the training curves and test accuracies, the improved model generally shows:
# @markdown * Less divergence between training and validation metrics (reduced overfitting).
# @markdown * Often, a higher final validation and test accuracy, indicating better generalization.
# @markdown
# @markdown Further improvements could involve:
# @markdown * Trying different network architectures.
# @markdown * Fine-tuning hyperparameters (learning rate, dropout rate, number of neurons/layers).
# @markdown * Using **Transfer Learning** with a pre-trained model like MobileNetV2 or ResNet50V2.