<a href="https://colab.research.google.com/github/Ulyssesllc/cat_dog_classification/blob/main/Cats_vs_Dogs_Classification_with_Resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
# Download the dataset through kagglehub and keep the path it returns.
# NOTE(review): the returned path is not referenced by any later cell visible in
# this notebook (they hard-code /kaggle/input/...) — confirm that this directory
# exists in the environment the notebook runs in.
import kagglehub
bhavikjikadara_dog_and_cat_classification_dataset_path = kagglehub.dataset_download('bhavikjikadara/dog-and-cat-classification-dataset')

print('Data source import complete.')


# I) Import and download

In [None]:
import os
import shutil
import torch
import torchvision
import random
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


from torch import nn
from shutil import copyfile


In [None]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# II) The dataset

### *) Loading the dataset

In [None]:
# Hard-coded location of the dataset's PetImages folder.
# NOTE(review): this is a Kaggle-style input path; the path returned by the
# kagglehub download cell is not used here — confirm it matches.
root_folder='/kaggle/input/dog-and-cat-classification-dataset/PetImages'
cat_folder= os.path.join(root_folder,"Cat")  # the "Cat" sub-folder of the dataset root
print(cat_folder)

## II.1) Overview

In [None]:
# List the entries of the cat folder, then show how many there are and the first five names.
cat_img= os.listdir(cat_folder)
print(len(cat_img))
print(cat_img[:5])

In [None]:
# Turn the bare file names into full paths by prefixing the cat folder.
cat_img_paths= [os.path.join(cat_folder, img) for img in cat_img]
print(cat_img_paths[:5])

In [None]:
# Preview the first ten cat images on a 5x2 grid, each titled with its file name.
ten_img = cat_img_paths[:10]
fig, axes = plt.subplots(5, 2, figsize=(10, 20))

# axes.flat walks the grid row by row, which is the same cell order as
# axes[i // 2, i % 2] for i = 0..9.
for ax, img in zip(axes.flat, ten_img):
    print(img)
    ax.imshow(plt.imread(img))
    ax.title.set_text(os.path.basename(img))
    ax.axis('on')



## II.2) Train & test split

In [None]:
# Create the training/validation/test directory tree, one sub-folder per class.
#
# os.makedirs(..., exist_ok=True) creates missing parents and tolerates
# directories that already exist, so re-running this cell (or running it after a
# partial failure) still yields the complete tree. The previous single
# try-block of os.mkdir() calls stopped at the first error and silently left
# every later directory uncreated.
base_dir = '/tmp/cats-v-dogs'
for split_name in ('training', 'validation', 'test'):
    for class_name in ('cats', 'dogs'):
        target_dir = os.path.join(base_dir, split_name, class_name)
        try:
            os.makedirs(target_dir, exist_ok=True)
        except OSError as err:
            # Still best effort: report the failure and carry on with the rest.
            print(f'Error failed to make directory {target_dir}: {err}')

****As we can see, the paths are yet to be created, so it's our mission to do that beforehand****

In [None]:
# Source folders holding the raw images, one per class.
cat_path = '/kaggle/input/dog-and-cat-classification-dataset/PetImages/Cat'
dog_path = '/kaggle/input/dog-and-cat-classification-dataset/PetImages/Dog'

# Destination roots of the split.
training_path, validation_path = '/tmp/cats-v-dogs/training', '/tmp/cats-v-dogs/validation'

# Per-class destinations; the trailing slash of the sub-folder names is kept so
# the resulting strings are exactly the ones used before.
training_cat, training_dog = (os.path.join(training_path, sub) for sub in ('cats/', 'dogs/'))
validation_cat, validation_dog = (os.path.join(validation_path, sub) for sub in ('cats/', 'dogs/'))

# Whether a separate test split is produced and evaluated.
include_test = True

In [None]:
# Print the number of entries in every split/class directory, in the order
# training, validation, test and, within each, cats before dogs.
for split_name in ('training', 'validation', 'test'):
    for class_name in ('cats', 'dogs'):
        print(len(os.listdir(f'/tmp/cats-v-dogs/{split_name}/{class_name}')))

****Now we will create a function to split the data****

In [None]:
def split_data(main_dir, training_dir, validation_dir, test_dir=None, include_test_split=True, split_size=0.9):
    """
    Randomly split the files of one directory into training, validation and
    (optionally) test sets by copying them into the given directories.

    Empty files and sub-directories of ``main_dir`` are skipped. The first
    ``split_size`` fraction of the shuffled files goes to training. The rest
    goes to validation, or is divided between validation and test (validation
    gets the smaller half when the remainder is odd) if a test split is requested.

    Args:
    main_dir (string):  path containing the images
    training_dir (string):  existing directory to receive the training files
    validation_dir (string):  existing directory to receive the validation files
    test_dir (string):  existing directory to receive the test files; required
        when include_test_split is True
    include_test_split (boolean):  whether to include a test split or not
    split_size (float): fraction of the files to be used for training (0..1)

    Raises:
    ValueError: if split_size is outside [0, 1], or if a test split is
        requested without a test_dir.
    """
    if not 0.0 <= split_size <= 1.0:
        raise ValueError(f"split_size must be between 0 and 1, got {split_size!r}")
    # Fail before copying anything instead of crashing halfway through.
    if include_test_split and test_dir is None:
        raise ValueError("test_dir is required when include_test_split is True")

    # Keep only regular, non-empty files.
    files = []
    for name in os.listdir(main_dir):
        source = os.path.join(main_dir, name)
        if os.path.isfile(source) and os.path.getsize(source) > 0:
            files.append(name)

    shuffled_files = random.sample(files, len(files))  # shuffled copy of the list

    # Honour the caller's split_size (it used to be ignored in favour of 0.9).
    split = int(split_size * len(shuffled_files))
    assignments = [(training_dir, shuffled_files[:split])]

    if include_test_split:
        # Divide what is left between validation and test.
        split_valid_test = split + (len(shuffled_files) - split) // 2
        assignments.append((validation_dir, shuffled_files[split:split_valid_test]))
        assignments.append((test_dir, shuffled_files[split_valid_test:]))
    else:
        assignments.append((validation_dir, shuffled_files[split:]))

    for target_dir, names in assignments:
        for name in names:
            copyfile(os.path.join(main_dir, name), os.path.join(target_dir, name))

    print("Split successful!")

In [None]:
# Split each class separately into its own training/validation/test folders,
# requesting a 0.9 training fraction and a test split only when include_test is set.
split_data(cat_path, '/tmp/cats-v-dogs/training/cats', '/tmp/cats-v-dogs/validation/cats', '/tmp/cats-v-dogs/test/cats',include_test, 0.9)
split_data(dog_path, '/tmp/cats-v-dogs/training/dogs', '/tmp/cats-v-dogs/validation/dogs','/tmp/cats-v-dogs/test/dogs',include_test, 0.9)

****Now, let's check the number of files stored in each of the newly created directories****

In [None]:
# Report how many entries each split/class directory holds:
# training, validation, then test, with cats before dogs in each.
split_dirs = [
    f'/tmp/cats-v-dogs/{split_name}/{class_name}'
    for split_name in ('training', 'validation', 'test')
    for class_name in ('cats', 'dogs')
]
for split_dir in split_dirs:
    print(len(os.listdir(split_dir)))

## II.3) Creation of image loaders

****First, we define the transformation****

In [None]:
from torchvision import transforms, datasets
from torchvision.transforms import RandAugment

# Training transforms
# The geometric/colour augmentations run first on the loaded image; ToTensor and
# Normalize then turn it into a normalized tensor, and RandomErasing comes last
# because it is applied after the tensor conversion.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # random crop of 80-100% of the area, resized to 224
    transforms.RandomHorizontalFlip(),
    RandAugment(num_ops=2, magnitude=9),  # RandAugment for robustness
    transforms.ToTensor(),
    # NOTE(review): presumably the ImageNet channel statistics — confirm.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.1),  # Random erase for regularization
])

In [None]:
# Validation & testing transforms (no augmentation)
# Deterministic pipeline: resize to 256, take the central 224 crop, convert to a
# tensor and normalize with the same mean/std as the training pipeline.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

****Then, we create data loaders for previewing the images****

In [None]:
from torch.utils.data import DataLoader

# Load datasets: each root holds one sub-folder per class (cats/, dogs/).
# Only the training set gets the augmenting transform.
train_dataset = datasets.ImageFolder(root='/tmp/cats-v-dogs/training', transform=train_transform)
val_dataset = datasets.ImageFolder(root='/tmp/cats-v-dogs/validation', transform=transform)

if include_test:
    # Read the held-out test folder. This used to point at the validation
    # folder, so the "test" loader silently re-served the validation images.
    test_dataset = datasets.ImageFolder(root='/tmp/cats-v-dogs/test', transform=transform)

# Create data loaders; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

if include_test:
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

****Now, make sure we got the correct data****

In [None]:
# Display names indexed by the integer label carried by each sample.
class_names = ['Cat', 'Dog']

def plot_data(data_loader, n_images):
    """
    Plots images from the first batch of a PyTorch DataLoader, titled with their class.

    The images are only "random" if the loader itself shuffles. They are
    shown exactly as the loader yields them.

    Args:
        data_loader: a PyTorch DataLoader instance yielding (images, labels)
        n_images: number of images to plot; capped at the batch size
    """
    # Fetch a batch of images and labels
    images, labels = next(iter(data_loader))  # Use iter() and next() to get a batch

    # Never index past the end of the batch (e.g. a short final batch).
    n_shown = min(n_images, len(images))

    # Calculate the number of rows and columns for subplots
    n_cols = 3  # Number of columns in the plot
    n_rows = (n_shown + n_cols - 1) // n_cols  # Ceiling division

    plt.figure(figsize=(14, 15))

    for i in range(n_shown):
        plt.subplot(n_rows, n_cols, i + 1)
        image = images[i].permute(1, 2, 0)  # Convert from (C, H, W) to (H, W, C) for matplotlib
        if image.shape[-1] == 1:  # Grayscale image
            plt.imshow(image.squeeze(), cmap='gray')
        else:  # RGB image
            plt.imshow(image)
        plt.title(class_names[labels[i].item()])  # Get the label as a Python scalar
        plt.axis('off')

    plt.tight_layout()  # Adjust layout to prevent overlap
    plt.show()

In [None]:
# Preview six images from one training batch (augmented by train_transform).
plot_data(train_loader, n_images=6)

In [None]:
# Preview six images from the first validation batch.
plot_data(val_loader, n_images=6)

In [None]:
# Preview nine images from the first test batch, when a test split was requested.
if include_test:
    plot_data(test_loader, n_images=9)

# III) Model

**Bottleneck Blocks**

In [None]:
import torch.nn.functional as F

class BottleneckD(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions, each preceded by BatchNorm + ReLU.

    The block outputs ``planes * expansion`` channels. ``stride`` is applied by
    the 3x3 convolution. ``downsample``, when given, is applied to the block
    input to produce the shortcut that is added to the main path. Without it
    the input itself is the shortcut, so its shape must already match.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        width = planes
        out_channels = planes * self.expansion

        # Modules are created in the same order and under the same attribute
        # names as before, which keeps parameter names and initial values stable.
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False)

        self.bn2 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False)

        self.bn3 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, out_channels, kernel_size=1, bias=False)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Main path: three (BatchNorm -> ReLU -> conv) stages in sequence.
        stages = (
            (self.bn1, self.conv1),
            (self.bn2, self.conv2),
            (self.bn3, self.conv3),
        )
        out = x
        for norm, conv in stages:
            out = conv(F.relu(norm(out), inplace=True))

        # Shortcut: the raw input, or its projection when a downsample module exists.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return F.relu(out, inplace=True)


****Define ResNet-50 architecture****

In [None]:
class ResNet50D(nn.Module):
    """Image classifier built from a three-convolution stem, four stages of
    BottleneckD blocks (3, 4, 6 and 3 blocks) and a linear head.

    Args:
        num_classes: size of the output layer (number of logits per image).
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Channel count entering the next stage; updated by _make_layer.
        self.inplanes = 64

        # ResNet-D Stem
        # Three 3x3 convolutions (3 -> 32 -> 32 -> 64 channels); the first
        # convolution and the final max-pool both use stride 2.
        self.stem = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, 3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, 3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2, padding=1)
        )

        # Layers
        # Each stage outputs planes * BottleneckD.expansion channels; stages 2-4
        # use stride 2 in their first block.
        self.layer1 = self._make_layer(BottleneckD, 64, 3, stride=1)
        self.layer2 = self._make_layer(BottleneckD, 128, 4, stride=2)
        self.layer3 = self._make_layer(BottleneckD, 256, 6, stride=2)
        self.layer4 = self._make_layer(BottleneckD, 512, 3, stride=2)

        # Head
        # Global average pooling to 1x1, then a linear layer producing the logits.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * BottleneckD.expansion, num_classes)

        # Initialization
        # Kaiming-normal weights for every convolution; BatchNorm starts as the
        # identity (weight 1, bias 0). The linear head keeps its default init.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of `blocks` consecutive `block` modules.

        Only the first block receives `stride` and the shortcut projection; the
        remaining blocks use the defaults (stride 1, no projection).
        Updates self.inplanes to the stage's output channel count.
        """
        downsample = None

        # A projection is needed whenever the first block changes the spatial
        # size or the channel count, so the shortcut matches the main path.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample_layers = []

            # Spatial reduction is done by average pooling, so the 1x1
            # convolution below can stay at stride 1.
            if stride != 1:
                downsample_layers.append(
                    nn.AvgPool2d(kernel_size=2, stride=stride, ceil_mode=True)
                )

            downsample_layers.extend([
                nn.Conv2d(self.inplanes, planes * block.expansion, 1, stride=1, bias=False),
                nn.BatchNorm2d(planes * block.expansion)
            ])

            downsample = nn.Sequential(*downsample_layers)

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion

        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.stem(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.fc(x)
        return x

In [None]:
# Build the two-class network and move it to the selected device.
model = ResNet50D(num_classes=2).to(device)

****Now we define the loss function and optimizer used for the model****

In [None]:
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.amp import GradScaler


# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer (AdamW: Adam with decoupled weight decay)
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.05)

# Learning rate scheduler.
# T_max matches the 16 epochs run by the training loop (one scheduler.step()
# per epoch). With the previous T_max=10 the cosine schedule reached its
# minimum after 10 epochs and then increased the learning rate again for the
# remaining ones. Keep this value in sync with the number of epochs.
scheduler = CosineAnnealingLR(optimizer, T_max=16)  # LR scheduler

# Mixed precision: gradient scaling is only useful (and only enabled) on CUDA;
# when disabled, the scaler simply passes losses and optimizer steps through.
scaler = GradScaler(enabled=(device.type == "cuda"))

# IV) Evaluation

****After all those implementations and setups, we are ready to run the model and present the results****

In [None]:
from torch.cuda.amp import autocast
from tqdm import tqdm  # For progress bars

In [None]:
# --- Training Phase ---
def train(model, train_loader, criterion, optimizer, scaler, epoch):
    """Train the model for one epoch.

    Args:
        model: the network to train; batches are moved to the device of its parameters
        train_loader: iterable of (images, labels) batches
        criterion: loss function called as criterion(outputs, labels)
        optimizer: optimizer updating the model's parameters
        scaler: gradient scaler used for the mixed-precision backward pass and step
        epoch: epoch number, used only for the progress-bar label

    Returns:
        (mean of the per-batch losses, accuracy in percent) over the epoch.
    """
    model.train()
    # Follow the model's own device rather than a notebook-level global.
    model_device = next(model.parameters()).device
    # torch.autocast replaces the deprecated torch.cuda.amp.autocast(); mixed
    # precision is only switched on for CUDA, as before.
    use_amp = model_device.type == "cuda"
    amp_device_type = "cuda" if use_amp else "cpu"

    train_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch}"):
        images, labels = images.to(model_device), labels.to(model_device)

        with torch.autocast(device_type=amp_device_type, enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        _, predicted = outputs.max(1)  # index of the largest logit per sample
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        train_loss += loss.item()

    return train_loss / len(train_loader), 100. * correct / total

In [None]:
# --- Validation Phase ---
def validate(model, val_loader, criterion):
    """Evaluate the model on a loader without updating any weights.

    Args:
        model: the network to evaluate; batches are moved to the device of its parameters
        val_loader: iterable of (images, labels) batches
        criterion: loss function called as criterion(outputs, labels)

    Returns:
        (mean of the per-batch losses, accuracy in percent) over the loader.
    """
    model.eval()
    # Follow the model's own device rather than a notebook-level global.
    model_device = next(model.parameters()).device
    # torch.autocast replaces the deprecated torch.cuda.amp.autocast(); mixed
    # precision is only switched on for CUDA, as before.
    use_amp = model_device.type == "cuda"
    amp_device_type = "cuda" if use_amp else "cpu"

    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validating"):
            images, labels = images.to(model_device), labels.to(model_device)

            with torch.autocast(device_type=amp_device_type, enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            _, predicted = outputs.max(1)  # index of the largest logit per sample
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            val_loss += loss.item()

    return val_loss / len(val_loader), 100. * correct / total

In [None]:
# Per-epoch metric histories, plus the best validation accuracy seen so far.
train_loss_history, train_acc_history = [], []
val_loss_history, val_acc_history = [], []
best_acc = 0.0

# Run 16 epochs: train, validate, then advance the learning-rate schedule.
for epoch in range(16):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, scaler, epoch)
    val_loss, val_acc = validate(model, val_loader, criterion)
    scheduler.step()

    # Record this epoch's metrics in their respective histories.
    for history, value in (
        (train_loss_history, train_loss),
        (train_acc_history, train_acc),
        (val_loss_history, val_loss),
        (val_acc_history, val_acc),
    ):
        history.append(value)

    # Checkpoint the weights whenever validation accuracy strictly improves.
    improved = val_acc > best_acc
    if improved:
        best_acc = val_acc
        torch.save(model.state_dict(), "best_resnet50d_randaug_tta.pth")

    print(f"Epoch {epoch}: Train Loss: {train_loss:.4f} | Val Acc: {val_acc:.2f}%")

****Now we make evaluations on the test data****

In [None]:
# Load the best model weights (mapped onto the current device so a checkpoint
# saved on GPU can also be loaded on a CPU-only machine)
model.load_state_dict(torch.load("best_resnet50d_randaug_tta.pth", map_location=device))
model.to(device)  # Ensure model is on the correct device

# Run testing
if include_test:
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct = 0
    total = 0

    # Mixed precision only on CUDA; torch.autocast replaces the deprecated
    # torch.cuda.amp.autocast().
    use_amp = device.type == "cuda"
    amp_device_type = "cuda" if use_amp else "cpu"

    with torch.no_grad():
        # Iterate the test loader. This loop used to run over val_loader, so the
        # reported "test" metrics were really validation metrics.
        for images, labels in tqdm(test_loader, desc="Testing"):
            images, labels = images.to(device), labels.to(device)

            with torch.autocast(device_type=amp_device_type, enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            # Accumulate into test_loss. It used to be added to val_loss, which
            # left test_loss at 0.
            test_loss += loss.item()

    # Compute average loss and accuracy
    test_loss /= len(test_loader)
    test_acc = 100 * correct / total

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%")

## IV.1) Visualize the prediction

In [None]:
def plot_prediction(data_loader, model, n_images, class_names):
    """
    Plot images from the first batch of a loader with their true and predicted class.

    Titles are green when the prediction matches the label and red otherwise.

    Args:
        data_loader: PyTorch DataLoader instance yielding (images, labels)
        model: Trained PyTorch model
        n_images: Number of images to plot; capped at the batch size
        class_names: List of class names (e.g., ['Cat', 'Dog'])
    """
    model.eval()  # Set the model to evaluation mode
    images, labels = next(iter(data_loader))  # Fetch a batch of images and labels

    # Run the batch on whatever device the model lives on
    device = next(model.parameters()).device
    images = images.to(device)

    # Get model predictions
    with torch.no_grad():
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)  # Get the predicted class indices

    # Convert tensors to numpy arrays for visualization
    images = images.cpu().numpy()
    labels = labels.cpu().numpy()
    predictions = predictions.cpu().numpy()

    # Size the grid from the number of images actually shown. The fixed 4x3
    # grid used before failed as soon as more than 12 images were requested.
    n_shown = min(n_images, len(images))  # Ensure we don't exceed the batch size
    n_cols = 3
    n_rows = (n_shown + n_cols - 1) // n_cols  # Ceiling division

    # Plot the images with predictions
    plt.figure(figsize=(14, 15))
    for i in range(n_shown):
        plt.subplot(n_rows, n_cols, i + 1)
        image = np.transpose(images[i], (1, 2, 0))  # Convert from (C, H, W) to (H, W, C)
        if images[i].shape[0] == 1:  # Grayscale image
            plt.imshow(image.squeeze(), cmap='gray')
        else:  # RGB image
            plt.imshow(image)

        # Set title color based on prediction correctness
        is_correct = predictions[i] == labels[i]
        plt.title(
            f"True: {class_names[labels[i]]}\nPred: {class_names[predictions[i]]}",
            color='g' if is_correct else 'r',
        )
        plt.axis('off')

    plt.tight_layout()  # Adjust layout to prevent overlap
    plt.show()

In [None]:
# Show predictions for nine images of the first validation batch.
plot_prediction(val_loader, model, n_images=9, class_names=['Cat','Dog'])

In [None]:
# Show predictions for nine images of the first test batch, when a test split exists.
if include_test:
    plot_prediction(test_loader, model, n_images=9, class_names=['Cat','Dog'])

## IV.2) Visualize training process

In [None]:
# Create a DataFrame to store the training history
# One row per epoch; the 'epoch' column is 1-based, while the training loop
# counts epochs from 0.
results = pd.DataFrame({
    'epoch': range(1, len(train_loss_history) + 1),
    'train_loss': train_loss_history,
    'train_acc': train_acc_history,
    'val_loss': val_loss_history,
    'val_acc': val_acc_history
})

# Display the last few rows of the DataFrame
print(results.tail())

In [None]:
# Plot training and validation loss
# The curves get the whole figure. The earlier plt.subplot(1, 2, 1) call drew
# them in the left half only and left the right half empty, because the
# accuracy plot lives in a separate cell (and therefore a separate figure).
plt.figure(figsize=(12, 5))
plt.plot(results['epoch'], results['train_loss'], label='Train Loss')
plt.plot(results['epoch'], results['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Plot training and validation accuracy
# This cell opens its own figure. The earlier plt.subplot(1, 2, 2) call, made
# without creating a figure first, produced a default-size figure with the
# curves squeezed into the right half and nothing in the left half.
plt.figure(figsize=(12, 5))
plt.plot(results['epoch'], results['train_acc'], label='Train Accuracy')
plt.plot(results['epoch'], results['val_acc'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.tight_layout()
plt.show()
