# PuppyFinder: A Dog Image Classification Project

*Tyler Tan & Chase Reynders*

## Introduction

We are using the [Stanford Dogs Dataset](https://www.kaggle.com/datasets/jessicali9530/stanford-dogs-dataset?resource=download), downloaded from Kaggle.

Before running all chunks, **PLEASE FOLLOW THE SETUP SECTION TO LOAD THE DATASET**. Thank you!





## Setup

First, import the relevant packages:


In [61]:
from IPython.display import display, Image
import os
import random
import statistics

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
import torch.optim as optim
from torch.utils.data import Dataset

import matplotlib.pyplot as plt
import numpy as np

import xml.etree.ElementTree as ET
from PIL import Image

In [20]:
# TODO: Ensure this variable is the stanford_dogs directory path.
DATASET_PATH = '/Users/chasereynders/Desktop/prac/stanford_dogs'

# try to cd to the specified path
try:
  os.environ['DATASET_PATH'] = DATASET_PATH
  os.chdir(os.environ['DATASET_PATH'])
  print('Directory "stanford_dogs" successfully "cd"-ed into. You may proceed!')
except:
  print('Invalid path :(')


Directory "stanford_dogs" successfully "cd"-ed into. You may proceed!


## Exploratory Data Analysis

### Dataset Summary Statistics:



In [22]:
image_dir = os.path.join(DATASET_PATH, 'images/Images')
files = os.listdir('images/Images')

print(files)

# Dictionary to store breed names and their corresponding image counts
breed_counts = {}

# Iterate through each file
for file in files:
    file_path = os.path.join(image_dir, file)
    if os.path.isdir(file_path):
        # Count the number of jpg files in the directory
        jpg_count = sum(1 for filename in os.listdir(file_path))
        # Store the breed name and its count in the dictionary
        breed_counts[file] = jpg_count

# Print the list of files and their corresponding image counts
for i, (breed, count) in enumerate(breed_counts.items(), start=1):
    print(f'{i}. {breed.split("-", 1)[-1].replace("_", " ")}: ({count} images)')
print()


# Some more summary stats
average = statistics.mean(breed_counts.values())
median = statistics.median(breed_counts.values())
std_dev = statistics.stdev(breed_counts.values())

print(f'{len(breed_counts)} total breeds')
print(f'{sum(breed_counts.values())} total images')
print("Average image count for each breed:", average)
print("Median image count for each breed:", median)
print("Image count standard deviation:", std_dev)

['n02097658-silky_terrier', 'n02092002-Scottish_deerhound', 'n02099849-Chesapeake_Bay_retriever', 'n02091244-Ibizan_hound', 'n02095314-wire-haired_fox_terrier', 'n02091831-Saluki', 'n02102318-cocker_spaniel', 'n02104365-schipperke', 'n02090622-borzoi', 'n02113023-Pembroke', 'n02105505-komondor', 'n02093256-Staffordshire_bullterrier', 'n02113799-standard_poodle', 'n02109961-Eskimo_dog', 'n02089973-English_foxhound', 'n02099601-golden_retriever', 'n02095889-Sealyham_terrier', 'n02085782-Japanese_spaniel', '.DS_Store', 'n02097047-miniature_schnauzer', 'n02110063-malamute', 'n02105162-malinois', 'n02086079-Pekinese', 'n02097130-giant_schnauzer', 'n02113978-Mexican_hairless', 'n02107142-Doberman', 'n02097209-standard_schnauzer', 'n02115913-dhole', 'n02106662-German_shepherd', 'n02106382-Bouvier_des_Flandres', 'n02110185-Siberian_husky', 'n02094258-Norwich_terrier', 'n02093991-Irish_terrier', 'n02094114-Norfolk_terrier', 'n02109525-Saint_Bernard', 'n02093754-Border_terrier', 'n02105251-briar

## Pre-Processing the Data & Random Sample

This code chunk normalizes each image and converts each one to a Tensor, a matrix-like datastructure.

Further, training, validation, and testing splits are established, an the datasets are parsed into data loaders.

In [122]:
# whippet 2349 misclassified?
# error saving image for n02105855-Shetland_sheepdog/n02105855_2933...I manually moved this image uncropped

PROCESSED_DATASET_PATH = '/Users/chasereynders/Desktop/prac/processed_stanford_dogs'

def count_objects_and_crop(dataset_path, xml_dir, xml_filename, output_directory):
    xml_file = os.path.join(dataset_path, f"annotations/Annotation/{xml_dir}/{xml_filename}")
    image_file = os.path.join(dataset_path, f"images/Images/{xml_dir}/{xml_filename}.jpg")

    # Parse XML file
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get image size
    image_width = int(root.find("size/width").text)
    image_height = int(root.find("size/height").text)

    # Get objects and crop images
    object_count = 0
    for obj in root.findall("object"):
        object_count += 1

        # Get bounding box coordinates
        xmin = int(obj.find("bndbox/xmin").text)
        ymin = int(obj.find("bndbox/ymin").text)
        xmax = int(obj.find("bndbox/xmax").text)
        ymax = int(obj.find("bndbox/ymax").text)

        # Crop image
        cropped_image = Image.open(image_file).crop((xmin, ymin, xmax, ymax))

        # Create output directory if it doesn't exist
        output_subdirectory = os.path.join(output_directory, obj.find('name').text)
        if not os.path.exists(output_subdirectory):
            os.makedirs(output_subdirectory)

        # Save cropped image
        cropped_image_filename = os.path.join(output_subdirectory, f"{xml_filename}_{object_count}.jpg")
        try:
            cropped_image.save(cropped_image_filename)
        except:
            print(f'error saving image for {xml_dir}/{xml_filename}')

    return object_count

ANNOTATION_PATH = os.path.join(DATASET_PATH, "annotations/Annotation")
annotation_dirs = os.listdir(ANNOTATION_PATH)
for annotation in annotation_dirs:
    if annotation == '.DS_Store':
        continue
    BREED_PATH = os.path.join(ANNOTATION_PATH, annotation)
    breed_annotations = os.listdir(BREED_PATH)
    for breed_annotation in breed_annotations:
        count_objects_and_crop(dataset_path=DATASET_PATH, xml_dir=annotation, xml_filename=breed_annotation, output_directory=PROCESSED_DATASET_PATH)

# Ensure there are 120 classes as is expected
assert len(os.listdir(PROCESSED_DATASET_PATH)) == 120


error saving image for n02105855-Shetland_sheepdog/n02105855_2933


In [183]:
# Define your own transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize the images to a fixed size
    transforms.ToTensor(),           # Convert PIL Image to tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize
])

# Load the dataset
full_dataset = torchvision.datasets.ImageFolder(root=PROCESSED_DATASET_PATH, transform=transform)

# Define the sizes of train, validation, and test sets
dataset_size = len(full_dataset)
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = dataset_size - train_size - val_size

# Split the dataset
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size, test_size])

# Define PyTorch data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=100, shuffle=False, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)

print(test_loader)

# Visualize a sample image
sample_images, sample_labels = next(iter(train_loader))
sample_images = sample_images.numpy()
sample_labels = sample_labels.numpy()

# Denormalize the images
sample_images = sample_images * 0.5 + 0.5

# Define classes
classes = full_dataset.classes

# # Plot the images
# fig, axes = plt.subplots(1, len(sample_images), figsize=(12, 4))
# for idx, (image, label) in enumerate(zip(sample_images, sample_labels)):
#     axes[idx].imshow(np.transpose(image, (1, 2, 0)))
#     axes[idx].set_title(classes[label])
#     axes[idx].axis('off')
# plt.show()

<torch.utils.data.dataloader.DataLoader object at 0x169290740>


## Defining the CNN EVERYTHING PAST HERE IS IRRELEVANT FOR NOW

In [178]:
# Based on this: https://learn.microsoft.com/en-us/windows/ai/windows-ml/tutorials/pytorch-train-model#define-a-convolution-neural-network
# and this: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(12)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=5, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(12)
        self.pool = nn.MaxPool2d(2,2)
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(24)
        self.conv5 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=5, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(24)
        self.fc1 = nn.Linear(24*58*58, 120) # not sure why this is 106...just printed prev shape to match

    def forward(self, input):
        output = F.relu(self.bn1(self.conv1(input)))      
        output = F.relu(self.bn2(self.conv2(output)))     
        output = self.pool(output)                        
        output = F.relu(self.bn4(self.conv4(output)))     
        output = F.relu(self.bn5(self.conv5(output)))   
        output = output.view(-1, 24*58*58)
        output = self.fc1(output)

        return output


## Training the Data

In [184]:
# Define the model, criterion, optimizer
net = Net()
net.train()  # Set the model to training mode
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=1.2, momentum=0.5)

# Define the learning rate scheduler
scheduler = StepLR(optimizer, step_size=50, gamma=0.1)

# Training loop with learning rate scheduling
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        print("Input shape:", inputs.shape)  # Debugging: Print input shape

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        print("Output shape:", outputs.shape)  # Debugging: Print output shape
        print("Output values:", outputs[0])     # Debugging: Print output values
        loss = criterion(outputs, labels)
        print("Loss:", loss.item())             # Debugging: Print loss
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.5f}')
            running_loss = 0.0
    
    # Step the learning rate scheduler
    scheduler.step()

print('Finished Training')


Input shape: torch.Size([100, 3, 128, 128])
Output shape: torch.Size([100, 120])
Output values: tensor([ 2.0723e-01, -1.3523e-02, -6.9179e-01,  6.0131e-01,  7.0736e-02,
        -2.8204e-01, -1.2092e-02,  5.5465e-01, -1.5422e-01, -2.1334e-02,
        -6.1688e-01, -6.5450e-01, -1.9237e-01,  2.6872e-01, -8.7564e-02,
         4.8747e-01, -2.2140e-01,  7.8064e-01, -2.9377e-01, -8.0403e-01,
         4.6455e-02,  2.5214e-01,  3.7372e-01, -6.2058e-01,  1.9267e-01,
         3.0592e-01, -7.3406e-02,  1.2681e-01,  3.7211e-01,  1.6903e-01,
         1.9455e-01, -1.8075e-01, -1.3363e-01,  3.3185e-01,  4.8277e-02,
        -8.5681e-01,  4.6077e-01,  2.9338e-01, -4.1754e-01, -7.4134e-01,
        -4.6924e-01,  2.3188e-01, -3.3971e-01,  4.6345e-01,  2.7729e-01,
         1.1186e-01, -1.0702e-01,  2.4943e-01, -5.8264e-01,  8.6020e-01,
         3.5840e-02, -6.6693e-01,  3.4836e-01, -1.7840e-01,  5.4976e-01,
        -3.8613e-02,  2.8090e-01, -4.1441e-01,  3.5906e-01, -1.4291e-02,
         8.1734e-02,  5.7621

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x1251e4cc0>
Traceback (most recent call last):
  File "/Users/chasereynders/Desktop/prac/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
    self._shutdown_workers()
  File "/Users/chasereynders/Desktop/prac/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1443, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.12/3.12.2_1/Framew

KeyboardInterrupt: 

In [None]:
# transfer learning approach

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)

            print()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path))
    return model

In [180]:
# Save the trained model
torch.save(net.state_dict(), os.path.join(PROCESSED_DATASET_PATH, 'model.pth'))
print("Model saved successfully.")

# Optionally, you can load the saved model later
# loaded_model = Net()
# loaded_model.load_state_dict(torch.load('model.pth'))
# loaded_model.eval()  # Set the model to evaluation mode

Model saved successfully.


In [181]:
# Define lists to store predictions and ground truth labels
all_predictions = []
all_labels = []

# Set the model to evaluation mode
net.eval()
net.load_state_dict(torch.load(os.path.join(PROCESSED_DATASET_PATH, 'model.pth')))


# Disable gradient computation for inference
with torch.no_grad():
    # Iterate over the test data
    for inputs, labels in test_loader:
        # Forward pass
        outputs = net(inputs)
        # Get the predicted class for each sample
        _, predicted = torch.max(outputs, 1)
        print(f'predicted class: {predicted}')
        # Append the predictions and ground truth labels to the lists
        all_predictions.extend(predicted.tolist())
        all_labels.extend(labels.tolist())

# Calculate accuracy
correct = sum(1 for pred, label in zip(all_predictions, all_labels) if pred == label)
total = len(all_predictions)
accuracy = correct / total

print(f'Test Accuracy: {accuracy * 100:.2f}%')


predicted class: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])
predicted class: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])
predicted class: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0

KeyboardInterrupt: 