# Dogs vs Cats - Part 3

Classify whether images contain either a dog or a cat. Download the data from: https://www.kaggle.com/c/dogs-vs-cats/data

Using an ensemble we achieve an accuracy of 99.0889% on our test set.

This notebook assumes you have already run the steps from the Dogs vs Cats - Part 1 notebook where you downloaded the images and created the training, validation, and test directories.

The dataset contains 25,000 images of dogs and cats (12,500 from each class). We will create a new dataset containing three subsets: a training set with 10,000 samples of each class, a validation set with 1,250 samples of each class, and a test set with 1,250 samples of each class.


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision
import torchvision.models as models
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
# Load ImageNet-pretrained backbones from the torchvision hub repository.
# `weights=` replaces the `pretrained=True` flag, which torchvision has
# deprecated; "IMAGENET1K_V1" names the checkpoint that flag resolved to.
model_resnet18 = torch.hub.load('pytorch/vision', 'resnet18', weights='IMAGENET1K_V1')
model_alxnet = torch.hub.load('pytorch/vision', 'alexnet', weights='IMAGENET1K_V1')

Using cache found in C:\Users\henri/.cache\torch\hub\pytorch_vision_main
Using cache found in C:\Users\henri/.cache\torch\hub\pytorch_vision_main
Using cache found in C:\Users\henri/.cache\torch\hub\pytorch_vision_main


RuntimeError: Cannot find callable lenet in hubconf

In [None]:
# Freeze every pretrained weight whose parameter name does not contain "bn".
# The "bn"-named (BatchNorm) parameters stay trainable: they were fitted to the
# mean and standard deviation of ImageNet, and freezing them may lose some signal.
# NOTE(review): the match is purely name-based, so any normalisation layer
# whose parameter names lack "bn" is frozen as well -- confirm this is intended.
for pretrained_net in (model_resnet18, model_alxnet):
    for param_name, weight in pretrained_net.named_parameters():
        if "bn" in param_name:
            continue
        weight.requires_grad = False

In [None]:
# Swap each network's final layer for a head that emits `num_classes` scores.
num_classes = 2

# ResNet-18: a small two-layer head fed by the width of the original `fc` layer.
resnet18_head_inputs = model_resnet18.fc.in_features
model_resnet18.fc = nn.Sequential(
    nn.Linear(resnet18_head_inputs, 512),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(512, num_classes),
)

# AlexNet: only the last entry of the classifier stack (index 6) is replaced.
model_alxnet.classifier[6] = nn.Linear(4096, num_classes)

In [None]:
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=5, device="cpu"):
    """Fit ``model`` and print a loss/accuracy summary after every epoch.

    Each epoch performs one optimisation pass over ``train_loader`` followed
    by one gradient-free evaluation pass over ``val_loader``.

    Args:
        model: Network to optimise. It is put in train mode for the training
            pass and in eval mode for the validation pass of each epoch.
        optimizer: Optimiser stepping ``model``'s parameters.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar
            loss tensor. The per-sample averages below assume it returns the
            batch mean -- confirm when passing a custom loss.
        train_loader: Iterable of ``(inputs, targets)`` batches exposing a
            sized ``.dataset`` attribute.
        val_loader: Same contract as ``train_loader``; used for evaluation.
        epochs: Number of passes over the training data.
        device: Device every batch is moved to before the forward pass.

    Returns:
        None. Results are reported through ``print``.
    """
    for epoch in range(epochs):
        training_loss = 0.0
        valid_loss = 0.0

        model.train()
        for batch in train_loader:
            optimizer.zero_grad()
            inputs, targets = batch
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by batch size so a short final batch
            # does not skew the per-sample average.
            training_loss += loss.item() * inputs.size(0)
        training_loss /= len(train_loader.dataset)

        model.eval()
        num_correct = 0
        num_examples = 0
        # Validation never back-propagates, so skip building autograd graphs.
        with torch.no_grad():
            for batch in val_loader:
                inputs, targets = batch
                inputs = inputs.to(device)
                targets = targets.to(device)
                output = model(inputs)
                loss = loss_fn(output, targets)
                valid_loss += loss.item() * inputs.size(0)

                # Softmax is monotonic, so the arg-max of the raw scores
                # already identifies the predicted class.
                predictions = output.argmax(dim=1)
                num_correct += (predictions == targets).sum().item()
                num_examples += targets.numel()

        # An empty validation set reports NaN instead of dividing by zero.
        val_size = len(val_loader.dataset)
        valid_loss = valid_loss / val_size if val_size else float("nan")
        accuracy = num_correct / num_examples if num_examples else float("nan")

        print('Epoch: {}, Training Loss: {:.4f}, Validation Loss: {:.4f}, accuracy = {:.4f}'.format(epoch, training_loss,
        valid_loss, accuracy))

In [None]:
# Settings shared by the three data loaders and both transform pipelines.
batch_size = 64
num_workers = 6
img_dimensions = 224

# Normalize to the ImageNet mean and standard deviation.
# They could be calculated for the cats/dogs data set, but the ImageNet
# values give acceptable results here.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training pipeline: light augmentation, then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop(img_dimensions, scale=(0.96, 1.0), ratio=(0.95, 1.05)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

# Evaluation pipeline: plain resize only (no data augmentation).
test_transform = transforms.Compose([
    transforms.Resize([img_dimensions, img_dimensions]),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

# One folder per split.
train_data_path = "data/train/"
validation_data_path = "data/validation/"
test_data_path = "data/test/"

# Only the training split is read through the augmenting pipeline.
train_data = torchvision.datasets.ImageFolder(root=train_data_path, transform=train_transform)
validation_data = torchvision.datasets.ImageFolder(root=validation_data_path, transform=test_transform)
test_data = torchvision.datasets.ImageFolder(root=test_data_path, transform=test_transform)

# Only the training loader shuffles; validation and test keep dataset order.
train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers
)
validation_data_loader = torch.utils.data.DataLoader(
    validation_data, batch_size=batch_size, shuffle=False, num_workers=num_workers
)
test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
import matplotlib.pyplot as plt
import torchvision

def show_transformed_images(data_loader, num_images=8):
    """Plot the first images of one batch drawn from ``data_loader``.

    The images are de-normalised with the ImageNet mean and standard
    deviation before plotting, so this assumes the loader's transform
    normalised them with those same statistics.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches, with the
            images indexed along dimension 0 and three channels in dimension 1.
        num_images: Maximum number of images to draw. Fewer are drawn when
            the batch is smaller; nothing is drawn when there are none.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # Get a batch of images from the data loader (labels are not needed)
    images, _ = next(iter(data_loader))

    # Never index past the end of a short batch.
    num_images = min(num_images, images.size(0))
    if num_images <= 0:
        return

    # Denormalize the images to view them
    mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
    images = images[:num_images] * std + mean  # Denormalize to original colors

    # Four columns and at least two rows reproduce the original 2x4 layout;
    # extra rows are added when more than eight images are requested.
    columns = 4
    rows = max(2, (num_images + columns - 1) // columns)

    # Set up the figure to display images
    plt.figure(figsize=(12, 12))

    for i in range(num_images):
        img = images[i].permute(1, 2, 0)  # Move channels last for plotting
        plt.subplot(rows, columns, i + 1)
        plt.imshow(img.clamp(0, 1))  # Clamp values to [0, 1] for display
        plt.axis('off')

    plt.show()

# Preview the training data: draw one batch from the training loader (which
# applies the augmenting training transform) and plot it with the function's
# default number of images.
show_transformed_images(train_data_loader)

KeyboardInterrupt: 

In [None]:
# Report how many samples the dataset behind each loader holds.
num_training_images = len(train_data_loader.dataset)
num_validation_images = len(validation_data_loader.dataset)
num_test_images = len(test_data_loader.dataset)

print(f'Num training images: {num_training_images}')
print(f'Num validation images: {num_validation_images}')
print(f'Num test images: {num_test_images}')

Num training images: 2000
Num validation images: 600
Num test images: 400


### Train and test the models

In [None]:
def test_model(model):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_data_loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('correct: {:d}  total: {:d}'.format(correct, total))
    print('accuracy = {:f}'.format(correct / total))

In [None]:
# Show the CPU count reported by os.cpu_count().
# NOTE(review): presumably a reference point for the DataLoader `num_workers`
# setting -- confirm.
import os
os.cpu_count()

24

In [None]:
# Fine-tune ResNet-18: move it to the selected device, then run two epochs
# of Adam (learning rate 0.001) against a cross-entropy loss.
model_resnet18.to(device)
optimizer = optim.Adam(params=model_resnet18.parameters(), lr=1e-3)
train(
    model=model_resnet18,
    optimizer=optimizer,
    loss_fn=nn.CrossEntropyLoss(),
    train_loader=train_data_loader,
    val_loader=validation_data_loader,
    epochs=2,
    device=device,
)

Epoch: 0, Training Loss: 0.1056, Validation Loss: 0.0739, accuracy = 0.9733
Epoch: 1, Training Loss: 0.0797, Validation Loss: 0.0682, accuracy = 0.9750


In [None]:
# Print the accuracy of the trained ResNet-18 on the test loader.
test_model(model_resnet18)

correct: 392  total: 400
accuracy = 0.980000


In [None]:
# Save only the ResNet-18 state_dict (not the module object) to the working
# directory.
torch.save(model_resnet18.state_dict(), "./model_resnet18.pth")

In [None]:
# Fine-tune AlexNet with the same recipe: move it to the selected device, then
# run two epochs of Adam (learning rate 0.001) against a cross-entropy loss.
model_alxnet.to(device)
optimizer = optim.Adam(params=model_alxnet.parameters(), lr=1e-3)
train(
    model=model_alxnet,
    optimizer=optimizer,
    loss_fn=nn.CrossEntropyLoss(),
    train_loader=train_data_loader,
    val_loader=validation_data_loader,
    epochs=2,
    device=device,
)

Epoch: 0, Training Loss: 0.1672, Validation Loss: 0.1414, accuracy = 0.9550
Epoch: 1, Training Loss: 0.1117, Validation Loss: 0.1222, accuracy = 0.9617
