Dog vs. Cat Image Classifier. 

Methods:-
1. Fully Connected Neural Network
2. CNN
3. Transfer Learning

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import copy
import torch
import torch.nn as nn
from torchvision.io import read_image
from torchvision import datasets, models, transforms

%matplotlib inline

In [2]:
# Mount the drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
# Prepare dataset
dir1 = r'/content/drive/MyDrive/Projects (Self)/Dog vs. Cat/data/train'
dir2 = r'/content/drive/MyDrive/Projects (Self)/Dog vs. Cat/data/val'
dir3 = r'/content/drive/MyDrive/Projects (Self)/Dog vs. Cat/data/test'

transform = transforms.Compose([transforms.Resize([96, 96]), transforms.ToTensor(), 
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train = datasets.ImageFolder(root=dir1, transform=transform) # 1600
val = datasets.ImageFolder(root=dir2, transform=transform) # 300
test = datasets.ImageFolder(root=dir3, transform=transform) # 100

In [9]:
# Create dataloaders
train_loader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True) # 50 batches
val_loader = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True) # 30 batches
test_loader = torch.utils.data.DataLoader(train, batch_size=100, shuffle=True) # 1 batch

## Fully Connected Neural Network

In [15]:
# Create a sequential model (3 hidden layers)
model = nn.Sequential(nn.Linear(3 * 96 * 96, 24),
                      nn.ReLU(),
                      nn.Linear(24, 12),
                      nn.ReLU(),
                      nn.Linear(12, 6),
                      nn.ReLU(),
                      nn.Linear(6, 2),
                      nn.Softmax(dim=1))

# Define loss and optimizer
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

In [17]:
# Train the model on the data 
best_val_loss = float('inf')
best_model_wts = copy.deepcopy(model.state_dict())
num_epoch = 15
for i in range(num_epoch):
    total_train_loss, total_val_loss, train_loss, val_loss = 0, 0, 0, 0
    for train_images, train_labels in train_loader:
        # Flatten Images
        train_images = train_images.view(train_images.shape[0], -1)
        train_pred_prob = model(train_images)     
        train_loss = loss(train_pred_prob, train_labels)
        optimizer.zero_grad()
        train_loss.backward(retain_graph=True)
        optimizer.step()
        total_train_loss += train_loss
    total_train_loss /= len(train)

    for val_images, val_labels in val_loader:
        # Flatten Images
        val_images = val_images.view(val_images.shape[0], -1)
        val_pred_prob = model(val_images)
        val_loss = loss(val_pred_prob, val_labels)
        total_val_loss += val_loss
    total_val_loss /= len(val)
    
    print("The value of train loss at epoch {} is: {}".format(i+1, total_train_loss))
    print("The value of val loss at epoch {} is: {}".format(i+1, total_val_loss))
    print()

    if total_val_loss < best_val_loss:
        best_val_loss = total_val_loss
        best_model_wts = copy.deepcopy(model.state_dict())

print("The best val loss achieved is: {}".format(best_val_loss))

The value of train loss at epoch 1 is: 0.020256146788597107
The value of val loss at epoch 1 is: 0.3331693410873413

The value of train loss at epoch 2 is: 0.01960798352956772
The value of val loss at epoch 2 is: 0.32544469833374023

The value of train loss at epoch 3 is: 0.01875516027212143
The value of val loss at epoch 3 is: 0.3159807622432709

The value of train loss at epoch 4 is: 0.018143992871046066
The value of val loss at epoch 4 is: 0.2914224863052368

The value of train loss at epoch 5 is: 0.017607230693101883
The value of val loss at epoch 5 is: 0.2812132239341736

The value of train loss at epoch 6 is: 0.016959115862846375
The value of val loss at epoch 6 is: 0.2601413130760193

The value of train loss at epoch 7 is: 0.016231916844844818
The value of val loss at epoch 7 is: 0.2573521137237549

The value of train loss at epoch 8 is: 0.01576978527009487
The value of val loss at epoch 8 is: 0.2494174689054489

The value of train loss at epoch 9 is: 0.01531133335083723
The val

In [18]:
model.load_state_dict(best_model_wts)
# Make predictions on test data
for test_images, test_labels in test_loader:
    test_images = test_images.view(test_images.shape[0], -1)
    test_pred_prob = model(test_images)
    test_y_pred = test_pred_prob.argmax(1)
    test_acc = ((test_labels == test_y_pred)*1.0).mean() * 100 # Measure accuracy on test data
print("The accuracy achieved on the test data is: {}%".format(test_acc))

The accuracy achieved on the test data is: 92.0%


## CNN

In [32]:
# Create a sequential model (3 hidden layers)
cnn = nn.Sequential(nn.Conv2d(3, 18, 4, 2, padding='valid'),  # out_channels=no. of filters, kernel_size=filter size
                    nn.ReLU(),
                    nn.Conv2d(18, 36, 4, 2, padding='valid'),
                    nn.MaxPool2d(4))

linear = nn.Sequential(nn.Linear(900, 24),   # changed from 250 to 900, check why.
                       nn.ReLU(),
                       nn.Linear(24, 2),
                       nn.Softmax(dim=1))

# Define loss and optimizer
loss = nn.CrossEntropyLoss()
optimizer_cnn = torch.optim.SGD(cnn.parameters(), lr=0.18)
optimizer_linear = torch.optim.SGD(linear.parameters(), lr=0.18)

In [33]:
# Train the model on the data 
best_val_loss = float('inf')
best_cnn_wts = copy.deepcopy(cnn.state_dict())
best_linear_wts = copy.deepcopy(linear.state_dict())
num_epoch = 15
for i in range(num_epoch):
    total_train_loss, total_val_loss, train_loss, val_loss = 0, 0, 0, 0
    for train_images, train_labels in train_loader:
        out_cnn = cnn(train_images)    
        out_cnn = out_cnn.reshape(out_cnn.shape[0], -1) 
        train_pred_prob = linear(out_cnn)
        train_loss = loss(train_pred_prob, train_labels)
        optimizer_cnn.zero_grad()
        optimizer_linear.zero_grad()
        train_loss.backward(retain_graph=True)
        optimizer_cnn.step()
        optimizer_linear.step()
        total_train_loss += train_loss
    total_train_loss /= len(train)

    for val_images, val_labels in val_loader:
        out_linear = cnn(val_images)
        out_linear = out_linear.reshape(out_linear.shape[0], -1)
        val_pred_prob = linear(out_linear)
        val_loss = loss(val_pred_prob, val_labels)
        total_val_loss += val_loss
    total_val_loss /= len(val)
   
    print("The value of train loss at epoch {} is: {}".format(i+1, total_train_loss))
    print("The value of val loss at epoch {} is: {}".format(i+1, total_val_loss))
    print()

    if total_val_loss < best_val_loss:
        best_val_loss = total_val_loss
        best_cnn_wts = copy.deepcopy(cnn.state_dict())
        best_linear_wts = copy.deepcopy(linear.state_dict())

print("The best val loss achieved is: {}".format(best_val_loss))

The value of train loss at epoch 1 is: 0.02166030928492546
The value of val loss at epoch 1 is: 0.36872220039367676

The value of train loss at epoch 2 is: 0.02158530056476593
The value of val loss at epoch 2 is: 0.36632752418518066

The value of train loss at epoch 3 is: 0.021323544904589653
The value of val loss at epoch 3 is: 0.35675695538520813

The value of train loss at epoch 4 is: 0.020864540711045265
The value of val loss at epoch 4 is: 0.3440302908420563

The value of train loss at epoch 5 is: 0.020128043368458748
The value of val loss at epoch 5 is: 0.32871758937835693

The value of train loss at epoch 6 is: 0.0194637980312109
The value of val loss at epoch 6 is: 0.3676465153694153

The value of train loss at epoch 7 is: 0.01931377686560154
The value of val loss at epoch 7 is: 0.3305133879184723

The value of train loss at epoch 8 is: 0.018972836434841156
The value of val loss at epoch 8 is: 0.3112010359764099

The value of train loss at epoch 9 is: 0.018446894362568855
The v

In [34]:
cnn.load_state_dict(best_cnn_wts)
linear.load_state_dict(best_linear_wts)
# Make predictions on test data
for test_images, test_labels in test_loader:
    out = cnn(test_images)
    out = out.reshape(out.shape[0], -1)
    test_pred_prob = linear(out)
    test_y_pred = test_pred_prob.argmax(1)
    test_acc = ((test_labels == test_y_pred)*1.0).mean() * 100 # Measure accuracy on test data
print("The accuracy achieved on the test data is: {}%".format(test_acc))

The accuracy achieved on the test data is: 84.0%


## Transfer Learning

In [35]:
dir1 = r'/content/drive/MyDrive/Projects (Self)/Dog vs. Cat/data/train'
dir2 = r'/content/drive/MyDrive/Projects (Self)/Dog vs. Cat/data/val'
dir3 = r'/content/drive/MyDrive/Projects (Self)/Dog vs. Cat/data/test'

# Images ought to be normalized as per the requirements of the pre-trained model
transform = transforms.Compose([transforms.Resize([224, 224]), transforms.ToTensor(), 
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

train = datasets.ImageFolder(root=dir1, transform=transform) # 1600
val = datasets.ImageFolder(root=dir2, transform=transform) # 300
test = datasets.ImageFolder(root=dir3, transform=transform) # 100

# Create dataloaders
train_loader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True) # 50 batches
val_loader = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True) # 30 batches
test_loader = torch.utils.data.DataLoader(train, batch_size=100, shuffle=True) # 1 batch

# Create model
resnet_18 = models.resnet18(pretrained=True)
# Set gradient equal to zero for earlier layers such that only final layer parameters are learned (Feature Extraction)
for param in resnet_18.parameters():
    param.requires_grad = False
resnet_18.fc = nn.Linear(512, 2)

# Define loss and optimizer
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet_18.parameters(), lr=0.1)
softmax = nn.Softmax(dim=1)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


HBox(children=(FloatProgress(value=0.0, max=46830571.0), HTML(value='')))




In [36]:
# Perform feature extraction 
resnet_18.train()
best_val_loss = float('inf')
best_model_wts = copy.deepcopy(resnet_18.state_dict())
num_epoch = 5
for i in range(num_epoch):
    total_train_loss, total_val_loss, train_loss, val_loss = 0, 0, 0, 0
    for train_images, train_labels in train_loader:
        train_pred_prob = resnet_18(train_images)     
        train_loss = loss(train_pred_prob, train_labels)
        optimizer.zero_grad()
        train_loss.backward(retain_graph=True)
        optimizer.step()
        total_train_loss += train_loss
    total_train_loss /= len(train)

    for val_images, val_labels in val_loader:
        val_pred_prob = resnet_18(val_images)
        val_loss = loss(val_pred_prob, val_labels)
        total_val_loss += val_loss
    total_val_loss /= len(val)
            
    print("The value of train loss at epoch {} is: {}".format(i+1, total_train_loss))
    print("The value of val loss at epoch {} is: {}".format(i+1, total_val_loss))
    print()

    if total_val_loss < best_val_loss:
        best_val_loss = total_val_loss
        best_model_wts = copy.deepcopy(resnet_18.state_dict())

print("The best val loss achieved is: {}".format(best_val_loss))

The value of train loss at epoch 1 is: 0.030910074710845947
The value of val loss at epoch 1 is: 0.15487460792064667

The value of train loss at epoch 2 is: 0.0065624606795609
The value of val loss at epoch 2 is: 0.26391637325286865

The value of train loss at epoch 3 is: 0.005241422448307276
The value of val loss at epoch 3 is: 0.19746126234531403

The value of train loss at epoch 4 is: 0.006290162913501263
The value of val loss at epoch 4 is: 0.13073702156543732

The value of train loss at epoch 5 is: 0.004361941013485193
The value of val loss at epoch 5 is: 0.2406197339296341

The best val loss achieved is: 0.13073702156543732


In [37]:
resnet_18.load_state_dict(best_model_wts)
resnet_18.eval()
# Make predictions on test data
for test_images, test_labels in test_loader:
    test_pred_prob = softmax.forward(resnet_18(test_images))
    test_y_pred = test_pred_prob.argmax(1)
test_acc = ((test_labels == test_y_pred)*1.0).mean() * 100 # Measure accuracy on test data
print("The accuracy achieved on the test data is: {}%".format(test_acc))

The accuracy achieved on the test data is: 98.0%
