# Assignment 3 : Multi-label Image Classification

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
from sklearn.metrics import average_precision_score
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from kaggle_submission import output_submission_csv
from classifier import Classifier
from voc_dataloader import VocDataset, VOC_CLASSES

%matplotlib inline
%load_ext autoreload
%autoreload 2
torch.cuda.empty_cache()

In this assignment, you train a classifier to do multi-label classification on the PASCAL VOC 2007 dataset. The dataset has 20 different classes, any of which can appear in a given image. Your classifier will predict whether each class appears in an image. This task is slightly different from exclusive multiclass classification, such as the ImageNet competition, where only the single most appropriate class is predicted for an image.

## Reading Pascal Data

### Loading Training Data

In the following cell we will load the training data and also apply some transforms to the data. Feel free to apply more [transforms](https://pytorch.org/docs/stable/torchvision/transforms.html) for data augmentation which can lead to better performance. 

In [2]:
# Select the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# Additional info when using CUDA (all queries below target GPU index 0).
if device.type == 'cuda':
    # torch.cuda.memory_cached is the deprecated name of memory_reserved.
    # Prefer the new name and only fall back on PyTorch builds that lack it.
    _memory_reserved = getattr(torch.cuda, 'memory_reserved', None) or torch.cuda.memory_cached
    _BYTES_PER_GB = 1024**3
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/_BYTES_PER_GB,1), 'GB')
    print('Cached:   ', round(_memory_reserved(0)/_BYTES_PER_GB,1), 'GB')

Using device: cuda

Tesla P100-PCIE-16GB
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [3]:
# Per-channel normalisation; the same object is reused by the evaluation
# pipeline so both splits are scaled identically.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: random resized crop to 227 and random horizontal flip for
# augmentation, followed by tensor conversion and normalisation.
_train_steps = [
    transforms.RandomResizedCrop(227),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
]
train_transform = transforms.Compose(_train_steps)


In [4]:
# Training split, read from the VOC2007 devkit folder and passed through the
# augmenting training transform.
ds_train = VocDataset(
    'VOCdevkit_2007/VOC2007/',
    'train',
    train_transform,
)

### Loading Validation Data

We will load the validation split of the PASCAL VOC 2007 dataset. Do __NOT__ add data augmentation transforms to validation data.

In [5]:
# Evaluation pipeline (no random augmentation): resize, centre-crop to 227,
# convert to a tensor, then apply the shared normalisation.
_eval_steps = [
    transforms.Resize(227),
    transforms.CenterCrop(227),
    transforms.ToTensor(),
    normalize,
]
test_transform = transforms.Compose(_eval_steps)

In [6]:
# Validation split from the same devkit folder, using the deterministic
# evaluation transform.
ds_val = VocDataset(
    'VOCdevkit_2007/VOC2007/',
    'val',
    test_transform,
)

### Visualizing the Data

PASCAL VOC has bounding box annotations in addition to class labels. Use the following code to visualize some random examples and corresponding annotations from the train set. 

# Classification

In [7]:
# Pin computation to the first GPU when CUDA is usable, otherwise to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [8]:
# Show the selected device as this cell's output.
device

device(type='cuda', index=0)

In [9]:
# Mini-batches of 48 training samples, reshuffled every epoch and loaded by
# 10 worker processes.
train_loader = torch.utils.data.DataLoader(
    ds_train,
    batch_size=48,
    shuffle=True,
    num_workers=10,
)

In [10]:
# Validation batches are served in dataset order. Shuffling only helps
# training; a fixed order keeps the batch composition, and therefore the
# batch-averaged validation loss, identical from one evaluation to the next.
val_loader = torch.utils.data.DataLoader(dataset=ds_val,
                                         batch_size=48,
                                         shuffle=False,
                                         num_workers=10)

In [11]:
def train_classifier(train_loader, classifier, criterion, optimizer):
    """Train ``classifier`` for one pass over ``train_loader``.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        train_loader: Iterable yielding ``(images, labels, _)`` batches; the
            third element of each batch is ignored.
        classifier: Model invoked as ``classifier(images)`` to produce logits.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            once per batch.

    Returns:
        float: Unweighted mean of the per-batch loss values.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    classifier.train()
    batch_losses = []
    for images, labels, _ in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = classifier(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        # Keep only a detached value: holding on to the live loss tensor would
        # keep each batch's autograd history reachable for the whole epoch.
        batch_losses.append(loss.detach())
    if not batch_losses:
        # torch.stack([]) would fail with a less helpful message.
        raise ValueError('train_loader yielded no batches')
    return torch.stack(batch_losses).mean().item()

In [12]:
def test_classifier(test_loader, classifier, criterion, print_ind_classes=True):
    classifier.eval()
    losses = []
    with torch.no_grad():
        y_true = np.zeros((0,21))
        y_score = np.zeros((0,21))
        for i, (images, labels, _) in enumerate(test_loader):
            images, labels = images.to(device), labels.to(device)
            logits = classifier(images)
            y_true = np.concatenate((y_true, labels.cpu().numpy()), axis=0)
            y_score = np.concatenate((y_score, logits.cpu().numpy()), axis=0)
            loss = criterion(logits, labels)
            losses.append(loss)
        aps = np.empty(0)
        # ignore first class which is background
        for i in range(1, y_true.shape[1]):
            ap = average_precision_score(y_true[:, i], y_score[:, i])
            if print_ind_classes:
                print('-------  Class: {:<12}     AP: {:>8.4f}  -------'.format(VOC_CLASSES[i], ap))
            aps = np.append(aps,ap)
            mAP = np.mean(aps)
            test_loss = torch.mean(torch.stack(losses))
            print('mAP: {0:.4f}'.format(mAP))
            print('Avg loss: {}'.format(test_loss))

    return mAP, test_loss, aps

## Modifying the network 

The network you are given as is will allow you to reach around 0.15-0.2 mAP. To meet the benchmark for this assignment you will need to improve the network. There are a variety of different approaches you should try:

* Network architecture changes
    * Number of layers: try adding layers to make your network deeper
    * Batch normalization: adding batch norm between layers will likely give you a significant performance increase
    * Residual connections: as you increase the depth of your network, you will find that having residual connections like those in ResNet architectures will be helpful
* Optimizer: Instead of plain SGD, you may want to add a learning rate schedule, add momentum, or use one of the other optimizers you have learned about like Adam. Check the `torch.optim` package for other optimizers
* Data augmentation: You should use the `torchvision.transforms` module to try adding random resized crops and horizontal flips of the input data. Check `transforms.RandomResizedCrop` and `transforms.RandomHorizontalFlip` for this
* Epochs: Once you have found a generally good hyperparameter setting try training for more epochs
* Loss function: You might want to add weighting to the `MultiLabelSoftMarginLoss` for classes that are less well represented or experiment with a different loss function



In [13]:
# Build the network, then move it onto the selected device.
classifier = Classifier()
classifier = classifier.to(device)

In [14]:
# Multi-label loss applied to the classifier's logits.
criterion = nn.MultiLabelSoftMarginLoss()

# SGD with momentum over every parameter of the classifier.
_sgd_settings = {'lr': 0.01, 'momentum': 0.9}
optimizer = torch.optim.SGD(classifier.parameters(), **_sgd_settings)

In [15]:
# Lower the learning rate when the monitored value (to be minimised) plateaus.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
)

In [17]:
# Training the Classifier
NUM_EPOCHS = 100
TEST_FREQUENCY = 10  # run a validation pass every TEST_FREQUENCY epochs

for epoch in range(1, NUM_EPOCHS+1):
    print("Starting epoch number " + str(epoch))
    train_loss = train_classifier(train_loader, classifier, criterion, optimizer)
    # The plateau scheduler monitors the epoch's mean training loss.
    scheduler.step(train_loss)
    print("Loss for Training on Epoch " +str(epoch) + " is "+ str(train_loss))
    if epoch % TEST_FREQUENCY == 0:
        # Announce the evaluation before running it, so the report printed by
        # test_classifier appears under this heading rather than above it.
        print('Evaluating classifier')
        mAP_val, val_loss, _ = test_classifier(val_loader, classifier, criterion)
        print("Mean Precision Score for Testing on Epoch " +str(epoch) + " is "+ str(mAP_val))
        

Starting epoch number 1
Loss for Training on Epoch 1 is 0.18432274460792542
Starting epoch number 2
Loss for Training on Epoch 2 is 0.18352176249027252
Starting epoch number 3
Loss for Training on Epoch 3 is 0.18159033358097076
Starting epoch number 4
Loss for Training on Epoch 4 is 0.17998440563678741
Starting epoch number 5
Loss for Training on Epoch 5 is 0.18265478312969208
Starting epoch number 6
Loss for Training on Epoch 6 is 0.1813393235206604
Starting epoch number 7
Loss for Training on Epoch 7 is 0.17857010662555695
Starting epoch number 8
Loss for Training on Epoch 8 is 0.1790577918291092
Starting epoch number 9
Loss for Training on Epoch 9 is 0.1769711822271347
Starting epoch number 10
Loss for Training on Epoch 10 is 0.176627054810524
-------  Class: aeroplane        AP:   0.5877  -------
mAP: 0.5877
Avg loss: 0.183701753616333
-------  Class: bicycle          AP:   0.3804  -------
mAP: 0.4840
Avg loss: 0.183701753616333
-------  Class: bird             AP:   0.3241  ------

Loss for Training on Epoch 31 is 0.1624022126197815
Starting epoch number 32
Loss for Training on Epoch 32 is 0.16307410597801208
Starting epoch number 33
Loss for Training on Epoch 33 is 0.16485032439231873
Starting epoch number 34
Loss for Training on Epoch 34 is 0.16538307070732117
Starting epoch number 35
Loss for Training on Epoch 35 is 0.1616285890340805
Starting epoch number 36
Loss for Training on Epoch 36 is 0.16052711009979248
Starting epoch number 37
Loss for Training on Epoch 37 is 0.15968124568462372
Starting epoch number 38
Loss for Training on Epoch 38 is 0.1586819738149643
Starting epoch number 39
Loss for Training on Epoch 39 is 0.15662553906440735
Starting epoch number 40
Loss for Training on Epoch 40 is 0.15795305371284485
-------  Class: aeroplane        AP:   0.6505  -------
mAP: 0.6505
Avg loss: 0.18059642612934113
-------  Class: bicycle          AP:   0.4540  -------
mAP: 0.5523
Avg loss: 0.18059642612934113
-------  Class: bird             AP:   0.2953  -------

Loss for Training on Epoch 61 is 0.14252504706382751
Starting epoch number 62
Loss for Training on Epoch 62 is 0.14560554921627045
Starting epoch number 63
Loss for Training on Epoch 63 is 0.14786005020141602
Starting epoch number 64
Loss for Training on Epoch 64 is 0.14562571048736572
Starting epoch number 65
Loss for Training on Epoch 65 is 0.14148268103599548
Starting epoch number 66
Loss for Training on Epoch 66 is 0.1424628496170044
Starting epoch number 67
Loss for Training on Epoch 67 is 0.14440396428108215
Starting epoch number 68
Loss for Training on Epoch 68 is 0.14283351600170135
Starting epoch number 69
Loss for Training on Epoch 69 is 0.14145396649837494
Starting epoch number 70
Loss for Training on Epoch 70 is 0.14216147363185883
-------  Class: aeroplane        AP:   0.6654  -------
mAP: 0.6654
Avg loss: 0.17586827278137207
-------  Class: bicycle          AP:   0.4300  -------
mAP: 0.5477
Avg loss: 0.17586827278137207
-------  Class: bird             AP:   0.3801  -----

Loss for Training on Epoch 91 is 0.12875685095787048
Starting epoch number 92
Loss for Training on Epoch 92 is 0.13049253821372986
Starting epoch number 93
Loss for Training on Epoch 93 is 0.12729693949222565
Starting epoch number 94
Loss for Training on Epoch 94 is 0.12914158403873444
Starting epoch number 95
Loss for Training on Epoch 95 is 0.12787339091300964
Starting epoch number 96
Loss for Training on Epoch 96 is 0.1277117282152176
Starting epoch number 97
Loss for Training on Epoch 97 is 0.126104936003685
Starting epoch number 98
Loss for Training on Epoch 98 is 0.12957783043384552
Starting epoch number 99
Loss for Training on Epoch 99 is 0.1290506273508072
Starting epoch number 100
Loss for Training on Epoch 100 is 0.12924206256866455
-------  Class: aeroplane        AP:   0.6688  -------
mAP: 0.6688
Avg loss: 0.18790727853775024
-------  Class: bicycle          AP:   0.4897  -------
mAP: 0.5793
Avg loss: 0.18790727853775024
-------  Class: bird             AP:   0.3713  ------

In [18]:
# Save the classifier network's weights (state dict only).
# Suggestion: you can save checkpoints of your network during training and reload them later
_checkpoint_path = './voc_classifier.pth'
torch.save(classifier.state_dict(), _checkpoint_path)

# Evaluate on test set



In [20]:
# Test split, read from its own devkit folder with the evaluation transform.
ds_test = VocDataset(
    'VOCdevkit_2007/VOC2007test/',
    'test',
    test_transform,
)

# Fixed-order loader over the test split.
test_loader = torch.utils.data.DataLoader(
    ds_test,
    batch_size=48,
    shuffle=False,
    num_workers=10,
)

# Score the trained classifier on the test split; test_aps holds the
# per-class average precisions returned by test_classifier.
mAP_test, test_loss, test_aps = test_classifier(
    test_loader,
    classifier,
    criterion,
)

-------  Class: aeroplane        AP:   0.6838  -------
mAP: 0.6838
Avg loss: 0.18324331939220428
-------  Class: bicycle          AP:   0.4408  -------
mAP: 0.5623
Avg loss: 0.18324331939220428
-------  Class: bird             AP:   0.3505  -------
mAP: 0.4917
Avg loss: 0.18324331939220428
-------  Class: boat             AP:   0.4529  -------
mAP: 0.4820
Avg loss: 0.18324331939220428
-------  Class: bottle           AP:   0.1997  -------
mAP: 0.4255
Avg loss: 0.18324331939220428
-------  Class: bus              AP:   0.4028  -------
mAP: 0.4217
Avg loss: 0.18324331939220428
-------  Class: car              AP:   0.6740  -------
mAP: 0.4578
Avg loss: 0.18324331939220428
-------  Class: cat              AP:   0.4378  -------
mAP: 0.4553
Avg loss: 0.18324331939220428
-------  Class: chair            AP:   0.4717  -------
mAP: 0.4571
Avg loss: 0.18324331939220428
-------  Class: cow              AP:   0.2629  -------
mAP: 0.4377
Avg loss: 0.18324331939220428
-------  Class: diningtable   

In [21]:
# Write the per-class test APs to the submission file.
_submission_path = 'my_solution.csv'
output_submission_csv(_submission_path, test_aps)