# CNN Image Classification



## Setup

In [1]:
import os
from tqdm import tqdm
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter




In [2]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [3]:
writer = SummaryWriter()

In [4]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 64

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(3, 8, 3),
            nn.ReLU(),
            nn.Conv2d(8, 16, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 5 * 5, 4096),
            nn.Linear(4096, 1000),
            nn.ReLU(),
            nn.Linear(1000, 10),
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x


In [6]:
num_epochs = 10
savepath = "./output"
os.makedirs(savepath, exist_ok=True)

In [7]:
def train(num_epochs, device, model, optimizer, criterion, trainloader, savepath):
    times = []
    for epoch in range(num_epochs): 
        s_time = time.time()
        running_loss = 0.0
        trainbar = tqdm(enumerate(trainloader), total=len(trainloader), desc=f"[TRAIN] Epoch {epoch+1}/{num_epochs}")
        for i, data in trainbar:
            inputs, labels = data

            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            acc = torch.mean((outputs.argmax(1) == labels).float())            

            writer.add_scalars(f'{device}/train', {'loss': loss, 'acc': acc}, epoch * len(trainloader) + i)
            loss.backward()
            optimizer.step()


            running_loss += loss.item()
            trainbar.set_postfix(loss=running_loss/(i+1), acc=acc.item())
        times.append(time.time()-s_time)
        print(f"{(epoch+1)/num_epochs*100:.2f}% - Loss: {running_loss/len(trainloader):.4f}", end="\r", flush=True)

    print(f"Training took {sum(times):.2f}s in total ({sum(times)/num_epochs:.2f}s per epoch)")

    torch.save(model.state_dict(), os.path.join(savepath, f"ckpt_{num_epochs}_{device}.pt"))
    return times

In [8]:
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

device = "cpu"
model.to(device)
cpu_times = train(num_epochs, device, model, optimizer, criterion, trainloader, savepath) #399.06s - 39.91s per epoch

[TRAIN] Epoch 1/10: 100%|██████████| 782/782 [00:39<00:00, 19.88it/s, acc=0.25, loss=2.3]  

10.00% - Loss: 2.3017


[TRAIN] Epoch 2/10: 100%|██████████| 782/782 [00:38<00:00, 20.09it/s, acc=0.312, loss=2.28] 

20.00% - Loss: 2.2832


[TRAIN] Epoch 3/10: 100%|██████████| 782/782 [00:38<00:00, 20.16it/s, acc=0.438, loss=2.05] 

30.00% - Loss: 2.0488


[TRAIN] Epoch 4/10: 100%|██████████| 782/782 [00:38<00:00, 20.23it/s, acc=0.375, loss=1.89]

40.00% - Loss: 1.8862


[TRAIN] Epoch 5/10: 100%|██████████| 782/782 [00:38<00:00, 20.24it/s, acc=0.375, loss=1.75]

50.00% - Loss: 1.7521


[TRAIN] Epoch 6/10: 100%|██████████| 782/782 [00:38<00:00, 20.12it/s, acc=0.375, loss=1.62]

60.00% - Loss: 1.6156


[TRAIN] Epoch 7/10: 100%|██████████| 782/782 [00:38<00:00, 20.11it/s, acc=0.312, loss=1.49]

70.00% - Loss: 1.4918


[TRAIN] Epoch 8/10: 100%|██████████| 782/782 [00:39<00:00, 19.59it/s, acc=0.5, loss=1.4]   

80.00% - Loss: 1.3964


[TRAIN] Epoch 9/10: 100%|██████████| 782/782 [00:39<00:00, 19.56it/s, acc=0.5, loss=1.31]  

90.00% - Loss: 1.3143


[TRAIN] Epoch 10/10: 100%|██████████| 782/782 [00:39<00:00, 19.61it/s, acc=0.688, loss=1.23]

100.00% - Loss: 1.2343




Training took 391.89s in total (39.19s per epoch)


In [9]:
print(torch.cuda.is_available())
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

device="cuda"
model.to(device)
gpu_times = train(num_epochs, device, model, optimizer, criterion, trainloader, savepath) # 39.91s - 3.99s per epoch

True


[TRAIN] Epoch 1/10:   2%|▏         | 13/782 [00:00<00:55, 13.80it/s, acc=0.0781, loss=2.3]


KeyboardInterrupt: 

## Testing

In [None]:
import os
from typing import List, Tuple

K = 3 # Top-K prediction
os.makedirs(savepath, exist_ok=True)
metrics = {classname: {'top1_pred': 0, 
                    'topk_pred': 0, 
                    'total_pred': 0} 
                    for classname in classes}

wrong_samples = list[Tuple]()
# Load model
model = CNN()
model.load_state_dict(torch.load(os.path.join(savepath, f"ckpt_{num_epochs}_{device}.pt")))
model.eval().to(device)
# again no gradients needed
with torch.no_grad():
    for idx, data in tqdm(enumerate(testloader), total=len(testloader), desc="[TEST]"):
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predictions = torch.topk(outputs, k=K, dim=1)  # Get top-K prediction

        total = labels.size(0)
        top1 = torch.sum(labels.eq(predictions[:, 0])).item()        
        topk = torch.any(labels.unsqueeze(1).eq(predictions), dim=1).sum().item()

        metrics[classes[labels[0]]]['top1_pred'] += top1
        metrics[classes[labels[0]]]['topk_pred'] += topk
        metrics[classes[labels[0]]]['total_pred'] += total
        
        for label, prediction in zip(labels, predictions):
            if label not in prediction:
                wrong_samples.append((images[label], label, prediction))
        
        
        


[TEST]: 100%|██████████| 157/157 [00:02<00:00, 62.94it/s]


In [None]:
# print overall accuracy
overall_top1 = sum([metrics[classname]['top1_pred'] for classname in classes]) / sum([metrics[classname]['total_pred'] for classname in classes])
print(f"Overall Top-1 Accuracy: {overall_top1*100:.2f}%")

overall_topk = sum([metrics[classname]['topk_pred'] for classname in classes]) / sum([metrics[classname]['total_pred'] for classname in classes])
print(f"Overall Top-{K} Accuracy: {overall_topk*100:.2f}%")

# print accuracy for each class
for classname in classes:
    top1 = metrics[classname]['top1_pred'] / metrics[classname]['total_pred']
    topk = metrics[classname]['topk_pred'] / metrics[classname]['total_pred']
    print(f"Class {classname} - Top-1 Accuracy: {top1*100:.2f}% - Top-{K} Accuracy: {topk*100:.2f}%")

Overall Top-1 Accuracy: 57.27%
Overall Top-3 Accuracy: 86.01%
Class plane - Top-1 Accuracy: 58.52% - Top-3 Accuracy: 86.22%
Class car - Top-1 Accuracy: 58.59% - Top-3 Accuracy: 86.27%
Class bird - Top-1 Accuracy: 58.46% - Top-3 Accuracy: 85.29%
Class cat - Top-1 Accuracy: 55.97% - Top-3 Accuracy: 85.85%
Class deer - Top-1 Accuracy: 60.38% - Top-3 Accuracy: 87.83%
Class dog - Top-1 Accuracy: 55.51% - Top-3 Accuracy: 85.94%
Class frog - Top-1 Accuracy: 60.47% - Top-3 Accuracy: 87.50%
Class horse - Top-1 Accuracy: 54.06% - Top-3 Accuracy: 84.98%
Class ship - Top-1 Accuracy: 55.26% - Top-3 Accuracy: 84.54%
Class truck - Top-1 Accuracy: 57.61% - Top-3 Accuracy: 86.55%


In [None]:
# Wrong predictions
import random
for wpred in random.sample(wrong_samples, 5):
    writer.add_image(f"Predicted: {classes[wpred[2][0]]} - Actual: {classes[wpred[1]]}", wpred[0], dataformats="CHW")

In [None]:
from tensorboard import notebook

notebook.list()

No known TensorBoard instances running.


In [None]:
notebook.display(port=6006, height=1000)

In [None]:
writer.close()