# CNN Image Classification



## Setup

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
# .

In [2]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [3]:
writer = SummaryWriter()

In [4]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 64

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=0)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(3, 8, 3),
            nn.ReLU(),
            nn.Conv2d(8, 16, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 5 * 5, 4096),
            nn.Linear(4096, 1000),
            nn.ReLU(),
            nn.Linear(1000, 10),
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x

        



In [6]:
import torch.optim as optim
from tqdm.notebook import tqdm
import time

In [7]:
num_epochs = 10
savepath = "./output"

In [8]:
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

device = "cpu"
model.to(device)

times = []
for epoch in range(num_epochs): 
    s_time = time.time()
    running_loss = 0.0
    trainbar = tqdm(enumerate(trainloader,0), total=len(trainloader), desc=f"[TRAIN] Epoch {epoch+1}/{num_epochs}")
    for i, data in trainbar:
        inputs, labels = data

        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        writer.add_scalar('Loss/train', loss, epoch * len(trainloader) + i)
        loss.backward()
        optimizer.step()


        running_loss += loss.item()
        trainbar.set_postfix(loss=running_loss/(i+1))
    times.append(time.time()-s_time)

print(f"Training took {sum(times)}s in total ({sum(times)/num_epochs}s per epoch)")

[TRAIN] Epoch 1/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 2/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 3/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 4/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 5/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 6/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 7/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 8/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 9/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 10/10:   0%|          | 0/782 [00:00<?, ?it/s]

Training took 504.0020754337311s in total (50.40020754337311s per epoch)


In [13]:
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

device="cuda"
model.to(device)

times = []
for epoch in range(num_epochs): 
    s_time = time.time()
    running_loss = 0.0
    trainbar = tqdm(enumerate(trainloader), total=len(trainloader), desc=f"[TRAIN] Epoch {epoch+1}/{num_epochs}")
    for i, data in trainbar:
        inputs, labels = data

        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        writer.add_scalar('Loss/train', loss, epoch * len(trainloader) + i)
        f1 = torch.mean((outputs.argmax(1) == labels).float())
        writer.add_scalar('F1/train', f1, epoch * len(trainloader) + i)
        loss.backward()
        optimizer.step()


        running_loss += loss.item()
        trainbar.set_postfix(loss=running_loss/(i+1), f1=f1.item())
    times.append(time.time()-s_time)

print(f"Training took {sum(times):.2f}s in total ({sum(times)/num_epochs:.2f}s per epoch)")

torch.save(model.state_dict(), savepath)

[TRAIN] Epoch 1/10:   0%|          | 0/782 [00:00<?, ?it/s]

[TRAIN] Epoch 2/10:   0%|          | 0/782 [00:00<?, ?it/s]

## Testing

In [12]:
import os


os.makedirs(savepath, exist_ok=True)
correct_pred = {classname: 0 for classname in classes}
top3_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}
wrong_pred = {classname: [] for classname in classes}
# Load model
model = CNN()
model.load_state_dict(torch.load(savepath))
model.eval().to(device)
# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            if label in torch.topk(outputs, 3).indices:
                top3_pred[classes[label]] += 1
            
            if label not in torch.topk(outputs, 3).indices:
                wrong_pred[classes[label]].append(classes[prediction])
            total_pred[classes[label]] += 1


PermissionError: [Errno 13] Permission denied: './output'

In [None]:
# print accuracy for each class
overall_accuracy = 100 * sum(correct_pred.values()) / sum(total_pred.values())
print(f'Accuracy overall: {overall_accuracy:.1f} %\n')
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

In [None]:
# print top3 accuracy for each class
overall_accuracy = 100 * sum(top3_pred.values()) / sum(total_pred.values())
print(f'Accuracy overall: {overall_accuracy:.1f} %\n')
for classname, correct_count in top3_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

In [None]:
# Wrong predictions
import random


print("\nWrong predictions:")
for classname, wrongs in wrong_pred.items():
    print(f"Class {classname}: {len(wrongs)}")
    print(wrongs[:5])
    print()

with writer.as_default():
    for i, classname, wrongs in enumerate(random.sample(list(wrong_pred.items()), 5)):
        writer.add_image(f"Wrong/{classname}", wrongs[0], i)

In [None]:
from tensorboard import notebook

notebook.list()

In [None]:
notebook.display(port=6006, height=1000)

In [None]:
writer.close()