In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Device config: run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Hyper-parameters (everything a reader is likely to tune lives here)
num_epochs = 4        # number of full passes over the training set
batch_size = 4        # images per mini-batch
learning_rate = 1e-3  # step size handed to the optimizer

In [4]:
# The dataset yields PIL images. ToTensor turns each one into a float tensor
# scaled to [0, 1]; Normalize with mean 0.5 and std 0.5 on each of the three
# channels then maps those values to the range [-1, 1].
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [5]:
# CIFAR-10 train and test splits; both are downloaded into ./data when missing
# and both go through the same `transform` pipeline.
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Only the training batches are shuffled; the test set is read in a fixed order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

0.0%

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100.0%


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
# Human-readable class names; position i is the name printed for label i
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

In [9]:
# Implement ConvNet (aka CNN)
class ConvNet(nn.Module):
    """Small two-conv / three-linear CNN for 3x32x32 images with 10 output classes.

    Shape flow for an input batch of shape (N, 3, 32, 32):
        conv1 (5x5, no padding) -> (N, 6, 28, 28)  -> 2x2 max-pool -> (N, 6, 14, 14)
        conv2 (5x5, no padding) -> (N, 16, 10, 10) -> 2x2 max-pool -> (N, 16, 5, 5)
        flatten -> (N, 400) -> fc1 -> (N, 120) -> fc2 -> (N, 84) -> fc3 -> (N, 10)

    The output is raw, unnormalized class scores (logits).
    """

    def __init__(self):
        super().__init__()
        # [VI] https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        # Conv2d(in_channels, out_channels, kernel_size); padding and stride stay at their defaults
        self.conv1 = nn.Conv2d(3, 6, 5)
        # MaxPool2d(kernel_size, stride): halves height and width; reused after both conv layers
        self.pool = nn.MaxPool2d(2, 2)
        # in_channels must equal the out_channels of conv1
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully-connected layers.
        # 16*5*5 = channels * height * width left after the second pooling step (see class docstring);
        # the hidden sizes 120 and 84 are free choices.
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        # The output size must be 10 because there are 10 classes
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Map a batch of images of shape (N, 3, 32, 32) to logits of shape (N, 10).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not leave exactly
                16*5*5 features per sample for the first linear layer.
        """
        x = self.pool(F.relu(self.conv1(x)))  # first conv layer
        x = self.pool(F.relu(self.conv2(x)))  # second conv layer
        # Flatten everything except the batch dimension. Unlike view(-1, 16*5*5), this can
        # never fold surplus features into extra rows, so a wrong-sized input fails loudly
        # in fc1 instead of silently changing the batch size.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation here: nn.CrossEntropyLoss applies softmax to the logits itself
        x = self.fc3(x)
        return x

In [10]:
# Build the network and move its parameters onto the selected device
model = ConvNet()
model = model.to(device)

# Cross-entropy loss on the raw class scores, minimized with plain SGD.
# NOTE: the name is spelled `optmizer` on purpose; the training cell refers to it by that name.
criterion = nn.CrossEntropyLoss()
optmizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [11]:
# Number of mini-batches per epoch (only used in the progress message)
n_total_steps = len(train_loader)

# Training
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Batches arrive as [4, 3, 32, 32] tensors: 4 images, 3 channels (RGB), 32x32 pixels.
        # Move both inputs and targets onto the device the model lives on.
        images, labels = images.to(device), labels.to(device)

        # Clear the gradients left over from the previous step
        optmizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optmizer.step()

        # Report the current batch loss once every 2000 steps
        if (i+1) % 2000 != 0:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print('Finished training') # PS: this will take a few minutes

Epoch [1/4], Step [2000/12500], Loss: 2.2857
Epoch [1/4], Step [4000/12500], Loss: 2.3171
Epoch [1/4], Step [6000/12500], Loss: 2.3209
Epoch [1/4], Step [8000/12500], Loss: 2.2898
Epoch [1/4], Step [10000/12500], Loss: 2.3501
Epoch [1/4], Step [12000/12500], Loss: 2.3118
Epoch [2/4], Step [2000/12500], Loss: 2.0760
Epoch [2/4], Step [4000/12500], Loss: 1.6644
Epoch [2/4], Step [6000/12500], Loss: 2.6618
Epoch [2/4], Step [8000/12500], Loss: 2.1372
Epoch [2/4], Step [10000/12500], Loss: 1.2589
Epoch [2/4], Step [12000/12500], Loss: 1.5953
Epoch [3/4], Step [2000/12500], Loss: 2.9272
Epoch [3/4], Step [4000/12500], Loss: 2.3455
Epoch [3/4], Step [6000/12500], Loss: 1.6517
Epoch [3/4], Step [8000/12500], Loss: 1.8119
Epoch [3/4], Step [10000/12500], Loss: 1.0731
Epoch [3/4], Step [12000/12500], Loss: 1.4473
Epoch [4/4], Step [2000/12500], Loss: 1.9019
Epoch [4/4], Step [4000/12500], Loss: 1.6949
Epoch [4/4], Step [6000/12500], Loss: 1.8215
Epoch [4/4], Step [8000/12500], Loss: 1.4959
Epoc

In [12]:
# Evaluate the model
# Gradients are not needed for inference, so autograd tracking is switched off.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    # Per-class tallies, indexed by class id (same order as `classes`)
    n_class_correct = [0 for _ in range(len(classes))]
    n_class_samples = [0 for _ in range(len(classes))]

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # torch.max returns (value, index); the index of the largest score is the predicted class
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        # Walk over the samples actually present in this batch. The last batch can be
        # smaller than batch_size, so indexing with range(batch_size) could run past the end.
        # tolist() yields plain Python ints, which are safe to use as list indices.
        for label, pred in zip(labels.tolist(), predicted.tolist()):
            if label == pred:
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc}%')

    for i in range(len(classes)):
        if n_class_samples[i] == 0:
            # No test samples of this class: accuracy is undefined, so avoid dividing by zero
            print(f'Accuracy of {classes[i]}: n/a (no samples)')
            continue
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f'Accuracy of {classes[i]}: {acc}%')

Accuracy of the network: 45.11%
Accuracy of plane: 63.4%
Accuracy of car: 52.9%
Accuracy of bird: 20.5%
Accuracy of cat: 38.4%
Accuracy of deer: 33.1%
Accuracy of dog: 28.0%
Accuracy of frog: 59.3%
Accuracy of horse: 63.3%
Accuracy of ship: 44.0%
Accuracy of truck: 48.2%


In [None]:
# PS: the accuracy is very low, mostly because we only trained for 4 epochs