In [None]:
%matplotlib inline

In [91]:
import datetime
from pathlib import Path
from typing import Final, Literal


import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as nnf
from torchvision import datasets, transforms

In [71]:
data_path = Path("../cifar_data")

class_names: list[str] = [
    'airplane','automobile','bird','cat','deer',
    'dog','frog','horse','ship','truck'
]

# Transform statistics taken from https://stackoverflow.com/a/69750247
cifar10_preprocessor = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]
)

cifar10_train = datasets.CIFAR10(
    data_path,
    train=True,
    download=True,
    transform=cifar10_preprocessor
)

cifar10_val = datasets.CIFAR10(
    data_path,
    train=False,
    download=True,
    transform=cifar10_preprocessor
)


Files already downloaded and verified
Files already downloaded and verified


Since I got the normalization values from the Internet, I should verify that these statistics are accurate. I will create numpy batch arrays and take the mean and std along the batch and 32x32 pixel axes.

In [36]:
# use np.concatenate to stick all the images together to form a (batch, 3, 32, 32) array
imgs = np.concatenate(
    np.asarray([[
        [
            cifar10_train[i][0][0].numpy(),
            cifar10_train[i][0][1].numpy(),
            cifar10_train[i][0][2].numpy()
        ]
        for i in range(len(cifar10_train))
    ]])
)

print(imgs.shape)

(50000, 3, 32, 32)


In [39]:
# calculate the mean along the (batch, pixel, pixel) axes
train_mean = np.mean(imgs, axis=(0, 2, 3))
print(train_mean)

[-0.00040607 -0.0005815  -0.00102856]


In [40]:
# calculate the std along the (batch, pixel, pixel) axes
train_std = np.std(imgs, axis=(0, 2, 3))
print(train_std)


[1.0001289  0.9999368  0.99995327]


Great! The data is normalized with zero mean and standard deviation of one (1).

In [72]:
label_map: dict[int, int] = {0: 0, 2: 1}

cifar2_class_names: list[str] = ['airplane', 'bird']
cifar10_to_2_indices: list[int] = [
    class_names.index(cifar2) for cifar2 in cifar2_class_names
]

cifar2 = [
    (img, label_map[label])
    for img, label in cifar10_train
    if label in cifar10_to_2_indices
]

cifar2_val = [
    (img, label_map[label])
    for img, label in cifar10_val
    if label in cifar10_to_2_indices
]


In [73]:
train_loader = DataLoader(cifar2, batch_size=64, shuffle=True)
val_loader = DataLoader(cifar2_val, batch_size=64, shuffle=False)

In [87]:
device: Final = (
    torch.device('cuda') if torch.cuda.is_available()
    else torch.device('cpu')
)
device

device(type='cpu')

In [92]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Run training on a CNN

    Parameters
    ----------
    n_epochs : int
        Number of training iterations
    optimizer : optim.Optimizer
        Optimizer
    model : nn.Module
        CNN model
    loss_fn : nn.Module
        Loss function module
    train_loader : DataLoader
        Batched data loader

    Returns
    -------
    None
    """
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs_t, labels_t in train_loader:
            imgs_t = imgs_t.to(device=device)
            labels_t = labels_t.to(device=device)
            outputs = model(imgs_t)
            loss = loss_fn(outputs, labels_t)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if epoch < 4 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [93]:
def validate(model, train_loader, val_loader):
    """Validate model training

    Parameters
    ----------
    model : nn.Module
        Trained model module
    train_loader : DataLoader
        Training data loading
    val_loader : DataLoader
        Validation data loader

    Returns
    -------
    dict[str, float]
        Accuracy dictionary for data loaders. The keys are ["train", "val"]
    """
    accdict: dict[Literal["train", "val"], float] = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs_t, labels_t in loader:
                outputs = model(imgs_t)
                _, predicted_t = torch.max(outputs, dim=1)
                total += labels_t.shape[0]
                correct_t = predicted_t == labels_t
                correct += int(correct_t.sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict

In [94]:
class Cifar2CNN(nn.Module):
    """CNN image classifier for two (2) classes"""

    def __init__(
        self,
        n_chans1=32
    ):
        """Instantiate a CNN CIFAR image classifier for two (2) classes.

        Parameters
        ----------
        n_chans1 : int
            Number of channels in the first layer

        """
        super().__init__()
        self.cifar_size: Final[int] = 32
        self.n_chans1: Final[int] = n_chans1
        self.cov_ker_size: Final[int] = 3
        self.cov_pad: Final[int] = 1
        # First convolutional layer (B, 3, 32, 32)
        self.conv1 = nn.Conv2d(
            3,
            n_chans1,
            self.cov_ker_size,
            padding=self.cov_pad
        )
        # Second convolutional layer, after applying pooling (B, n_chans1, 16, 16)
        self.conv2 = nn.Conv2d(
            n_chans1,
            n_chans1 // 2,
            self.cov_ker_size,
            padding=self.cov_pad
        )
        # Third convolutional layer, after applying pooling (B, n_chans1, 8, 8)
        self.conv3 = nn.Conv2d(
            n_chans1 // 2,
            n_chans1 // 2,
            self.cov_ker_size,
            padding=self.cov_pad
        )
        # Functional layer after convolutions and view/reshape (B, n_chans1 * 8 * 8, 32)
        self.fcn4 = nn.Linear(
            ((self.cifar_size // 4) ** 2) * (n_chans1 // 2),
            32
        )
        # Functional layer after functional (B, 2)
        self.fcn5 = nn.Linear(32, 2)

    def forward(self, batch):
        """Propagate the batch forward through NN.

        Parameters
        ----------
        batch : torch.Tensor
            Batch of images

        Returns
        -------
        torch.Tensor
            Forward propagated tensor
        """
        out = nnf.max_pool2d(torch.tanh(self.conv1(batch)), 2)
        out = nnf.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, self.fcn4.in_features)
        out = torch.tanh(self.fcn4(out))
        out = self.fcn5(out)
        return out


model = Cifar2CNN().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 3,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

validate(model, train_loader, val_loader)

2022-12-24 10:43:18.876998 Epoch 1, Training loss 0.5529749898394202
2022-12-24 10:43:30.517862 Epoch 2, Training loss 0.47473541300767547
2022-12-24 10:43:41.394309 Epoch 3, Training loss 0.4375302218327856
Accuracy train: 0.82
Accuracy val: 0.82


{'train': 0.8172, 'val': 0.823}

In [76]:
l = nn.Linear(8 * 8 * 32 // 2, 32)
l.out_features

32

In [77]:
c = nn.Conv2d(3, 32, kernel_size=3, padding=1)
c.out_channels

32

In [82]:
cifar10_train[0][0].shape

torch.Size([3, 32, 32])

In [83]:
cifar10_train[0][0].view(-1, 8 * 8 * 16).shape

torch.Size([3, 1024])