<a href="https://www.kaggle.com/code/danilaaxyonov/cifar-100-image-recognition-using-cnn?scriptVersionId=262918227" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

### Hello everyone. In this notebook I'll try to build a CNN for recognition images from CIFAR-100 dataset. That's my first time of using CNN, so I don't know how to choose architecture for my model (number of convolutional layers, kernel's size, number of fully-connected flat layers). I hope that I'll beat 99% accuracy. Wish me luck!

# 0. Setup.

In [1]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import v2 # recommended by developers themselves!

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Current device:', device)

print('Setup completed.')

Current device: cuda
Setup completed.


# 1. Loading and normalizing CIFAR-100 dataset.

In [2]:
CIFAR100_unnormalized_transform = v2.Compose([
    v2.ToImage(), # dtype=torch.uint8; from 0 to 255 inclusively.
    v2.ToDtype(torch.float32) # still from 0 to 255 inclusively.
])
print('CIFAR100_unnormalized_transform created.')

CIFAR100_unnormalized_transform created.


In [3]:
train = datasets.CIFAR100(
    root='./',
    download=True,
    train=True,
    transform=CIFAR100_unnormalized_transform
)
test = datasets.CIFAR100(
    root='./',
    download=True,
    train=False,
    transform=CIFAR100_unnormalized_transform
)
print('Unnormalized CIFAR-100 dataset loaded.')

100%|██████████| 169M/169M [00:04<00:00, 39.2MB/s]


Unnormalized CIFAR-100 dataset loaded.


In [4]:
r = torch.stack([train[i][0][0] for i in range(len(train))] + [test[i][0][0] for i in range(len(test))]).to(device) # red
g = torch.stack([train[i][0][1] for i in range(len(train))] + [test[i][0][1] for i in range(len(test))]).to(device) # green
b = torch.stack([train[i][0][2] for i in range(len(train))] + [test[i][0][2] for i in range(len(test))]).to(device) # blue

CIFAR100_mean = torch.Tensor([
    r.mean().item(),
    g.mean().item(),
    b.mean().item()
])
CIFAR100_std = torch.Tensor([
    r.std().item(),
    g.std().item(),
    b.std().item()
])
print('mean:', CIFAR100_mean)
print('std:', CIFAR100_std)

del r, g, b # to free memory.

mean: tensor([129.3773, 124.1058, 112.4776])
std: tensor([68.2095, 65.4312, 70.4587])


In [5]:
CIFAR100_normalized_transform = v2.Compose([
    CIFAR100_unnormalized_transform,
    v2.Normalize(mean=CIFAR100_mean, std=CIFAR100_std) # niceee)
])
print('CIFAR100_normalized_transform created.')

CIFAR100_normalized_transform created.


In [6]:
train = datasets.CIFAR100(
    root='./',
    download=True,
    train=True,
    transform=CIFAR100_normalized_transform
)
test = datasets.CIFAR100(
    root='./',
    download=True,
    train=False,
    transform=CIFAR100_normalized_transform
)
print('Normalized CIFAR-100 dataset loaded.')

Normalized CIFAR-100 dataset loaded.


# 2. Creating a Dataloader.

In [7]:
BATCH_SIZE = 256 # don't change my traditions)

train_DL = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
test_DL = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

print('Both DataLoaders created.')

Both DataLoaders created.


In [8]:
batch_example = next(iter(train_DL))
print(batch_example[0].shape)
print(batch_example[1].shape)

torch.Size([256, 3, 32, 32])
torch.Size([256])


# 3. Choosing a CNN's architecture and writing a model class.

In [9]:
class CIFAR100CNN(nn.Module):
    def __init__(self) -> None:
        super().__init__()

        self.features = nn.Sequential(
            # now input.shape == (3, 32, 32).
            
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding='same'), # padding='same' means (in.h, in.w) == (out.h, out.w).
            # now input.shape == (32, 32, 32).
            nn.ReLU(), # adding non-linearity, because Conv2d is linear and ReLU(x) = max(x, 0) is non-linear.
            nn.Dropout2d(p=0.15),
            
            nn.MaxPool2d(kernel_size=2, stride=2), # kernel_size == stride   ==>   non-crossing applications of the same kernel.
            # now input.shape == (32, 16, 16).
            
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding='same'),
            # now input.shap == (64, 16, 16).
            nn.ReLU(),
            nn.Dropout2d(p=0.15),

            nn.MaxPool2d(kernel_size=2, stride=2)
            # now input.shape == (64, 8, 8).
        )

        self.classifier = nn.Sequential(
            nn.Flatten(), # (64, 8, 8) => (64 * 8 * 8,) somehow.
            # 64 * 8 * 8 = 4'096.
            
            nn.Linear(4096, 2048),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=0.4),
            
            nn.Linear(256, 100),
            nn.Softmax() # not necessary actually.
        )

        return

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.classifier(x)
        return x

print('CIFAR100CNN class created.')

CIFAR100CNN class created.


# 4. Creating a model itself.

In [10]:
model = CIFAR100CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
print('Model, loss function and optimizer created.')

Model, loss function and optimizer created.
