# Speed Train CNN Cifar-10

This notebook trains a CNN on the CIFAR-10 dataset using PyTorch, with hyperparameters tuned to achieve high accuracy in a very short training time.

## Hyperparameter choices for the CNN on CIFAR-10 dataset
- **Batch size**: 64
- **Learning rate**: 0.001
- **Epochs**: 10
- **Optimizer**: Adam
- **Loss function**: CrossEntropyLoss
- **Data augmentation**: RandomHorizontalFlip, RandomCrop
- **Normalization**: Normalize with mean and std of CIFAR-10 dataset
- **Model architecture**: an initial convolutional layer followed by 3 residual blocks (each with two convolutional layers, batch normalization, and ReLU activation), then max pooling and dropout, and a fully connected layer at the end.
- **Regularization**: Dropout with a probability of 0.5

In [None]:
# Load Cifar-10 dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
import numpy as np
import random
import os


# Set random seed for reproducibility
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch.
    When CUDA is available, the generators of all CUDA devices are seeded too.

    Args:
        seed: Seed value forwarded unchanged to each library.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # Nothing more to do on CPU-only machines.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Fix all RNG seeds before anything random happens
set_seed(42)

# Use the GPU when one is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Convert images to tensors, then normalize each channel with mean 0.5 / std 0.5
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
transform = transforms.Compose([to_tensor, normalize])

# CIFAR-10 train/test splits, downloaded into ./data; both share `transform`
dataset_kwargs = dict(root="./data", download=True, transform=transform)
train_dataset = datasets.CIFAR10(train=True, **dataset_kwargs)
test_dataset = datasets.CIFAR10(train=False, **dataset_kwargs)

# Mini-batch loaders: only the training split is shuffled
loader_kwargs = dict(batch_size=64, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)


Using device: cpu


In [None]:
import lightning as L


# Define the ResNet model
# Define the basic residual block
class ResidualBlock(nn.Module):
    """Two-convolution residual block with an optional projection shortcut.

    The main path is conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm. Its
    result is added to the shortcut branch and passed through a final ReLU.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution and, when a projection
            shortcut is created, of that projection as well. Defaults to 1.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # First stage of the main path: the only 3x3 conv that may be strided.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Second stage keeps both the resolution and the channel count.
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # An empty Sequential acts as the identity; a 1x1 conv + BatchNorm
        # projection is used whenever the main path changes the tensor shape.
        shape_preserved = stride == 1 and in_channels == out_channels
        if shape_preserved:
            self.shortcut = nn.Sequential()
        else:
            projection = nn.Conv2d(
                in_channels, out_channels, kernel_size=1, stride=stride, bias=False
            )
            self.shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_channels))

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))

        # Merge the skip connection into the main path, then activate.
        main += self.shortcut(x)
        return self.relu(main)


# Define the ResNet model for CIFAR-10
class ResNetCIFAR10(L.LightningModule):
    """Small ResNet-style image classifier wrapped as a LightningModule.

    Architecture: conv3x3 -> BatchNorm -> ReLU, three ``ResidualBlock`` stages
    (64, 128 and 256 channels; the last two use stride 2), a 3x3 max-pool with
    stride 2, dropout (p=0.5) and one fully connected layer.

    The fully connected layer expects 256 * 4 * 4 features, which is what
    32x32 inputs produce (32 -> 16 -> 8 through the strided blocks, 8 -> 4
    through the max-pool).

    Args:
        num_classes: Number of output logits.
        lr: Learning rate passed to the Adam optimizer.
    """

    def __init__(self, num_classes=10, lr: float = 0.001):
        # The parent constructor must run before any attribute is attached to
        # the module, so it is called first.
        super().__init__()
        self.lr = lr

        # Initial convolution
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # 3 residual blocks (with conv layers)
        self.layer1 = ResidualBlock(64, 64)
        self.layer2 = ResidualBlock(64, 128, stride=2)
        self.layer3 = ResidualBlock(128, 256, stride=2)

        # Max pooling
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Dropout
        self.dropout = nn.Dropout(0.5)

        # Fully connected layer
        self.fc = nn.Linear(256 * 4 * 4, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch with 3 channels; 32x32 spatial size is required for
                the flattened features to match the fully connected layer.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalized logits.
        """
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)

        out = self.maxpool(out)
        out = self.dropout(out)

        out = out.view(out.size(0), -1)  # Flatten
        out = self.fc(out)

        return out

    def configure_optimizers(self):
        """Return Adam plus a StepLR schedule (LR x 0.1 every 10 scheduler steps)."""
        optimizer = optim.Adam(self.parameters(), lr=self.lr)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
        return [optimizer], [scheduler]

    def _shared_step(self, batch, stage):
        """Run one forward pass and log ``{stage}_loss`` and ``{stage}_acc``.

        Args:
            batch: ``(inputs, labels)`` pair as yielded by the dataloader.
            stage: Metric name prefix, e.g. ``"train"`` or ``"val"``.

        Returns:
            The cross-entropy loss for the batch.
        """
        inputs, labels = batch
        outputs = self(inputs)
        loss = F.cross_entropy(outputs, labels)
        self.log(f"{stage}_loss", loss)

        # Accuracy = fraction of samples whose highest logit is the true class.
        acc = (outputs.argmax(dim=1) == labels).float().mean()
        self.log(f"{stage}_acc", acc)
        return loss

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` / ``train_acc`` and return the loss to optimize."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` / ``val_acc`` and return the validation loss."""
        return self._shared_step(batch, "val")

: 

## Load the data

In [None]:
# Normalization statistics shared by the training and evaluation pipelines
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)

# Training pipeline: random crop (after 4-pixel padding) and horizontal flip
# as augmentation, then tensor conversion and normalization.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
    ]
)

# Evaluation pipeline: no augmentation, same normalization as training.
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
    ]
)

# Rebuild the datasets and loaders so the pipelines above are actually applied.
# Without this, the loaders created earlier keep using the un-augmented
# `transform`, and the augmentation defined here never reaches training.
train_dataset = datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform_train
)
test_dataset = datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform_test
)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

## Search for optimal Learning rate

## Training loop

In [None]:
# Log metrics and all model checkpoints to the "cifar10" Weights & Biases
# project; local run files go to ./logs.
wandb_logger = L.pytorch.loggers.WandbLogger(
    project="cifar10",
    log_model="all",
    save_dir="./logs",
)

# Stop once the validation loss has failed to improve for 3 consecutive checks.
# `val_loss` is monitored rather than `train_loss` because the training loss is
# only logged per step, which makes it a noisy single-batch signal.
early_stopping = L.pytorch.callbacks.EarlyStopping(monitor="val_loss", patience=3)

trainer = L.Trainer(
    max_epochs=15,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    # One device on either backend. `None` is not among the documented values
    # for `devices` (int, list of ints, or a string such as "auto").
    devices=1,
    logger=wandb_logger,
    callbacks=[early_stopping],
)

## Train the model

In [None]:
# Initialize the model with its default arguments (num_classes=10, lr=0.001)
# and move it to the device selected at the top of the notebook.
model = ResNetCIFAR10().to(device)

In [None]:
# Train the model. The CIFAR-10 test split (test_loader) is passed as the
# validation dataloader, so val_loss / val_acc are computed on it.
trainer.fit(model=model, train_dataloaders=train_loader, val_dataloaders=test_loader)

Epoch 1/10:   0%|                                                           | 0/782 [00:00<?, ?it/s]

Epoch 1/10: 100%|████████████████████████| 782/782 [05:31<00:00,  2.36it/s, loss=0.7974, acc=48.46%]
Testing: 100%|███████████████████████████| 157/157 [00:37<00:00,  4.22it/s, loss=1.1746, acc=62.87%]


Epoch 1/10 - Test Accuracy: 62.87%


Epoch 2/10: 100%|████████████████████████| 782/782 [06:17<00:00,  2.07it/s, loss=0.6046, acc=66.41%]
Testing: 100%|███████████████████████████| 157/157 [00:35<00:00,  4.39it/s, loss=0.6129, acc=71.82%]


Epoch 2/10 - Test Accuracy: 71.82%


Epoch 3/10:  12%|██▉                      | 90/782 [00:55<05:18,  2.17it/s, loss=0.6200, acc=72.29%]