In [None]:
import torch
from torch import nn
from torchvision.datasets import CIFAR100
from torchvision import transforms
from torch.utils.data import DataLoader

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Device: {device}")
print(f"Torch version: {torch.__version__}")

Device: cpu
Torch version: 2.8.0+cu126


# Data pipeline

In [None]:
# Three-value tuples handed to transforms.Normalize as its mean and std arguments.
# NOTE(review): presumably per-channel (RGB) statistics of the CIFAR-100 training
# set — confirm where these numbers come from.
CIFAR100_MEAN = (0.5071, 0.4867, 0.4408)
CIFAR100_STD = (0.2675, 0.2565, 0.2761)

In [None]:
# Training pipeline: random augmentations are applied to the raw image first,
# then the result is converted to a tensor and normalised.
train_transformations = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.02),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD),
])

# Test pipeline: no augmentation, only tensor conversion and the same normalisation.
test_transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD),
])

In [None]:
# Training split; every sample goes through the augmenting transform pipeline.
# download=True lets torchvision fetch the dataset into root="data".
train_data = CIFAR100(root="data",
                      train=True,
                      transform=train_transformations,
                      download=True)

# Held-out test split; uses the non-augmenting transform pipeline.
test_data = CIFAR100(root="data",
                      train=False,
                      transform=test_transformations,
                      download=True)

100%|██████████| 169M/169M [00:01<00:00, 86.2MB/s]


In [None]:
NUM_WORKERS = 0
BATCH_SIZE = 32  # shared by both loaders so the two cannot drift apart
# Pinned host memory only speeds up host-to-GPU copies; requesting it on a
# machine without a GPU makes DataLoader emit a warning, so enable it for CUDA only.
PIN_MEMORY = (device == "cuda")

# Training loader: reshuffled every epoch; the last partial batch is dropped so
# every training batch has exactly BATCH_SIZE samples.
train_dataloader = DataLoader(dataset=train_data,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=NUM_WORKERS,
                              pin_memory=PIN_MEMORY,
                              # persistent workers are only valid when workers exist
                              persistent_workers=(NUM_WORKERS > 0),
                              drop_last=True)

# Test loader: fixed order, and the last partial batch is kept so every test
# sample is evaluated.
test_dataloader = DataLoader(dataset=test_data,
                             batch_size=BATCH_SIZE,
                             shuffle=False,
                             num_workers=NUM_WORKERS,
                             pin_memory=PIN_MEMORY,
                             persistent_workers=(NUM_WORKERS > 0),
                             drop_last=False)

In [None]:
# Number of batches each loader yields per epoch.
len(train_dataloader), len(test_dataloader)

(1562, 313)

In [None]:
# Pull one batch from the training loader; `image` and `label` each hold a whole batch.
image, label = next(iter(train_dataloader))
# nn.Flatten keeps dim 0 and flattens everything after it, so on a single
# un-batched image the channel axis is preserved (compare the two printed shapes).
flattener = nn.Flatten()
print(image[0].shape)
flattener(image[0]).shape

torch.Size([3, 32, 32])




torch.Size([3, 1024])

# Creating the model

In [None]:
class TheImageClassification_inatorV0(nn.Module):
  """Small convolutional image classifier: three conv blocks plus a linear head.

  Every block is Conv3x3 -> ReLU -> Conv3x3 -> ReLU -> MaxPool(2), so each block
  halves the spatial size. The linear head is sized for a 128 x 4 x 4 feature
  map, which is what 32 x 32 inputs produce after the three poolings.

  Args:
  num_classes (int): number of output logits. Defaults to 100.
  in_channels (int): number of channels of the input images. Defaults to 3.
  """

  @staticmethod
  def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
    """Build one Conv-ReLU-Conv-ReLU-MaxPool block (padding keeps H and W until the pool)."""
    return nn.Sequential(nn.Conv2d(in_channels=in_channels,
                                   out_channels=out_channels,
                                   kernel_size=3,
                                   padding=1),
                         nn.ReLU(),
                         nn.Conv2d(in_channels=out_channels,
                                   out_channels=out_channels,
                                   kernel_size=3,
                                   padding=1),
                         nn.ReLU(),
                         nn.MaxPool2d(kernel_size=2,
                                      stride=2))

  def __init__(self, num_classes: int = 100, in_channels: int = 3):
    super().__init__()
    # Blocks are created in the same order as before so that seeded weight
    # initialisation is unchanged.
    self.block_1 = self._conv_block(in_channels, 64)
    self.block_2 = self._conv_block(64, 64)
    self.block_3 = self._conv_block(64, 128)

    self.classifier = nn.Sequential(nn.Flatten(),
                                    nn.Linear(in_features=128 * 4 * 4,
                                              out_features=num_classes))
    # layer_stack wraps the very same module objects, so the state_dict lists
    # each weight under both "block_N.*"/"classifier.*" and "layer_stack.*".
    # It is kept as-is because existing checkpoints contain those keys.
    self.layer_stack = nn.Sequential(self.block_1,
                                     self.block_2,
                                     self.block_3,
                                     self.classifier)

  def forward(self, X:torch.Tensor):
    """Run a batch of images through all blocks and return the class logits."""
    return self.layer_stack(X)

# Instantiate the network and move its parameters to the selected device.
model_0 = TheImageClassification_inatorV0().to(device)
model_0

TheImageClassification_inatorV0(
  (block_1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): L

In [None]:
# Smoke test: push the sample batch through the untrained model and check that
# one row of logits comes back per image.
with torch.inference_mode():
    # The model lives on `device`; the batch from the DataLoader is on the CPU,
    # so it has to be moved first or the forward pass fails on a GPU machine.
    pred = model_0(image.to(device))
print(pred.shape)

torch.Size([32, 100])


# Creating helper functions

In [None]:
from timeit import default_timer as timer
import requests
from pathlib import Path
from tqdm.auto import tqdm

In [None]:
def train_time(start: float, end: float, device: torch.device = None):
  """ Function to compute, print and return the total train time
  Args:
  start (float): training start time
  end (float): training end time
  device (torch.device, optional): device the run used; it is only included
    in the printed message. Defaults to None (message omits the device).

  Returns:
  The total train time (end - start), in the same unit as the inputs
  """
  total_time = end - start
  # NOTE: the message says "seconds", which holds when start/end come from
  # timeit.default_timer as they do in this notebook.
  if device is None:
    print(f"Train time: {total_time:.3f} seconds")
  else:
    print(f"Train time on {device}: {total_time:.3f} seconds")
  return total_time

In [None]:
# Fetch helper_functions.py once; later runs reuse the local copy.
if Path("./helper_functions.py").is_file():
  print("helper_functions.py found skipping download...")
else:
  print("Downloading helper_functions.py")
  # NOTE(review): this downloads code from a moving branch and then imports it;
  # consider pinning the URL to a specific commit.
  request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/refs/heads/main/helper_functions.py",
                         timeout=30)
  # Fail loudly on an HTTP error instead of saving the error page as a .py file,
  # which would also make every later run skip the download.
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

from helper_functions import accuracy_fn

Downloading helper_functions.py


In [None]:
def train_step(model: nn.Module,
               train_dataloader: torch.utils.data.DataLoader,
               loss_fn: nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device=device):
  """ Train the model for one full pass over the training dataloader

  Prints the loss and accuracy averaged over all batches; returns nothing.

  Args:
  model (nn.Module): The model; it is moved to `device` and put in train mode
  train_dataloader (torch.utils.data.DataLoader): The dataloader with training data
  loss_fn (nn.Module): loss function, called as loss_fn(logits, targets)
  optimizer (torch.optim.Optimizer): optimizer for updating parameters
  accuracy_fn: function to compute accuracy, called as accuracy_fn(targets, predicted_labels)
  device (torch.device): device on which training is done
  """
  model.to(device)
  model.train()

  loss_sum = 0
  acc_sum = 0
  for inputs, targets in tqdm(train_dataloader, desc="Training in progress"):
    inputs = inputs.to(device)
    targets = targets.to(device)

    # Forward pass and loss for this batch
    logits = model(inputs)
    loss = loss_fn(logits, targets)

    # Running totals; the predicted label is the index of the largest logit
    loss_sum += loss.item()
    acc_sum += accuracy_fn(targets, logits.argmax(dim=1))

    # Clear old gradients, backpropagate, update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  num_batches = len(train_dataloader)
  mean_acc = acc_sum / num_batches
  mean_loss = loss_sum / num_batches

  print(f"Train loss: {mean_loss:.4f} | Train acc: {mean_acc:.2f}%")


In [None]:
def test_step(model: nn.Module,
              test_dataloader: torch.utils.data.DataLoader,
              loss_fn: nn.Module,
              accuracy_fn,
              device: torch.device=device):
  """ A single epoch of testing

  Prints the loss and accuracy averaged over all batches; returns nothing.

  Args:
  model (nn.Module): The model; it is moved to `device` and put in eval mode
  test_dataloader (torch.utils.data.DataLoader): The dataloader with testing data
  loss_fn (nn.Module): loss function
  accuracy_fn: function to compute accuracy, called as accuracy_fn(targets, predicted_labels)
  device (torch.device): device on which testing is done
  """
  # Batches are moved to `device` below, so the model has to live there too;
  # train_step does the same, which keeps a standalone call from failing.
  model.to(device)
  model.eval()
  batch_test_loss, batch_test_acc = 0, 0
  # No gradients are needed while evaluating
  with torch.inference_mode():
    for X, y in tqdm(test_dataloader):
      X, y = X.to(device), y.to(device)
      # 1. Forward pass
      test_pred = model(X)
      test_pred_label = test_pred.argmax(dim=1)

      # 2. Calculate Loss
      test_loss = loss_fn(test_pred, y)
      batch_test_loss += test_loss.item()
      batch_test_acc += accuracy_fn(y, test_pred_label)

  # Average the per-batch values over the number of batches
  batch_test_acc /= len(test_dataloader)
  batch_test_loss /= len(test_dataloader)
  print(f"Test loss: {batch_test_loss:.4f} | Test acc: {batch_test_acc:.2f}%")

In [None]:
def eval_model(model: nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: nn.Module,
               accuracy_fn,
               device: torch.device=device):
  """ Evaluates a model
  Args:
  model (nn.Module): The model; it is moved to `device` and put in eval mode
  data_loader (torch.utils.data.DataLoader): The dataloader to evaluate on
  loss_fn (nn.Module): loss function
  accuracy_fn: function to compute accuracy, called as accuracy_fn(targets, predicted_labels)
  device (torch.device): device on which evaluation is done

  Returns: A dictionary containing model_name, model_acc, model_loss
  (accuracy and loss are averaged over the batches of data_loader)
  """
  # Batches are moved to `device` below, so the model has to be there as well.
  model.to(device)
  model.eval()
  batch_test_loss, batch_test_acc = 0, 0
  with torch.inference_mode():
    for X, y in tqdm(data_loader):
      X, y = X.to(device), y.to(device)
      # 1. Forward pass
      test_pred = model(X)
      test_pred_label = test_pred.argmax(dim=1)

      # 2. Calculate Loss
      test_loss = loss_fn(test_pred, y)
      batch_test_loss += test_loss.item()
      batch_test_acc += accuracy_fn(y, test_pred_label)

  # Average over the loader that was actually iterated, not a notebook global
  num_batches = len(data_loader)
  batch_test_acc /= num_batches
  batch_test_loss /= num_batches

  return {"model_name": model.__class__.__name__,
          "model_acc": batch_test_acc,
          "model_loss": batch_test_loss}

# Training the model

In [None]:
# Cross-entropy loss on the class logits, optimised with SGD plus momentum.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model_0.parameters(),
    lr=0.01,           # alternatives to try: 0.05 or 0.1
    momentum=0.9,      # momentum term; the author notes it helps considerably
    weight_decay=5e-4  # weight decay; described by the author as the standard CIFAR setting
)

In [None]:
# Train for a fixed number of epochs, running a test pass after every epoch.
# The timer brackets the whole loop, so the reported time includes the test passes.
epochs = 6
start_timer = timer()
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n----------------------------------")
  train_step(model=model_0,
             train_dataloader=train_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             accuracy_fn=accuracy_fn)
  test_step(model=model_0,
            test_dataloader=test_dataloader,
            loss_fn=loss_fn,
            accuracy_fn=accuracy_fn)
end_timer = timer()
model_0_train_time = train_time(start=start_timer,
                                end=end_timer,
                                device=device)

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch: 0
----------------------------------


Training in progress:   0%|          | 0/1562 [00:00<?, ?it/s]

Train loss: 2.2453 | Train acc: 41.31%


  0%|          | 0/313 [00:00<?, ?it/s]

Test loss: 2.0586 | Test acc: 45.36%
Epoch: 1
----------------------------------


Training in progress:   0%|          | 0/1562 [00:00<?, ?it/s]

Train loss: 2.1490 | Train acc: 43.51%


  0%|          | 0/313 [00:00<?, ?it/s]

Test loss: 2.0817 | Test acc: 46.27%
Epoch: 2
----------------------------------


Training in progress:   0%|          | 0/1562 [00:00<?, ?it/s]

Train loss: 2.0687 | Train acc: 45.08%


  0%|          | 0/313 [00:00<?, ?it/s]

Test loss: 2.0889 | Test acc: 45.26%
Epoch: 3
----------------------------------


Training in progress:   0%|          | 0/1562 [00:00<?, ?it/s]

Train loss: 2.0064 | Train acc: 46.73%


  0%|          | 0/313 [00:00<?, ?it/s]

Test loss: 1.9646 | Test acc: 48.19%
Epoch: 4
----------------------------------


Training in progress:   0%|          | 0/1562 [00:00<?, ?it/s]

Train loss: 1.9526 | Train acc: 47.68%


  0%|          | 0/313 [00:00<?, ?it/s]

Test loss: 2.0091 | Test acc: 47.75%
Epoch: 5
----------------------------------


Training in progress:   0%|          | 0/1562 [00:00<?, ?it/s]

Train loss: 1.9188 | Train acc: 48.54%


  0%|          | 0/313 [00:00<?, ?it/s]

Test loss: 1.9836 | Test acc: 48.18%
3607.5093086570005


In [None]:
# Persist only the model weights (state_dict); the optimizer state is not saved.
torch.save(model_0.state_dict(), "model.pth")

In [None]:
# Rebuild the architecture, then restore trained weights from disk.
# 'model (2).pth' is not written by any cell visible in this notebook (the save
# cell above writes "model.pth"), so fall back to that file when the former is
# absent; this keeps a fresh Restart & Run All from failing here.
checkpoint_path = Path("model (2).pth")
if not checkpoint_path.is_file():
  checkpoint_path = Path("model.pth")

loaded_model = TheImageClassification_inatorV0()
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
loaded_model.load_state_dict(torch.load(checkpoint_path, map_location=device))
# Keep the restored model on the same device the batches are moved to.
loaded_model = loaded_model.to(device)
loaded_model

TheImageClassification_inatorV0(
  (block_1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): L

In [None]:
# Fresh loss and optimizer bound to the restored model's parameters. Only model
# weights were checkpointed, so this SGD instance starts with no momentum history.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    loaded_model.parameters(),
    lr=0.01,           # alternatives to try: 0.05 or 0.1
    momentum=0.9,      # momentum term; the author notes it helps considerably
    weight_decay=5e-4  # weight decay; described by the author as the standard CIFAR setting
)

In [None]:
# Evaluate the restored checkpoint on the test loader before any further training.
model_0_results = eval_model(loaded_model, test_dataloader, loss_fn, accuracy_fn, device)
model_0_results

  0%|          | 0/313 [00:00<?, ?it/s]

{'model_name': 'TheImageClassification_inatorV0',
 'model_acc': 53.15495207667732,
 'model_loss': 1.8301136564142026}

In [None]:
# Continue training the restored model for one more epoch, with a test pass after it.
epochs = 1
start_timer = timer()
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}\n----------------------------------")
  train_step(model=loaded_model,
             train_dataloader=train_dataloader,
             loss_fn=loss_fn,
             optimizer=optimizer,
             accuracy_fn=accuracy_fn)
  test_step(model=loaded_model,
            test_dataloader=test_dataloader,
            loss_fn=loss_fn,
            accuracy_fn=accuracy_fn)
end_timer = timer()
# NOTE: this reuses the name model_0_train_time, overwriting the time recorded
# for the earlier 6-epoch run.
model_0_train_time = train_time(start=start_timer,
                                end=end_timer,
                                device=device)

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch: 0
----------------------------------


Training in progress:   0%|          | 0/1562 [00:00<?, ?it/s]

Train loss: 1.6092 | Train acc: 55.61%


  0%|          | 0/313 [00:00<?, ?it/s]

Test loss: 1.7846 | Test acc: 52.95%
675.1196786170003


In [None]:
# Save the further-trained weights under a separate file name.
torch.save(loaded_model.state_dict(), "loaded_model.pth")