In [1]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.transforms import v2
from torchvision.datasets import ImageFolder

# Data Transform and Loader

In [3]:
def pil_loader(path):
    """Read the image file at ``path`` and return it converted to RGB mode.

    The file is opened in binary mode and the conversion is performed while
    the file handle is still open; the handle is closed when the function
    returns.
    """
    with open(path, "rb") as handle:
        return Image.open(handle).convert("RGB")

In [4]:
# Side length, in pixels, that every image is resized to; also passed to the
# CNN constructor further down.
IMAGE_SIZE = 224

# Preprocessing applied to every sample of both datasets. The steps run in
# the order listed.
transforms = v2.Compose([
    v2.ToImage(),
    # Force a square IMAGE_SIZE x IMAGE_SIZE output regardless of input size.
    v2.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
    # Convert to float32 with value scaling enabled (scale=True).
    v2.ToDtype(torch.float32, scale=True),
])

In [5]:
# Build the training and validation datasets with the same preprocessing
# pipeline and image loader; only the root directory differs. The training
# dataset is constructed first, then the validation dataset.
train_dataset, val_dataset = (
    ImageFolder(root=split_root, transform=transforms, loader=pil_loader)
    for split_root in ("./data/seg_train/seg_train", "./data/seg_test/seg_test")
)

In [6]:
# Wrap both datasets in loaders with identical batch size and worker count.
# Only the training loader shuffles; validation keeps a fixed order.
train_loader, val_loader = (
    DataLoader(split, batch_size=16, shuffle=reshuffle, num_workers=2)
    for split, reshuffle in ((train_dataset, True), (val_dataset, False))
)

# Model

In [7]:
from CNN import CNN


# Size the classifier head from the number of classes the training dataset reports.
num_classes = len(train_dataset.classes)
model = CNN(IMAGE_SIZE, num_conv_layers=5, num_classes=num_classes)

# Use the GPU if one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Kept as the last expression of the cell so the model summary is displayed.
model.to(device)

CNN(
  (conv_layers): ModuleList(
    (0): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (fc1): Linear(in_features=6272, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=6, bias=True)
)

# Training

In [8]:
# Number of full passes over the training loader.
NUM_EPOCH = 25

loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.AdamW(model.parameters(), lr=0.001)
# train() calls scheduler.step() once per batch, not once per epoch, so T_max
# is expressed in batches: epochs * batches-per-epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimiser,
                                                       T_max=NUM_EPOCH*len(train_loader))

In [9]:
def train(model, loader, optimizer, loss_fn, device, scheduler=None):
    """Run one training pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_fn: Callable taking ``(outputs, labels)`` and returning a scalar loss.
        device: Device each batch is moved to before the forward pass.
        scheduler: Optional learning-rate scheduler stepped once per batch,
            right after ``optimizer.step()``. When omitted, a module-level
            variable named ``scheduler`` is used if one exists, which keeps
            existing ``train(model, loader, optimizer, loss_fn, device)``
            calls working unchanged. If neither is available, no scheduler
            is stepped.

    Returns:
        The sum of the per-batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    if scheduler is None:
        # The parameter shadows the notebook-level name, so look the global up
        # explicitly instead of failing with NameError when it is undefined.
        scheduler = globals().get("scheduler")

    model.train()
    total_loss = 0.0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)  # output shape: [batch size, num classes]
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            # Per-batch schedule: must follow optimizer.step().
            scheduler.step()

        total_loss += loss.item()
    return total_loss / len(loader)

In [10]:
def evaluate(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode and gradients are disabled for
    the whole pass. The predicted class of each sample is the index of the
    largest value along dimension 1 of the model output.

    Args:
        model: Module to evaluate.
        loader: Iterable of ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of labels seen.
    """
    model.eval()
    n_hits, n_seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_images)  # one row of class scores per sample
            hits = logits.argmax(dim=1).eq(batch_labels)
            n_hits += hits.sum().item()
            n_seen += batch_labels.shape[0]
    return n_hits / n_seen

In [11]:
# Each epoch: one optimisation pass over train_loader, then one accuracy
# measurement over val_loader, then a one-line log of both.
for epoch in range(NUM_EPOCH):
    train_loss = train(model, train_loader, optimiser, loss_fn, device)
    # The "Accuracy" reported below is computed on val_loader, not on the training data.
    val_acc = evaluate(model, val_loader, device)
    print(f"Epoch {epoch+1}: Training Loss = {train_loss:.4f}, Accuracy = {val_acc:.4f}")

Epoch 1: Training Loss = 1.0438, Accuracy = 0.7100
Epoch 2: Training Loss = 0.7388, Accuracy = 0.7317
Epoch 3: Training Loss = 0.6356, Accuracy = 0.7680
Epoch 4: Training Loss = 0.5375, Accuracy = 0.7880
Epoch 5: Training Loss = 0.4632, Accuracy = 0.8003
Epoch 6: Training Loss = 0.3913, Accuracy = 0.8170
Epoch 7: Training Loss = 0.3195, Accuracy = 0.8220
Epoch 8: Training Loss = 0.2570, Accuracy = 0.8020
Epoch 9: Training Loss = 0.1924, Accuracy = 0.8040
Epoch 10: Training Loss = 0.1416, Accuracy = 0.7993
Epoch 11: Training Loss = 0.1046, Accuracy = 0.8057
Epoch 12: Training Loss = 0.0731, Accuracy = 0.8063
Epoch 13: Training Loss = 0.0630, Accuracy = 0.8003
Epoch 14: Training Loss = 0.0289, Accuracy = 0.7997
Epoch 15: Training Loss = 0.0206, Accuracy = 0.8183
Epoch 16: Training Loss = 0.0222, Accuracy = 0.8043
Epoch 17: Training Loss = 0.0127, Accuracy = 0.8107
Epoch 18: Training Loss = 0.0080, Accuracy = 0.8157
Epoch 19: Training Loss = 0.0068, Accuracy = 0.8160
Epoch 20: Training Lo