In [1]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
from torchvision import datasets, transforms
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR

# Seed PyTorch's global RNG so that everything drawing from it (for example
# parameter initialisation) is repeatable after "Restart Kernel -> Run All".
# GPU kernels may still introduce small nondeterminism.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


class LetterCNN(nn.Module):
    """Small LeNet-style CNN: two conv/pool stages followed by three linear layers.

    The first fully connected layer expects ``16 * 5 * 5`` features, which is
    what the two unpadded 5x5 convolutions and 2x2 poolings produce for a
    32x32 input.  Any other spatial size raises a shape error in ``fc1``
    instead of being silently folded into the batch dimension.

    Args:
        num_classes: Number of output logits (defaults to 3).
        in_channels: Number of channels of the input images (defaults to 3).
    """

    def __init__(self, num_classes=3, in_channels=3):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.out = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, t):
        """Return raw (un-normalised) class logits of shape ``(N, num_classes)``.

        Shape comments below assume an input of shape ``(N, in_channels, 32, 32)``.
        """
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        # (N, 6, 14, 14)

        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        # (N, 16, 5, 5)

        # Flatten everything except the batch dimension.  Unlike
        # ``reshape(-1, 16 * 5 * 5)`` this can never change the batch size, so
        # a wrongly sized input fails loudly in ``fc1``.
        t = t.flatten(start_dim=1)
        t = self.fc1(t)
        t = F.relu(t)
        # (N, 120)

        t = self.fc2(t)
        t = F.relu(t)
        # (N, 84)

        t = self.out(t)
        # (N, num_classes)
        return t

Device: cuda


In [2]:
# Per-channel normalisation constants: (x - 0.5) / 0.5 maps ToTensor's
# [0, 1] output range onto [-1, 1].
mean = (0.5, 0.5, 0.5)
std = (0.5, 0.5, 0.5)

# Tunable pipeline settings, named once instead of repeated as literals.
IMAGE_SIZE = (32, 32)  # spatial size LetterCNN's fc1 (16 * 5 * 5 inputs) is built for
BATCH_SIZE = 10
NUM_WORKERS = 2
# Pinned host memory only helps host-to-GPU copies; skip it on CPU-only runs.
PIN_MEMORY = device.type == "cuda"

# Training images get light geometric augmentation (small rotation and shift).
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomRotation(10),
    transforms.RandomAffine(
        degrees=0,
        translate=(0.1, 0.1),
    ),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Evaluation images are only resized and normalised, with no randomness.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

train_set = datasets.ImageFolder(
    root="data/train",
    transform=train_transform,
)

test_set = datasets.ImageFolder(
    root="data/test",
    transform=test_transform,
)

# Label indices are derived from the folder names of each root separately.
# If the two roots disagree, evaluation would silently score against the
# wrong labels, so fail early instead.
assert train_set.class_to_idx == test_set.class_to_idx, (
    f"train/test class mappings differ: "
    f"{train_set.class_to_idx} vs {test_set.class_to_idx}"
)

train_loader = DataLoader(
    dataset=train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

test_loader = DataLoader(
    dataset=test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)

print(train_set.classes)
print(test_set.classes)


['p', 'sh', 't']
['p', 'sh', 't']


In [3]:
# Length of the run and the best validation accuracy seen so far.
num_epochs = 60
best_acc = 0.0

# Build the network on the selected device and snapshot its untrained
# weights so a later experiment can restart from the same initial state.
model = LetterCNN().to(device)
torch.save(model.state_dict(), "init_state.pth")

# Cross-entropy on raw logits, Adam with a small L2 penalty.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

# The learning rate is multiplied by 0.1 once 20 and again once 40
# scheduler steps have been taken.
scheduler = MultiStepLR(optimizer, milestones=[20, 40], gamma=0.1)


@torch.no_grad()
def evaluate(model, data_loader, device, criterion=None):
    """Compute the average loss and accuracy of ``model`` over ``data_loader``.

    The model is switched to eval mode and left in that mode; callers that
    continue training must call ``model.train()`` again.

    Args:
        model: Module mapping a batch of inputs to per-class logits.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        criterion: Optional loss callable ``criterion(outputs, labels)``.
            When omitted, the notebook-level ``criterion`` is used if one is
            defined (the historical behaviour); otherwise plain mean-reduced
            cross-entropy.

    Returns:
        Tuple ``(avg_loss, accuracy)`` averaged over all samples seen.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    if criterion is None:
        # Backward compatible default: earlier versions always read the
        # module-level ``criterion``.
        criterion = globals().get("criterion", F.cross_entropy)

    model.eval()
    correct = 0
    total = 0
    running_loss = 0.0

    for images, labels in data_loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # Weight by batch size so a smaller final batch is not over-counted
        # (assumes the criterion returns a per-batch mean).
        running_loss += loss.item() * images.size(0)
        _, preds = outputs.max(1)
        correct += preds.eq(labels).sum().item()
        total += labels.size(0)

    if total == 0:
        raise ValueError("evaluate() received no samples from data_loader")

    avg_loss = running_loss / total
    acc = correct / total
    return avg_loss, acc

In [4]:
# Main training loop: one pass over train_loader per epoch, followed by an
# evaluation pass over test_loader and a checkpoint whenever accuracy improves.
#
# NOTE(review): the "best" checkpoint is selected using test_loader, so the
# reported best accuracy is optimistically biased; consider holding out a
# separate validation split for model selection.
for epoch in range(1, num_epochs + 1):
    model.train()
    running_loss = 0.0
    total = 0
    correct = 0

    for images, labels in train_loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by batch size so the epoch average is
        # a true per-sample mean even when the last batch is smaller.
        running_loss += loss.item() * images.size(0)
        total += labels.size(0)
        _, preds = outputs.max(1)
        correct += preds.eq(labels).sum().item()

    train_loss = running_loss / total
    train_acc = correct / total

    val_loss, val_acc = evaluate(model, test_loader, device)

    # Read the learning rate *before* stepping the scheduler, so the log line
    # reports the rate this epoch was trained with.  Reading it afterwards
    # would show the next epoch's rate at every milestone.
    epoch_lr = optimizer.param_groups[0]["lr"]
    scheduler.step()

    print(
        f"Epoch [{epoch}/{num_epochs}] "
        f"train_loss={train_loss:.4f} train_acc={train_acc:.4f} "
        f"val_loss={val_loss:.4f} val_acc={val_acc:.4f} "
        f"lr={epoch_lr:.5f}"
    )

    # Keep only the weights with the highest evaluation accuracy so far.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "best.pth")
        print(f"  -> new best model saved (val_acc={best_acc:.4f})")

Epoch [1/60] train_loss=1.0979 train_acc=0.3498 val_loss=1.0809 val_acc=0.5155 lr=0.00100
  -> new best model saved (val_acc=0.5155)
Epoch [2/60] train_loss=1.0302 train_acc=0.4593 val_loss=1.0148 val_acc=0.5271 lr=0.00100
  -> new best model saved (val_acc=0.5271)
Epoch [3/60] train_loss=0.9278 train_acc=0.5523 val_loss=0.8280 val_acc=0.6163 lr=0.00100
  -> new best model saved (val_acc=0.6163)
Epoch [4/60] train_loss=0.7522 train_acc=0.6512 val_loss=0.6040 val_acc=0.7636 lr=0.00100
  -> new best model saved (val_acc=0.7636)
Epoch [5/60] train_loss=0.6404 train_acc=0.7141 val_loss=0.5069 val_acc=0.7907 lr=0.00100
  -> new best model saved (val_acc=0.7907)
Epoch [6/60] train_loss=0.5645 train_acc=0.7636 val_loss=0.4047 val_acc=0.8411 lr=0.00100
  -> new best model saved (val_acc=0.8411)
Epoch [7/60] train_loss=0.4884 train_acc=0.8052 val_loss=0.3838 val_acc=0.8333 lr=0.00100
Epoch [8/60] train_loss=0.4123 train_acc=0.8488 val_loss=0.3700 val_acc=0.8372 lr=0.00100
Epoch [9/60] train_los