In [8]:
from __future__ import print_function, division

import torch
import torch.optim as optim

from torch.optim import lr_scheduler
from torchvision import datasets, transforms
import time
import os

import torch.nn as nn


In [14]:
class Net(nn.Module):
    """
    Input - 1x32x32 (Gray image)
    Output - 10 (Number for classification)
    """
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Flatten(),  # flatten the image
            nn.Linear(32 * 32 * 3, 512),  # fully connected layers
            # nn.Linear(32 * 32 * 3, 512) for color image handling
            nn.ReLU(),
            nn.Dropout(0.5),          # dropout for overfitting
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 10)
        )
        
    def forward(self, xb):
        return self.network(xb)

"""
# 3) ARCHITECTURE TUNING
# ==================================================================


class NetTuned(nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Flatten(),

            nn.Linear(32*32*1, 512),
            nn.BatchNorm1d(512),         # internal covariate shift reduction
            nn.LeakyReLU(0.1),           # nonzero gradient for x<0
            nn.Dropout(0.5),             # regularization

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.4),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.3),

            nn.Linear(128, 10)           # logits for 10 classes
        )

    def forward(self, x):
        return self.network(x)

# In main:
model_ft = NetTuned().to(device)

"""

In [15]:
# you can try different types of data augementation to increase the performance on test data.
#Original Data Augmentation
data_transforms = {
    'train': transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  
        transforms.RandomAffine(degrees=10, translate=(0,0.1)),
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])  # normalization
    ]),
    'test': transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]),
}
"""
# 2) DATA AUGMENTATION TUNING
# ==================================================================
# For different Data Augmentation, now in color

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(32, scale=(0.8, 1.0)),  # random crop
        transforms.RandomHorizontalFlip(p=0.5),              # flip
        transforms.ColorJitter(                               
            brightness=0.2, contrast=0.2, saturation=0.2
        ),                                                    # color jitter
        transforms.RandomAffine(degrees=10, translate=(0.1,0.1)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ]),
    'test': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ]),
}
"""

In [16]:
def train_test(model, criterion, optimizer, scheduler, num_epochs=25):
    train_loss = []
    train_accuracy = []
    history = {}

    # Training phase
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0
        start_time = time.time()

        for i, (inputs, labels) in enumerate(train_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            running_corrects += (preds == labels).sum().item()

            if (i + 1) % 100 == 0:
                print(f"Epoch [{epoch+1}] Iter [{i+1}] loss: {running_loss/((i+1)*inputs.size(0)):.3f}")

        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects / train_size * 100
        print(f"Epoch [{epoch+1}] Training Acc: {epoch_acc:.2f}% Loss: {epoch_loss:.3f}")
        train_loss.append(epoch_loss)
        train_accuracy.append(epoch_acc)
        scheduler.step()
        print(f"Epoch time: {time.time() - start_time:.1f}s")

    history['train_loss'] = train_loss
    history['train_accuracy'] = train_accuracy

    # Evaluation phase with overall & per-class accuracy
    model.eval()
    num_classes = len(image_datasets['test'].classes)
    class_correct = [0] * num_classes
    class_total = [0] * num_classes
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            total_samples += labels.size(0)
            total_correct += (preds == labels).sum().item()

            for t, p in zip(labels, preds):
                class_total[t.item()] += 1
                class_correct[t.item()] += int(p == t)

    overall_acc = total_correct / total_samples * 100
    print(f"Test Accuracy: {overall_acc:.2f}%")

    print("\nPer-class Accuracy:")
    for idx, cls in enumerate(image_datasets['test'].classes):
        acc = class_correct[idx] / class_total[idx] * 100
        print(f"  Class {cls:>2s}: {acc:.2f}% ({class_correct[idx]}/{class_total[idx]})")

    history['val_accuracy'] = overall_acc
    history['per_class'] = {cls: class_correct[i] / class_total[i] * 100
                            for i, cls in enumerate(image_datasets['test'].classes)}

    return history, overall_acc

In [17]:
if __name__ == '__main__':

    # change the data-path, recommand for relative path
    data_dir = './data'  # change with the true parh
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                            data_transforms[x])
                    for x in ['train', 'test']}

    data_dir = './data' # Suppose the dataset is stored under this folder
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                            data_transforms[x])
                    for x in ['train', 'test']} # Read train and test sets, respectively.

    train_dataloader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=128,
                                                shuffle=True, num_workers=4)

    test_dataloader = torch.utils.data.DataLoader(image_datasets['test'], batch_size=128,
                                                shuffle=False, num_workers=4)

    train_size =len(image_datasets['train'])


    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set device to "cpu" if you have no gpu

    end = time.time()
    model_ft = Net().to(device)
    print(model_ft.network)
    criterion = nn.CrossEntropyLoss()

    # paramters for optimizer
    optimizer_ft = optim.AdamW(
    model_ft.parameters(),
    lr=1e-3,
    weight_decay=1e-2
    )
    # Tuning Optimizer
    """
    # 1) OPTIMIZER TUNING
    # ==================================================================
    # Original paramters for optimizer
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-3)  """
    
    """ SGD with Momentum
    optimizer_ft = optim.SGD(
    model_ft.parameters(),
    lr=1e-3,
    momentum=0.9,
    ) 
    # AdamW
    optimizer = optim.AdamW(
    model_ft.parameters(),
    lr=1e-3,
    weight_decay=1e-2
    )"""


    # learning rate scheduler
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=15, gamma=0.9)
    
    # learning epoch
    history, accuracy = train_test(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
               num_epochs=50)
    
    print("time required %.2fs" %(time.time() - end))

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=3072, out_features=512, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): ReLU()
  (6): Dropout(p=0.3, inplace=False)
  (7): Linear(in_features=256, out_features=10, bias=True)
)
Epoch [1] Iter [100] loss: 2.341
Epoch [1] Iter [200] loss: 2.323
Epoch [1] Training Acc: 11.18% Loss: 2.319
Epoch time: 46.0s
Epoch [2] Iter [100] loss: 2.268
Epoch [2] Iter [200] loss: 2.250
Epoch [2] Training Acc: 16.23% Loss: 2.243
Epoch time: 43.5s
Epoch [3] Iter [100] loss: 2.184
Epoch [3] Iter [200] loss: 2.172
Epoch [3] Training Acc: 20.23% Loss: 2.168
Epoch time: 46.6s
Epoch [4] Iter [100] loss: 2.108
Epoch [4] Iter [200] loss: 2.098
Epoch [4] Training Acc: 24.26% Loss: 2.095
Epoch time: 46.0s
Epoch [5] Iter [100] loss: 2.066
Epoch [5] Iter [200] loss: 2.050
Epoch [5] Training Acc: 26.58% Loss: 2.043
Epoch time: 43.7s
Epoch [6] Iter [100] loss: 2.

In [None]:
# 4) LR DECAY STRATEGY TUNING
# ==================================================================

from torch.optim import lr_scheduler

# (a) Cosine annealing over full training
optimizer = optim.Adam(model_ft.parameters(), lr=1e-3)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)
hist_cos, acc_cos = train_test(model_ft, criterion, optimizer, scheduler, num_epochs=50)
print(f"CosineAnnealing → {acc_cos:.4f}")

# (b) Exponential decay with warm‐up
class WarmupExpLR(lr_scheduler._LRScheduler):
    def __init__(self, optimizer, warmup_epochs=5, gamma=0.95, last_epoch=-1):
        self.warmup_epochs = warmup_epochs
        self.gamma = gamma
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch < self.warmup_epochs:
            # ramp from 0.1× to 1.0× over warmup
            return [
                base_lr * (0.1 + 0.9 * self.last_epoch / self.warmup_epochs)
                for base_lr in self.base_lrs
            ]
        # then exponential decay
        return [group['lr'] * self.gamma for group in self.optimizer.param_groups]

scheduler = WarmupExpLR(optimizer, warmup_epochs=5, gamma=0.97)
hist_we, acc_we = train_test(model_ft, criterion, optimizer, scheduler, num_epochs=50)
print(f"Warmup+ExpDecay → {acc_we:.4f}")
