In [1]:
import os
import random
import numpy as np
from pathlib import Path
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Reproducibility: hand one shared seed to every random number generator in use
SEED = 42
for _seed_rng in (
    random.seed,                 # Python's built-in RNG
    np.random.seed,              # NumPy's legacy global RNG
    torch.manual_seed,           # PyTorch default generator
    torch.cuda.manual_seed_all,  # PyTorch generators on every CUDA device
):
    _seed_rng(SEED)

# cuDNN flags: deterministic mode on, benchmark mode off
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [2]:
# Choose the compute backend in priority order: MPS first, then CUDA, else CPU
if torch.backends.mps.is_available():
    _backend_name = "mps"
elif torch.cuda.is_available():
    _backend_name = "cuda"
else:
    _backend_name = "cpu"

device = torch.device(_backend_name)
print(f"Using device: {device}")


Using device: cpu


In [3]:
# Dataset locations (relative to the notebook's working directory)
DATA_DIR = Path("dataset/train")  # training set path (mixed clean + noisy)
VAL_CLEAN_DIR = Path("dataset/val/clean")
VAL_NOISY_DIR = Path("dataset/val/noisy")

# Preprocessing constants shared by the training and validation pipelines, kept
# in one place so the two pipelines cannot drift apart.
IMAGE_SIZE = (250, 250)
NORM_MEAN = [0.485, 0.456, 0.406]  # per-channel mean passed to Normalize
NORM_STD = [0.229, 0.224, 0.225]   # per-channel std passed to Normalize

# Transforms: training adds a random horizontal flip; validation has no random step
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

val_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Datasets
train_dataset = datasets.ImageFolder(DATA_DIR, transform=train_transforms)
val_clean_dataset = datasets.ImageFolder(VAL_CLEAN_DIR, transform=val_transforms)
val_noisy_dataset = datasets.ImageFolder(VAL_NOISY_DIR, transform=val_transforms)

# ImageFolder assigns label indices from the class sub-folder names of each root
# independently. If a validation split had a different set of class folders, its
# labels would silently disagree with the training labels, so fail loudly instead.
for _split_name, _split_dataset in (
    ("val/clean", val_clean_dataset),
    ("val/noisy", val_noisy_dataset),
):
    if _split_dataset.class_to_idx != train_dataset.class_to_idx:
        raise ValueError(
            f"Class folders in '{_split_name}' do not match the training set; "
            "label indices would be misaligned."
        )

# Dataloaders
BATCH_SIZE = 16  # adjust: HP CPU=8–16, M2=32+
NUM_WORKERS = 0  # adjust for OS and machine
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_clean_loader = DataLoader(val_clean_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
val_noisy_loader = DataLoader(val_noisy_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation (clean) samples: {len(val_clean_dataset)}")
print(f"Number of validation (noisy) samples: {len(val_noisy_dataset)}")

Number of training samples: 9800
Number of validation (clean) samples: 2800
Number of validation (noisy) samples: 2800


In [4]:
NUM_CLASSES = 35

# Load pretrained EfficientNet-B0.
# `weights=` is the current torchvision API; the `pretrained=True` flag it replaces
# is deprecated. IMAGENET1K_V1 is the checkpoint that `pretrained=True` selected,
# so the starting weights are unchanged.
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Replace the stock classifier with a fresh Dropout + Linear head that outputs
# NUM_CLASSES logits; the Linear input width is read from the original head.
in_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.3),
    nn.Linear(in_features, NUM_CLASSES)
)

model = model.to(device)
print(model)



EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [5]:
# Loss shared by the training and validation passes
criterion = nn.CrossEntropyLoss()

# Phase 1: Head-only training -- switch off gradients for the whole backbone
model.features.requires_grad_(False)

# Only the classifier parameters are handed to the optimizer
optimizer = optim.AdamW(
    model.classifier.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

# Plateau scheduler in 'min' mode with factor 0.5 and patience 3.
# `verbose` is intentionally not passed (original note: fix for PyTorch >=2.1).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
)


In [6]:
from tqdm.auto import tqdm

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return averaged metrics.

    Args:
        model: Network to train; it is put in training mode before the pass.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` run once per batch.
        device: Device each batch is moved to before the forward pass.

    Returns:
        tuple: ``(epoch_loss, epoch_acc)`` -- accumulated loss divided by the number
        of samples seen, and the fraction of samples whose highest-scoring class
        equals the label.

    Raises:
        ValueError: If ``loader`` yields no samples, so no average can be computed.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    progress_bar = tqdm(loader, desc="Training", unit="batch")

    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the final division yields a
        # per-sample average (assumes criterion returns a batch mean -- TODO confirm
        # if a criterion with a different reduction is ever passed in).
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

        # Show running averages on the progress bar
        progress_bar.set_postfix({
            "loss": f"{running_loss/total:.4f}",
            "acc": f"{correct/total:.4f}"
        })

    if total == 0:
        # Previously this fell through to a bare ZeroDivisionError.
        raise ValueError("train_one_epoch: loader yielded no samples")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def validate(model, loader, criterion, device):
    """Evaluate ``model`` over ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; it is put in evaluation mode before the pass.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        device: Device each batch is moved to before the forward pass.

    Returns:
        tuple: ``(epoch_loss, epoch_acc)`` -- accumulated loss divided by the number
        of samples seen, and the fraction of samples whose highest-scoring class
        equals the label.

    Raises:
        ValueError: If ``loader`` yields no samples, so no average can be computed.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    progress_bar = tqdm(loader, desc="Validating", unit="batch")

    with torch.no_grad():
        for inputs, labels in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight the batch loss by batch size so the final division yields a
            # per-sample average (assumes criterion returns a batch mean -- TODO
            # confirm if a criterion with a different reduction is ever passed in).
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            # Show running averages on the progress bar
            progress_bar.set_postfix({
                "val_loss": f"{running_loss/total:.4f}",
                "val_acc": f"{correct/total:.4f}"
            })

    if total == 0:
        # Previously this fell through to a bare ZeroDivisionError.
        raise ValueError("validate: loader yielded no samples")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [8]:
EPOCHS_HEAD = 10
best_acc = 0.0

for epoch in range(EPOCHS_HEAD):
    # Epoch banner plus the learning rate in effect for this pass
    print(f"\nEpoch {epoch+1}/{EPOCHS_HEAD}")
    print(f"Current LR: {optimizer.param_groups[0]['lr']:.6f}")

    # One training pass, then evaluation on the clean validation split
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_clean_loader, criterion, device)
    print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

    # The plateau scheduler is driven by validation loss
    scheduler.step(val_loss)  # adjust LR based on val loss
    print(f"Updated LR: {optimizer.param_groups[0]['lr']:.6f}")

    # Checkpoint only on a strict improvement in validation accuracy
    if not val_acc > best_acc:
        continue
    best_acc = val_acc
    torch.save(model.state_dict(), "best_model_head.pth")

print(f"✅ Head-only training done. Best validation acc: {best_acc:.4f}")


Epoch 1/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:26<00:00,  1.59batch/s, loss=2.7941, acc=0.2711]
Validating: 100%|██████████| 175/175 [01:33<00:00,  1.87batch/s, val_loss=2.1188, val_acc=0.4632]


Train Loss: 2.7941, Acc: 0.2711
Val Loss: 2.1188, Acc: 0.4632
Updated LR: 0.001000

Epoch 2/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:06<00:00,  1.67batch/s, loss=2.1687, acc=0.4211]
Validating: 100%|██████████| 175/175 [01:30<00:00,  1.93batch/s, val_loss=1.8366, val_acc=0.5193]


Train Loss: 2.1687, Acc: 0.4211
Val Loss: 1.8366, Acc: 0.5193
Updated LR: 0.001000

Epoch 3/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:22<00:00,  1.60batch/s, loss=1.9477, acc=0.4685]
Validating: 100%|██████████| 175/175 [01:30<00:00,  1.93batch/s, val_loss=1.6562, val_acc=0.5618]


Train Loss: 1.9477, Acc: 0.4685
Val Loss: 1.6562, Acc: 0.5618
Updated LR: 0.001000

Epoch 4/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:08<00:00,  1.66batch/s, loss=1.8503, acc=0.4841]
Validating: 100%|██████████| 175/175 [01:31<00:00,  1.91batch/s, val_loss=1.5776, val_acc=0.5686]


Train Loss: 1.8503, Acc: 0.4841
Val Loss: 1.5776, Acc: 0.5686
Updated LR: 0.001000

Epoch 5/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:08<00:00,  1.66batch/s, loss=1.7699, acc=0.5069]
Validating: 100%|██████████| 175/175 [01:30<00:00,  1.94batch/s, val_loss=1.4994, val_acc=0.5854]


Train Loss: 1.7699, Acc: 0.5069
Val Loss: 1.4994, Acc: 0.5854
Updated LR: 0.001000

Epoch 6/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:03<00:00,  1.69batch/s, loss=1.7264, acc=0.5133]
Validating: 100%|██████████| 175/175 [01:30<00:00,  1.94batch/s, val_loss=1.4866, val_acc=0.5875]


Train Loss: 1.7264, Acc: 0.5133
Val Loss: 1.4866, Acc: 0.5875
Updated LR: 0.001000

Epoch 7/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:05<00:00,  1.68batch/s, loss=1.6786, acc=0.5272]
Validating: 100%|██████████| 175/175 [01:29<00:00,  1.95batch/s, val_loss=1.4727, val_acc=0.5989]


Train Loss: 1.6786, Acc: 0.5272
Val Loss: 1.4727, Acc: 0.5989
Updated LR: 0.001000

Epoch 8/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:03<00:00,  1.69batch/s, loss=1.6572, acc=0.5353]
Validating: 100%|██████████| 175/175 [01:29<00:00,  1.95batch/s, val_loss=1.4434, val_acc=0.5943]


Train Loss: 1.6572, Acc: 0.5353
Val Loss: 1.4434, Acc: 0.5943
Updated LR: 0.001000

Epoch 9/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:05<00:00,  1.68batch/s, loss=1.6078, acc=0.5395]
Validating: 100%|██████████| 175/175 [01:29<00:00,  1.95batch/s, val_loss=1.4381, val_acc=0.5946]


Train Loss: 1.6078, Acc: 0.5395
Val Loss: 1.4381, Acc: 0.5946
Updated LR: 0.001000

Epoch 10/10
Current LR: 0.001000


Training: 100%|██████████| 613/613 [06:02<00:00,  1.69batch/s, loss=1.5965, acc=0.5495]
Validating: 100%|██████████| 175/175 [01:29<00:00,  1.95batch/s, val_loss=1.4100, val_acc=0.5986]

Train Loss: 1.5965, Acc: 0.5495
Val Loss: 1.4100, Acc: 0.5986
Updated LR: 0.001000
✅ Head-only training done. Best validation acc: 0.5989





In [9]:
# Rebuild the same architecture as the head-training stage.
# weights=None skips the pretrained download: the checkpoint loaded below must
# supply every tensor (load_state_dict is strict by default), so pretrained
# weights would be overwritten immediately anyway.
model = models.efficientnet_b0(weights=None)
in_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.3),
    nn.Linear(in_features, NUM_CLASSES)
)

# Load head-only trained weights.
# weights_only=True limits unpickling to tensors and plain containers, which is
# all a state_dict holds, so a tampered checkpoint file cannot execute code.
model.load_state_dict(
    torch.load("best_model_head.pth", map_location=device, weights_only=True)
)
model = model.to(device)
print("Head-only model loaded for fine-tuning.")



Head-only model loaded for fine-tuning.


In [10]:
# -------------------------------
# Unfreeze Backbone for Fine-tuning
# -------------------------------
# Option 1 (active): make every backbone parameter trainable
model.features.requires_grad_(True)

# Option 2 (if CPU): fine-tune only the last N feature stages -- adjust N if needed.
# Stages are selected by position rather than by matching digits inside
# parameter names, which would also hit unrelated sub-module indices.
# N = 2
# model.features.requires_grad_(False)
# for stage in model.features[-N:]:
#     stage.requires_grad_(True)


In [11]:
# -------------------------------
# Fine-tuning Loss & Optimizer
# -------------------------------
criterion = nn.CrossEntropyLoss()

# All model parameters are optimised here, with a 10x smaller learning rate
# than the 1e-3 used for the head-only phase
optimizer = optim.AdamW(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)

# Same plateau schedule as before: 'min' mode, factor 0.5, patience 3
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
)


In [12]:
# -------------------------------
# Fine-tuning Loop
# -------------------------------
EPOCHS_FINE = 10  # adjust based on time
best_val_acc = 0.0

for epoch in range(EPOCHS_FINE):
    # Epoch banner plus the learning rate in effect for this pass
    print(f"\nFine-tuning Epoch {epoch+1}/{EPOCHS_FINE}")
    print(f"Current LR: {optimizer.param_groups[0]['lr']:.6f}")

    # One training pass, then evaluation on the clean validation split
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_clean_loader, criterion, device)
    print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

    # The plateau scheduler is driven by validation loss
    scheduler.step(val_loss)
    print(f"Updated LR: {optimizer.param_groups[0]['lr']:.6f}")

    # Checkpoint only on a strict improvement in validation accuracy
    if not val_acc > best_val_acc:
        continue
    best_val_acc = val_acc
    torch.save(model.state_dict(), "best_model.pth")
    print(f"✅ New best model saved at epoch {epoch+1} with val_acc: {best_val_acc:.4f}")

print(f"✅ Fine-tuning completed. Best validation acc: {best_val_acc:.4f}")


Fine-tuning Epoch 1/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [18:58<00:00,  1.86s/batch, loss=1.1062, acc=0.6792]
Validating: 100%|██████████| 175/175 [01:30<00:00,  1.94batch/s, val_loss=0.6306, val_acc=0.8221]


Train Loss: 1.1062, Acc: 0.6792
Val Loss: 0.6306, Acc: 0.8221
Updated LR: 0.000100
✅ New best model saved at epoch 1 with val_acc: 0.8221

Fine-tuning Epoch 2/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [18:57<00:00,  1.86s/batch, loss=0.5755, acc=0.8337]
Validating: 100%|██████████| 175/175 [01:29<00:00,  1.95batch/s, val_loss=0.4636, val_acc=0.8700]


Train Loss: 0.5755, Acc: 0.8337
Val Loss: 0.4636, Acc: 0.8700
Updated LR: 0.000100
✅ New best model saved at epoch 2 with val_acc: 0.8700

Fine-tuning Epoch 3/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [18:50<00:00,  1.84s/batch, loss=0.3530, acc=0.9046]
Validating: 100%|██████████| 175/175 [01:31<00:00,  1.92batch/s, val_loss=0.3943, val_acc=0.8925]


Train Loss: 0.3530, Acc: 0.9046
Val Loss: 0.3943, Acc: 0.8925
Updated LR: 0.000100
✅ New best model saved at epoch 3 with val_acc: 0.8925

Fine-tuning Epoch 4/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [18:46<00:00,  1.84s/batch, loss=0.2236, acc=0.9385]
Validating: 100%|██████████| 175/175 [01:30<00:00,  1.94batch/s, val_loss=0.3941, val_acc=0.8932]


Train Loss: 0.2236, Acc: 0.9385
Val Loss: 0.3941, Acc: 0.8932
Updated LR: 0.000100
✅ New best model saved at epoch 4 with val_acc: 0.8932

Fine-tuning Epoch 5/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [18:45<00:00,  1.84s/batch, loss=0.1376, acc=0.9642]
Validating: 100%|██████████| 175/175 [01:30<00:00,  1.93batch/s, val_loss=0.3882, val_acc=0.9025]


Train Loss: 0.1376, Acc: 0.9642
Val Loss: 0.3882, Acc: 0.9025
Updated LR: 0.000100
✅ New best model saved at epoch 5 with val_acc: 0.9025

Fine-tuning Epoch 6/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [18:47<00:00,  1.84s/batch, loss=0.1023, acc=0.9723]
Validating: 100%|██████████| 175/175 [01:31<00:00,  1.92batch/s, val_loss=0.3826, val_acc=0.9021]


Train Loss: 0.1023, Acc: 0.9723
Val Loss: 0.3826, Acc: 0.9021
Updated LR: 0.000100

Fine-tuning Epoch 7/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [26:49<00:00,  2.63s/batch, loss=0.0928, acc=0.9747]    
Validating: 100%|██████████| 175/175 [01:32<00:00,  1.90batch/s, val_loss=0.3814, val_acc=0.9075]


Train Loss: 0.0928, Acc: 0.9747
Val Loss: 0.3814, Acc: 0.9075
Updated LR: 0.000100
✅ New best model saved at epoch 7 with val_acc: 0.9075

Fine-tuning Epoch 8/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [19:53<00:00,  1.95s/batch, loss=0.0779, acc=0.9782]
Validating: 100%|██████████| 175/175 [01:31<00:00,  1.91batch/s, val_loss=0.3753, val_acc=0.9104]


Train Loss: 0.0779, Acc: 0.9782
Val Loss: 0.3753, Acc: 0.9104
Updated LR: 0.000100
✅ New best model saved at epoch 8 with val_acc: 0.9104

Fine-tuning Epoch 9/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [19:33<00:00,  1.92s/batch, loss=0.0616, acc=0.9823]
Validating: 100%|██████████| 175/175 [01:31<00:00,  1.91batch/s, val_loss=0.3763, val_acc=0.9096]


Train Loss: 0.0616, Acc: 0.9823
Val Loss: 0.3763, Acc: 0.9096
Updated LR: 0.000100

Fine-tuning Epoch 10/10
Current LR: 0.000100


Training: 100%|██████████| 613/613 [19:21<00:00,  1.90s/batch, loss=0.0508, acc=0.9866]
Validating: 100%|██████████| 175/175 [01:31<00:00,  1.92batch/s, val_loss=0.3898, val_acc=0.9068]

Train Loss: 0.0508, Acc: 0.9866
Val Loss: 0.3898, Acc: 0.9068
Updated LR: 0.000100
✅ Fine-tuning completed. Best validation acc: 0.9104



