In [19]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, WeightedRandomSampler
import numpy as np
from tqdm import tqdm
from torchvision.models.efficientnet import EfficientNet_B0_Weights

In [20]:
# Paths
data_dir = "../FBMM/Unsplitted_Ready_Sets/set_01_class_balanced_augs_applied_splitted"
train_dir = os.path.join(data_dir, "train")
val_dir = os.path.join(data_dir, "val")
test_dir = os.path.join(data_dir, "test")
model_save_path = "./models/optimized_efficientnet_b0_emotion_model.pth"

In [21]:
# Configuration
batch_size = 16  # Increased for better GPU utilization
num_epochs = 50
initial_lr = 1e-4
weight_decay = 1e-4
num_classes = 7
img_height, img_width = 224, 224
seed = 42
accumulation_steps = 4  # Reduced to prevent NaNs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [22]:
# Emotion categories
emotion_classes = ["Anger", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]

# ✅ Use EfficientNet-B0 Weights
weights = EfficientNet_B0_Weights.IMAGENET1K_V1

In [23]:
# ✅ Improved Data Augmentation
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transform_val_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [24]:
# Load Datasets
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform_train)
val_dataset = datasets.ImageFolder(root=val_dir, transform=transform_val_test)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform_val_test)

In [25]:
# ✅ Compute Class Weights
labels = [label for _, label in train_dataset.samples]
class_counts = np.bincount(labels, minlength=num_classes)
class_weights = 1.0 / (class_counts + 1e-6)
class_weights /= class_weights.sum()
sample_weights = [class_weights[label] for _, label in train_dataset.samples]
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

# ✅ Create Data Loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, num_workers=8, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True)

In [26]:
# ✅ Load Pretrained EfficientNet-B0 & Fine-Tune
def load_model(num_classes):
    print("Loading and configuring the EfficientNet-B0 model...")

    model = models.efficientnet_b0(weights=weights)
    model = model.to(memory_format=torch.channels_last)

    # Freeze all layers initially
    for param in model.parameters():
        param.requires_grad = False

    # Unlock last 20% of convolutional layers + classifier head
    total_layers = len(list(model.features.children()))
    fine_tune_layers = int(total_layers * 0.35)

    for layer in list(model.features.children())[-fine_tune_layers:]:
        for param in layer.parameters():
            param.requires_grad = True

    # Modify classifier head
    model.classifier = nn.Sequential(
        nn.Dropout(0.4),  # Reduced dropout for better feature retention
        nn.Linear(model.classifier[1].in_features, num_classes)
    )

    # Ensure classifier is trainable
    for param in model.classifier.parameters():
        param.requires_grad = True

    return model.to(device)

# Load model
model = load_model(num_classes)

Loading and configuring the EfficientNet-B0 model...


In [27]:
# ✅ Label Smoothing to Prevent NaNs
criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights, dtype=torch.float32).to(device), label_smoothing=0.1)

# ✅ Use AdamW with Weight Decay
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=initial_lr, weight_decay=weight_decay)

# ✅ ReduceLROnPlateau for Adaptive Learning Rate
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=3)

# ✅ Enable Mixed Precision Training
scaler = torch.amp.GradScaler(device="cuda")

In [28]:
# ✅ Early Stopping Implementation
class EarlyStopping:
    def __init__(self, patience=7, delta=0.01):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

early_stopping = EarlyStopping(patience=5)

In [29]:
# ✅ Training Loop with Full Logging
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs):
    best_val_loss = np.inf

    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for i, (images, labels) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs}")):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast():
                outputs = model(images)
                loss = criterion(outputs, labels) / accumulation_steps

            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            if (i + 1) % accumulation_steps == 0:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / total
        train_acc = correct / total

        # ✅ Validation Step
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_total += labels.size(0)

        val_loss /= val_total
        val_acc = val_correct / val_total
        scheduler.step(val_loss)

        print(f"\nEpoch {epoch}: Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f}")
        print(f"Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_acc:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), model_save_path)
            print("✅ Model Saved!")
            early_stopping.counter = 0
        else:
            early_stopping(val_loss)
            if early_stopping.early_stop:
                print("⏳ Early Stopping Triggered!")
                break

# ✅ Evaluate Model
def evaluate_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total
    print(f"\n🎯 Final Test Accuracy: {accuracy:.4f}")

# Train and Evaluate
train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs)
model.load_state_dict(torch.load(model_save_path))
evaluate_model(model, test_loader)

  with torch.cuda.amp.autocast():
Epoch 1/50: 100%|██████████| 7751/7751 [1:07:27<00:00,  1.92it/s]



Epoch 1: Train Loss: 0.4085 | Train Accuracy: 0.3920
Validation Loss: 1.3932 | Validation Accuracy: 0.5291
✅ Model Saved!


Epoch 2/50: 100%|██████████| 7751/7751 [1:01:11<00:00,  2.11it/s]



Epoch 2: Train Loss: 0.3487 | Train Accuracy: 0.5278
Validation Loss: 1.2997 | Validation Accuracy: 0.5781
✅ Model Saved!


Epoch 3/50: 100%|██████████| 7751/7751 [54:51<00:00,  2.36it/s]  



Epoch 3: Train Loss: 0.3336 | Train Accuracy: 0.5591
Validation Loss: 1.2702 | Validation Accuracy: 0.5956
✅ Model Saved!


Epoch 4/50: 100%|██████████| 7751/7751 [54:10<00:00,  2.38it/s]  



Epoch 4: Train Loss: 0.3272 | Train Accuracy: 0.5757
Validation Loss: 1.2587 | Validation Accuracy: 0.5987
✅ Model Saved!


Epoch 5/50:   2%|▏         | 160/7751 [50:28<39:55:01, 18.93s/it]   


RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\utils\data\_utils\worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\utils\data\_utils\fetch.py", line 55, in fetch
    return self.collate_fn(data)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\utils\data\_utils\collate.py", line 398, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\utils\data\_utils\collate.py", line 211, in collate
    return [
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\utils\data\_utils\collate.py", line 212, in <listcomp>
    collate(samples, collate_fn_map=collate_fn_map)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\utils\data\_utils\collate.py", line 155, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\utils\data\_utils\collate.py", line 270, in collate_tensor_fn
    storage = elem._typed_storage()._new_shared(numel, device=elem.device)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\storage.py", line 1180, in _new_shared
    untyped_storage = torch.UntypedStorage._new_shared(
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\storage.py", line 400, in _new_shared
    return cls._new_using_filename_cpu(size)
RuntimeError: Couldn't open shared file mapping: <torch_25752_4176519947_40>, error code: <1455>
