In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, WeightedRandomSampler
import numpy as np
from tqdm import tqdm
from torchvision.models.efficientnet import EfficientNet_B2_Weights

# Paths
data_dir = "../FBMM/test"
train_dir = os.path.join(data_dir, "train")
val_dir = os.path.join(data_dir, "val")
test_dir = os.path.join(data_dir, "test")
model_save_path = "./models/optimized_efficientnet_b2_emotion_model.pth"

# Configuration
batch_size = 32  # ✅ Reduced for better generalization
num_epochs = 50  # ✅ Increased to 50 with early stopping
initial_lr = 1e-3
weight_decay = 1e-4  # ✅ L2 Regularization
num_classes = 7
img_height, img_width = 260, 260
seed = 42  # For reproducibility
accumulation_steps = 2  # ✅ Gradient accumulation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Emotion categories
emotion_classes = ["Anger", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]

# Data Augmentation & Normalization
weights = EfficientNet_B2_Weights.IMAGENET1K_V1
transform = transforms.Compose([
    transforms.Resize((img_height, img_width)),
    transforms.ToTensor(),
    weights.transforms()
])

# Load Datasets
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
val_dataset = datasets.ImageFolder(root=val_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

# Compute Class Weights
def compute_class_weights(dataset, num_classes):
    labels = np.array([label for _, label in dataset.samples])
    class_counts = np.bincount(labels, minlength=num_classes)
    class_weights = 1.0 / (class_counts + 1e-6)
    class_weights /= class_weights.sum()
    return torch.tensor(class_weights, dtype=torch.float32).to(device)

class_weights = compute_class_weights(train_dataset, num_classes)

# Data Loaders with Optimized Multi-Processing
def get_sampler(dataset):
    labels = np.array([label for _, label in dataset.samples])
    class_sample_counts = np.bincount(labels)
    weights = 1.0 / (class_sample_counts[labels] + 1e-6)
    return WeightedRandomSampler(weights, len(weights))

train_loader = DataLoader(
    train_dataset, batch_size=batch_size, sampler=get_sampler(train_dataset),
    num_workers=0, pin_memory=True  # 🚀 Fix for Windows multiprocessing issue
)

val_loader = DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False,
    num_workers=0, pin_memory=True
)

test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False,
    num_workers=0, pin_memory=True
)

# Load Pretrained EfficientNet Model & Fine-Tuning
def load_model(num_classes):
    print("Loading and configuring the model...")
    
    model = models.efficientnet_b2(weights=weights)  # ✅ Using ImageNet weights
    model = model.to(memory_format=torch.channels_last)  # ✅ Optimize memory format for better GPU efficiency

    # Freeze all layers initially
    for param in model.parameters():
        param.requires_grad = False

    # Unlock last 20% of convolutional layers + classifier head
    total_layers = len(list(model.features.children()))
    fine_tune_layers = int(total_layers * 0.2)

    for layer in list(model.features.children())[-fine_tune_layers:]:
        for param in layer.parameters():
            param.requires_grad = True

    # Modify classifier head
    model.classifier = nn.Sequential(
        nn.Dropout(0.6),
        nn.Linear(model.classifier[1].in_features, num_classes)
    )

    # Ensure classifier is trainable
    for param in model.classifier.parameters():
        param.requires_grad = True

    return model.to(device)

# Load model
model = load_model(num_classes)

# Loss & Optimizer
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=initial_lr, weight_decay=weight_decay)  # ✅ AdamW with L2 regularization

# ✅ Learning Rate Warm-Up & Scheduling
scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=initial_lr, epochs=num_epochs, steps_per_epoch=len(train_loader), pct_start=0.1)

# ✅ Enable Mixed Precision Training
scaler = torch.amp.GradScaler(device="cuda")

# Training Loop with Early Stopping & Optimizations
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, patience=7):
    best_val_loss = np.inf
    epochs_no_improve = 0

    for epoch in range(1, num_epochs + 1):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for i, (images, labels) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs}")):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast():  # ✅ Mixed Precision Training
                outputs = model(images)
                loss = criterion(outputs, labels) / accumulation_steps  # ✅ Gradient Accumulation

            scaler.scale(loss).backward()

            if (i + 1) % accumulation_steps == 0:  # ✅ Update weights after accumulation steps
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                scheduler.step()  # ✅ Apply LR warm-up & decay

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / total
        train_acc = correct / total

        # Validation
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_total += labels.size(0)

        val_loss /= val_total
        val_acc = val_correct / val_total

        print(f"\nEpoch {epoch}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        # Early Stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), model_save_path)
            print("Model Saved!")
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered!")
                break

# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs)

# Evaluation on Test Set
def evaluate_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total
    print(f"Test Accuracy: {accuracy:.4f}")

# Load best model for testing
model.load_state_dict(torch.load(model_save_path))
evaluate_model(model, test_loader)

Loading and configuring the model...


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():  # ✅ Mixed Precision Training
Epoch 1/50: 100%|██████████| 5/5 [00:06<00:00,  1.34s/it]



Epoch 1: Train Loss: 0.9867, Train Acc: 0.1286 | Val Loss: 1.9407, Val Acc: 0.1500
Model Saved!


Epoch 2/50: 100%|██████████| 5/5 [00:06<00:00,  1.31s/it]



Epoch 2: Train Loss: 1.0071, Train Acc: 0.1071 | Val Loss: 1.9424, Val Acc: 0.1643


Epoch 3/50: 100%|██████████| 5/5 [00:06<00:00,  1.30s/it]



Epoch 3: Train Loss: 0.9697, Train Acc: 0.1857 | Val Loss: 1.9444, Val Acc: 0.1571


Epoch 4/50: 100%|██████████| 5/5 [00:06<00:00,  1.29s/it]



Epoch 4: Train Loss: 0.9936, Train Acc: 0.1143 | Val Loss: 1.9376, Val Acc: 0.1500
Model Saved!


Epoch 5/50: 100%|██████████| 5/5 [00:06<00:00,  1.30s/it]



Epoch 5: Train Loss: 0.9764, Train Acc: 0.1786 | Val Loss: 1.9313, Val Acc: 0.1500
Model Saved!


Epoch 6/50: 100%|██████████| 5/5 [00:06<00:00,  1.32s/it]



Epoch 6: Train Loss: 0.9527, Train Acc: 0.1857 | Val Loss: 1.9214, Val Acc: 0.1857
Model Saved!


Epoch 7/50: 100%|██████████| 5/5 [00:06<00:00,  1.31s/it]



Epoch 7: Train Loss: 0.9390, Train Acc: 0.2143 | Val Loss: 1.9120, Val Acc: 0.1857
Model Saved!


Epoch 8/50: 100%|██████████| 5/5 [00:06<00:00,  1.30s/it]



Epoch 8: Train Loss: 0.8950, Train Acc: 0.3786 | Val Loss: 1.9060, Val Acc: 0.1786
Model Saved!


Epoch 9/50: 100%|██████████| 5/5 [00:06<00:00,  1.27s/it]



Epoch 9: Train Loss: 0.8917, Train Acc: 0.3357 | Val Loss: 1.8966, Val Acc: 0.2071
Model Saved!


Epoch 10/50: 100%|██████████| 5/5 [00:06<00:00,  1.29s/it]



Epoch 10: Train Loss: 0.8798, Train Acc: 0.3714 | Val Loss: 1.8898, Val Acc: 0.2143
Model Saved!


Epoch 11/50: 100%|██████████| 5/5 [00:06<00:00,  1.27s/it]



Epoch 11: Train Loss: 0.8289, Train Acc: 0.5000 | Val Loss: 1.8833, Val Acc: 0.2000
Model Saved!


Epoch 12/50: 100%|██████████| 5/5 [00:06<00:00,  1.31s/it]



Epoch 12: Train Loss: 0.8131, Train Acc: 0.4786 | Val Loss: 1.8780, Val Acc: 0.2071
Model Saved!


Epoch 13/50: 100%|██████████| 5/5 [00:06<00:00,  1.29s/it]



Epoch 13: Train Loss: 0.7528, Train Acc: 0.6143 | Val Loss: 1.8725, Val Acc: 0.1929
Model Saved!


Epoch 14/50: 100%|██████████| 5/5 [00:06<00:00,  1.32s/it]



Epoch 14: Train Loss: 0.7538, Train Acc: 0.5857 | Val Loss: 1.8666, Val Acc: 0.2286
Model Saved!


Epoch 15/50: 100%|██████████| 5/5 [00:06<00:00,  1.34s/it]



Epoch 15: Train Loss: 0.6885, Train Acc: 0.6929 | Val Loss: 1.8605, Val Acc: 0.2643
Model Saved!


Epoch 16/50: 100%|██████████| 5/5 [00:06<00:00,  1.31s/it]



Epoch 16: Train Loss: 0.6596, Train Acc: 0.6786 | Val Loss: 1.8510, Val Acc: 0.2929
Model Saved!


Epoch 17/50: 100%|██████████| 5/5 [00:06<00:00,  1.29s/it]



Epoch 17: Train Loss: 0.6608, Train Acc: 0.6714 | Val Loss: 1.8420, Val Acc: 0.3143
Model Saved!


Epoch 18/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 18: Train Loss: 0.6323, Train Acc: 0.7000 | Val Loss: 1.8321, Val Acc: 0.3071
Model Saved!


Epoch 19/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 19: Train Loss: 0.5588, Train Acc: 0.8286 | Val Loss: 1.8178, Val Acc: 0.3143
Model Saved!


Epoch 20/50: 100%|██████████| 5/5 [00:06<00:00,  1.27s/it]



Epoch 20: Train Loss: 0.5289, Train Acc: 0.7929 | Val Loss: 1.8071, Val Acc: 0.3214
Model Saved!


Epoch 21/50: 100%|██████████| 5/5 [00:06<00:00,  1.30s/it]



Epoch 21: Train Loss: 0.4862, Train Acc: 0.8429 | Val Loss: 1.7999, Val Acc: 0.3429
Model Saved!


Epoch 22/50: 100%|██████████| 5/5 [00:06<00:00,  1.27s/it]



Epoch 22: Train Loss: 0.5443, Train Acc: 0.7571 | Val Loss: 1.7929, Val Acc: 0.3286
Model Saved!


Epoch 23/50: 100%|██████████| 5/5 [00:06<00:00,  1.27s/it]



Epoch 23: Train Loss: 0.4901, Train Acc: 0.7929 | Val Loss: 1.7915, Val Acc: 0.3214
Model Saved!


Epoch 24/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 24: Train Loss: 0.4713, Train Acc: 0.8214 | Val Loss: 1.7925, Val Acc: 0.2929


Epoch 25/50: 100%|██████████| 5/5 [00:06<00:00,  1.27s/it]



Epoch 25: Train Loss: 0.4183, Train Acc: 0.8857 | Val Loss: 1.7883, Val Acc: 0.2929
Model Saved!


Epoch 26/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 26: Train Loss: 0.4408, Train Acc: 0.8357 | Val Loss: 1.7951, Val Acc: 0.3071


Epoch 27/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 27: Train Loss: 0.3916, Train Acc: 0.8786 | Val Loss: 1.7846, Val Acc: 0.2857
Model Saved!


Epoch 28/50: 100%|██████████| 5/5 [00:06<00:00,  1.30s/it]



Epoch 28: Train Loss: 0.3694, Train Acc: 0.8929 | Val Loss: 1.7860, Val Acc: 0.2857


Epoch 29/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 29: Train Loss: 0.3389, Train Acc: 0.9286 | Val Loss: 1.7959, Val Acc: 0.2857


Epoch 30/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 30: Train Loss: 0.3327, Train Acc: 0.9214 | Val Loss: 1.7980, Val Acc: 0.2929


Epoch 31/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 31: Train Loss: 0.3076, Train Acc: 0.9714 | Val Loss: 1.7901, Val Acc: 0.3000


Epoch 32/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 32: Train Loss: 0.3178, Train Acc: 0.9357 | Val Loss: 1.7867, Val Acc: 0.3071


Epoch 33/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 33: Train Loss: 0.3152, Train Acc: 0.9214 | Val Loss: 1.7810, Val Acc: 0.2929
Model Saved!


Epoch 34/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 34: Train Loss: 0.3002, Train Acc: 0.9357 | Val Loss: 1.7835, Val Acc: 0.3071


Epoch 35/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 35: Train Loss: 0.2551, Train Acc: 0.9500 | Val Loss: 1.7832, Val Acc: 0.2929


Epoch 36/50: 100%|██████████| 5/5 [00:06<00:00,  1.29s/it]



Epoch 36: Train Loss: 0.2616, Train Acc: 0.9786 | Val Loss: 1.7870, Val Acc: 0.2857


Epoch 37/50: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]



Epoch 37: Train Loss: 0.2959, Train Acc: 0.8929 | Val Loss: 1.7959, Val Acc: 0.2714


Epoch 38/50: 100%|██████████| 5/5 [00:06<00:00,  1.29s/it]



Epoch 38: Train Loss: 0.2656, Train Acc: 0.9429 | Val Loss: 1.8027, Val Acc: 0.2929


Epoch 39/50:  20%|██        | 1/5 [00:03<00:12,  3.06s/it]


KeyboardInterrupt: 