### Data Loading Pipeline

In [None]:
import os
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms


def toTensor(arr, long=False):
    """Turn a numpy array into a torch tensor.

    String-typed arrays (unicode or bytes dtype) are parsed as floats first.
    A zero-dimensional array is unwrapped to a Python scalar before the
    conversion, so the result is a 0-d tensor.

    Args:
        arr: numpy array to convert.
        long: when true the tensor dtype is ``torch.long``; otherwise it is
            ``torch.float32``.

    Returns:
        A ``torch.Tensor`` with the requested dtype.
    """
    targetDtype = torch.long if long else torch.float32
    isText = arr.dtype.kind in {'U', 'S'}
    numeric = arr.astype(float) if isText else arr
    payload = numeric.item() if numeric.shape == () else numeric
    return torch.tensor(payload, dtype=targetDtype)


class FacialDataset(Dataset):
    """Dataset of face images paired with per-image ``.npy`` annotations.

    Every ``<id>.jpg`` in the image directory is matched with four files in
    the annotation directory: ``<id>_exp.npy``, ``<id>_val.npy``,
    ``<id>_aro.npy`` and ``<id>_lnd.npy``.

    Args:
        imgDir: directory holding the ``.jpg`` images. File stems must be
            integers, because the files are ordered numerically.
        anno_dir: directory holding the ``.npy`` annotation files.
        img_size: side length used by the default transform's resize.
        transform: callable applied to the RGB PIL image. When omitted, the
            default is resize -> ``ToTensor`` -> normalisation with the
            mean/std constants below.
    """

    def __init__(self, imgDir, anno_dir, img_size=224, transform=None):
        self.img_dir = imgDir
        self.anno_dir = anno_dir
        self.transform = transform if transform else transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        # Numeric sort so that "10.jpg" comes after "2.jpg".
        self.image_files = sorted(
            (f for f in os.listdir(imgDir) if f.endswith(".jpg")),
            key=lambda x: int(x.split(".")[0]),
        )

    def __len__(self):
        """Return the number of ``.jpg`` images found at construction time."""
        return len(self.image_files)

    def _loadAnnotation(self, base_id, suffix):
        """Load ``<anno_dir>/<base_id>_<suffix>.npy`` as a numpy array."""
        return np.load(os.path.join(self.anno_dir, f"{base_id}_{suffix}.npy"))

    def __getitem__(self, idx):
        """Return ``(image, expression, valence, arousal, landmarks)`` for ``idx``.

        The expression label is a ``torch.long`` tensor; valence, arousal and
        landmarks are ``torch.float32``. Landmarks are reshaped to two
        columns (the original note gives the shape as ``(68, 2)``).
        """
        img_name = self.image_files[idx]
        img_path = os.path.join(self.img_dir, img_name)
        # Image.open keeps the file open lazily; the context manager closes
        # the handle once convert() has produced an independent RGB copy.
        with Image.open(img_path) as handle:
            img = handle.convert("RGB")

        base_id = os.path.splitext(img_name)[0]  # e.g. "0"
        exp_t = toTensor(self._loadAnnotation(base_id, "exp"), long=True)
        val_t = toTensor(self._loadAnnotation(base_id, "val"))
        aro_t = toTensor(self._loadAnnotation(base_id, "aro"))

        lnd = self._loadAnnotation(base_id, "lnd").astype(float).reshape(-1, 2)
        lnd_t = torch.tensor(lnd, dtype=torch.float32)

        img = self.transform(img)

        return img, exp_t, val_t, aro_t, lnd_t


def getDataloader(root="Dataset", batch_size=32, img_size=224, num_workers=0):
    """Build a shuffling DataLoader over the dataset stored under ``root``.

    Images are read from ``<root>/images`` and annotations from
    ``<root>/annotations``; the dataset uses its default transform with the
    given ``img_size``.

    Returns:
        A ``DataLoader`` with shuffling and pinned memory enabled.
    """
    dataset = FacialDataset(
        os.path.join(root, "images"),
        os.path.join(root, "annotations"),
        img_size=img_size,
    )
    loaderOptions = {
        "batch_size": batch_size,
        "shuffle": True,
        "num_workers": num_workers,
        "pin_memory": True,
    }
    return DataLoader(dataset, **loaderOptions)


### Architecture 1 - ConvNeXt 

In [None]:
# ConvNeXt Training Pipeline 
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import random_split, DataLoader
from torchvision import transforms

def getConvnextModel(num_classes, freezeBackbone=True):
    """Create an ImageNet-pretrained ConvNeXt-Tiny with a new output layer.

    Args:
        num_classes: number of outputs for the replacement linear layer.
        freezeBackbone: when true, every parameter under ``model.features``
            has ``requires_grad`` switched off.

    Returns:
        The modified torchvision model.
    """
    pretrained = models.ConvNeXt_Tiny_Weights.IMAGENET1K_V1
    net = models.convnext_tiny(weights=pretrained)

    # Swap classifier[2] for a fresh linear layer of the same input width.
    oldHead = net.classifier[2]
    net.classifier[2] = nn.Linear(oldHead.in_features, num_classes)

    if not freezeBackbone:
        return net

    for weight in net.features.parameters():
        weight.requires_grad = False
    return net

# Training function
def _runEpoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one the model is put in eval mode and gradients are
    disabled. Batches are 5-tuples of which only the first two entries
    (images, expression labels) are used.
    """
    training = optimizer is not None
    model.train(training)
    lossSum, correct, total = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for imgs, exp, _, _, _ in loader:
            imgs, exp = imgs.to(device), exp.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, exp)
            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch figure
            # is a per-sample mean even when the last batch is smaller.
            lossSum += loss.item() * imgs.size(0)
            _, preds = outputs.max(1)
            correct += preds.eq(exp).sum().item()
            total += exp.size(0)
    return lossSum / total, correct / total


def trainModel(model, train_loader, val_loader, criterion, optimizer, scheduler, device,
               num_epochs=30, unfreezeEpoch=5, save_path="best_convnext.pth"):
    """Train an expression classifier and checkpoint the best epoch.

    Args:
        model: network exposing a ``features`` sub-module (the backbone).
        train_loader: iterable of ``(imgs, exp, _, _, _)`` training batches.
        val_loader: iterable of validation batches in the same layout.
        criterion: loss called as ``criterion(outputs, exp)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: stepped once per epoch with the validation accuracy
            (presumably a ``ReduceLROnPlateau`` in ``"max"`` mode).
        device: device the batches are moved to.
        num_epochs: number of epochs to run.
        unfreezeEpoch: zero-based epoch at whose start all ``model.features``
            parameters get ``requires_grad = True``.
        save_path: file the best checkpoint is written to.

    Returns:
        The best validation accuracy seen (``0.0`` if no epoch improved on it).

    Raises:
        ZeroDivisionError: if a loader yields no samples.
    """
    bestValAcc = 0.0

    for epoch in range(num_epochs):

        # Unfreeze backbone once `unfreezeEpoch` is reached
        if epoch == unfreezeEpoch:
            for param in model.features.parameters():
                param.requires_grad = True
            print("Backbone unfrozen – now fine-tuning full model")

        trainLoss, trainAcc = _runEpoch(model, train_loader, criterion, device, optimizer)
        valLoss, valAcc = _runEpoch(model, val_loader, criterion, device)

        scheduler.step(valAcc)

        print(f"Epoch {epoch+1}: Train Loss={trainLoss:.4f}, Train Acc={trainAcc:.4f}, "
              f"Val Loss={valLoss:.4f}, Val Acc={valAcc:.4f}")

        # Save the model if validation accuracy improves
        if valAcc > bestValAcc:
            bestValAcc = valAcc
            torch.save({
                "epoch": epoch+1,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "val_acc": valAcc
            }, save_path)
            print(f"Model saved at epoch {epoch+1} with Val Acc={valAcc:.4f}")

    return bestValAcc


if __name__ == "__main__":      # Main execution block
    # Local import: Subset is only needed by this script entry point.
    from torch.utils.data import Subset

    # Same channel statistics FacialDataset applies in its default transform.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Data augmentation for training
    trainTfms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(20),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.ToTensor(),
        normalize,
    ])

    # Transformations for validation (no augmentation)
    valTfms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    # Two views of the same files so each split gets its own transform.
    # The transforms touch only the image; landmark tensors are returned
    # unchanged, which is fine here because training uses expression labels.
    augmentedDataset = FacialDataset(
        imgDir="Dataset/images",
        anno_dir="Dataset/annotations",
        img_size=224,
        transform=trainTfms
    )
    fullDataset = FacialDataset(
        imgDir="Dataset/images",
        anno_dir="Dataset/annotations",
        img_size=224,
        transform=valTfms
    )

    # Train/validation split (80/20) over one shared random permutation, so
    # the two subsets are disjoint even though they wrap different views.
    trainSize = int(0.8 * len(fullDataset))
    valSize = len(fullDataset) - trainSize
    shuffledIdx = torch.randperm(len(fullDataset)).tolist()
    trainDataset = Subset(augmentedDataset, shuffledIdx[:trainSize])
    valDataset = Subset(fullDataset, shuffledIdx[trainSize:])

    trainLoader = DataLoader(trainDataset, batch_size=32, shuffle=True, num_workers=0, pin_memory=True)
    valLoader = DataLoader(valDataset, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    numClasses = 8
    model = getConvnextModel(numClasses, freezeBackbone=True).to(device)    # load model

    # Loss, optimizer, scheduler
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=5e-3)
    # mode="max" because the scheduler is stepped with validation accuracy.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.5, patience=2)

    # Train ConvNeXt
    trainModel(model, trainLoader, valLoader, criterion, optimizer, scheduler, device,
                num_epochs=30, unfreezeEpoch=5)


Epoch 1: Train Loss=2.0744, Train Acc=0.1594, Val Loss=2.0610, Val Acc=0.1625
Model saved at epoch 1 with Val Acc=0.1625
Epoch 2: Train Loss=2.0259, Train Acc=0.2088, Val Loss=2.0287, Val Acc=0.1975
Model saved at epoch 2 with Val Acc=0.1975
Epoch 3: Train Loss=1.9947, Train Acc=0.2398, Val Loss=2.0064, Val Acc=0.2150
Model saved at epoch 3 with Val Acc=0.2150
Epoch 4: Train Loss=1.9695, Train Acc=0.2523, Val Loss=1.9864, Val Acc=0.2225
Model saved at epoch 4 with Val Acc=0.2225
Epoch 5: Train Loss=1.9485, Train Acc=0.2651, Val Loss=1.9755, Val Acc=0.2288
Model saved at epoch 5 with Val Acc=0.2288
Backbone unfrozen – now fine-tuning full model
Epoch 6: Train Loss=1.7942, Train Acc=0.3420, Val Loss=1.6495, Val Acc=0.3962
Model saved at epoch 6 with Val Acc=0.3962
Epoch 7: Train Loss=1.3939, Train Acc=0.5746, Val Loss=1.6181, Val Acc=0.4412
Model saved at epoch 7 with Val Acc=0.4412
Epoch 8: Train Loss=0.9980, Train Acc=0.7906, Val Loss=1.6427, Val Acc=0.4462
Model saved at epoch 8 with 

### Architecture 2 - EfficientNetV2

In [None]:
# EfficientNetV2 Training Pipeline 
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import random_split, DataLoader

# Model definition
def getEfficientnetv2Model(num_classes, freezeBackbone=True):
    """Create an ImageNet-pretrained EfficientNetV2-S with a new classifier.

    The whole ``classifier`` is replaced by ``Dropout(0.4)`` followed by a
    linear layer whose input width is taken from the original
    ``classifier[1]``.

    Args:
        num_classes: number of outputs of the new linear layer.
        freezeBackbone: when true, every parameter under ``model.features``
            has ``requires_grad`` switched off.

    Returns:
        The modified torchvision model.
    """
    pretrained = models.EfficientNet_V2_S_Weights.IMAGENET1K_V1
    net = models.efficientnet_v2_s(weights=pretrained)

    headWidth = net.classifier[1].in_features
    newHead = [nn.Dropout(0.4), nn.Linear(headWidth, num_classes)]
    net.classifier = nn.Sequential(*newHead)

    if not freezeBackbone:
        return net

    for weight in net.features.parameters():
        weight.requires_grad = False
    return net

# Training loop
def _runEpoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one the model is put in eval mode and gradients are
    disabled. Batches are 5-tuples of which only the first two entries
    (images, expression labels) are used.
    """
    training = optimizer is not None
    model.train(training)
    lossSum, correct, total = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for imgs, exp, _, _, _ in loader:
            imgs, exp = imgs.to(device), exp.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, exp)
            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch figure
            # is a per-sample mean even when the last batch is smaller.
            lossSum += loss.item() * imgs.size(0)
            _, preds = outputs.max(1)
            correct += preds.eq(exp).sum().item()
            total += exp.size(0)
    return lossSum / total, correct / total


def trainModel(model, train_loader, val_loader, criterion, optimizer, scheduler, device,
               num_epochs=30, unfreeze_epoch=5, save_path="efficientnetv2_best.pth"):
    """Train an expression classifier and save the best weights.

    Args:
        model: network exposing a ``features`` sub-module (the backbone).
        train_loader: iterable of ``(imgs, exp, _, _, _)`` training batches.
        val_loader: iterable of validation batches in the same layout.
        criterion: loss called as ``criterion(outputs, exp)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: stepped once per epoch with the validation accuracy
            (presumably a ``ReduceLROnPlateau`` in ``"max"`` mode).
        device: device the batches are moved to.
        num_epochs: number of epochs to run.
        unfreeze_epoch: zero-based epoch at whose start all
            ``model.features`` parameters get ``requires_grad = True``.
        save_path: file the best ``state_dict`` is written to.

    Returns:
        The best validation accuracy seen (``0.0`` if no epoch improved on it).

    Raises:
        ZeroDivisionError: if a loader yields no samples.
    """
    bestValAcc = 0.0
    for epoch in range(num_epochs):
        if epoch == unfreeze_epoch:
            for param in model.features.parameters():       # unfreeze backbone
                param.requires_grad = True
            print("Backbone unfrozen – now fine-tuning full model")

        trainLoss, trainAcc = _runEpoch(model, train_loader, criterion, device, optimizer)
        valLoss, valAcc = _runEpoch(model, val_loader, criterion, device)
        scheduler.step(valAcc)

        print(f"Epoch {epoch+1}: Train Loss={trainLoss:.4f}, Train Acc={trainAcc:.4f}, "
              f"Val Loss={valLoss:.4f}, Val Acc={valAcc:.4f}")

        # Keep only the weights of the best-scoring epoch.
        if valAcc > bestValAcc:
            bestValAcc = valAcc
            torch.save(model.state_dict(), save_path)
            print(f"Model saved at epoch {epoch+1} with Val Acc={valAcc:.4f}")

    return bestValAcc

if __name__ == "__main__":
    # Load dataset from the data loading pipeline.
    # FacialDataset names its first parameter `imgDir`; passing `img_dir=`
    # raises TypeError, so the keyword must match exactly.
    fullDataset = FacialDataset(
        imgDir="Dataset/images",
        anno_dir="Dataset/annotations",
        img_size=224,
        transform=None   # falls back to the dataset's default transform
    )

    # Train/validation split (80/20)
    trainSize = int(0.8 * len(fullDataset))
    valSize = len(fullDataset) - trainSize
    trainDataset, valDataset = random_split(fullDataset, [trainSize, valSize])
    trainLoader = DataLoader(trainDataset, batch_size=32, shuffle=True, num_workers=0, pin_memory=True)
    valLoader = DataLoader(valDataset, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    numClasses = 8
    model = getEfficientnetv2Model(numClasses, freezeBackbone=True).to(device)      # load model

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=5e-3)
    # mode="max" because the scheduler is stepped with validation accuracy.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.5, patience=3)

    trainModel(model, trainLoader, valLoader, criterion, optimizer, scheduler, device,
                num_epochs=30, unfreeze_epoch=5)


Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to C:\Users\HP/.cache\torch\hub\checkpoints\efficientnet_v2_s-dd5fe13b.pth


100%|██████████| 82.7M/82.7M [00:37<00:00, 2.31MB/s]


Epoch 1: Train Loss=2.0964, Train Acc=0.1357, Val Loss=2.0790, Val Acc=0.1500
Model saved at epoch 1 with Val Acc=0.1500
Epoch 2: Train Loss=2.0692, Train Acc=0.1694, Val Loss=2.0635, Val Acc=0.1737
Model saved at epoch 2 with Val Acc=0.1737
Epoch 3: Train Loss=2.0460, Train Acc=0.1841, Val Loss=2.0475, Val Acc=0.1913
Model saved at epoch 3 with Val Acc=0.1913
Epoch 4: Train Loss=2.0389, Train Acc=0.2066, Val Loss=2.0355, Val Acc=0.2150
Model saved at epoch 4 with Val Acc=0.2150
Epoch 5: Train Loss=2.0211, Train Acc=0.2191, Val Loss=2.0244, Val Acc=0.2137
Backbone unfrozen – now fine-tuning full model
Epoch 6: Train Loss=1.8740, Train Acc=0.3007, Val Loss=1.7473, Val Acc=0.3663
Model saved at epoch 6 with Val Acc=0.3663
Epoch 7: Train Loss=1.4864, Train Acc=0.5139, Val Loss=1.6690, Val Acc=0.4263
Model saved at epoch 7 with Val Acc=0.4263
Epoch 8: Train Loss=1.1102, Train Acc=0.7165, Val Loss=1.7795, Val Acc=0.4263
Epoch 9: Train Loss=0.8019, Train Acc=0.8809, Val Loss=1.8811, Val Acc=