In [10]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.optim.lr_scheduler import CyclicLR

In [11]:
# Enable CuDNN optimizations
torch.backends.cudnn.benchmark = True

In [12]:
# Paths
data_dir = "../FBMM/Unsplitted_Ready_Sets/set_01_class_balanced_augs_applied_splitted"
model_save_path = "./models/efficientnet_b0_emotion_model.pth"
checkpoint_path = "training_checkpoint.pth"

# Configuration
batch_size = 64
accumulation_steps = 4
num_epochs = 50
learning_rate = 1e-4
num_classes = 7  # Number of emotion categories
patience = 5  # Early stopping patience
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Emotion categories
emotion_classes = ["Anger", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]

Using device: cuda


In [13]:
# Early Stopping Class
class EarlyStopping:
    def __init__(self, patience=3, delta=0.01):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

In [14]:
# Data Prepare
def prepare_data_loaders(data_dir, batch_size):
    transform_train = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # RGB normalization
    ])
    
    transform_val_test = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # RGB normalization
    ])

    print("Loading datasets...")
    train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=transform_train)
    val_dataset = datasets.ImageFolder(os.path.join(data_dir, "val"), transform=transform_val_test)
    test_dataset = datasets.ImageFolder(os.path.join(data_dir, "test"), transform=transform_val_test)
    print("Datasets loaded successfully.")

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True, prefetch_factor=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True, prefetch_factor=2)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True, prefetch_factor=2)

    return train_loader, val_loader, test_loader

In [15]:
# Load EfficientNet-B0 with ImageNet weights
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Freeze 80% of the network, train last 20% + last classifier layer
num_layers = len(list(model.features.children()))
trainable_layers = int(num_layers * 0.2)  # 20% of the layers

for i, child in enumerate(model.features.children()):
    if i < num_layers - trainable_layers:  # Freeze first 80% of layers
        for param in child.parameters():
            param.requires_grad = False

# Unlock classifier for training
model.classifier = nn.Sequential(
    nn.Dropout(p=0.6),
    nn.Linear(model.classifier[1].in_features, num_classes),
)
model.classifier[1].requires_grad = True  # Ensure classifier is trainable

model = torch.compile(model)

In [16]:
# Loaders and Criterion
train_loader, val_loader, test_loader = prepare_data_loaders(data_dir, batch_size)
criterion = nn.CrossEntropyLoss()  # Using standard cross-entropy loss
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate, weight_decay=1e-3)
scheduler = CyclicLR(optimizer, base_lr=1e-5, max_lr=1e-3, step_size_up=2000, mode='triangular')


Loading datasets...
Datasets loaded successfully.


In [17]:
# Training Function
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, model_save_path, checkpoint_path):
    best_val_loss = float("inf")
    early_stopping = EarlyStopping(patience=patience)

    start_epoch = 0
    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
        start_epoch = checkpoint["epoch"] + 1
        print(f"Resuming training from epoch {start_epoch}.")

    for epoch in range(start_epoch, num_epochs):
        model.train()
        train_loss = 0.0
        print(f"\nEpoch {epoch + 1}/{num_epochs}: Training...")
        optimizer.zero_grad()
        for i, (inputs, labels) in enumerate(tqdm(train_loader, desc="Training Batches", leave=False)):
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, labels) / accumulation_steps
            loss.backward()
            train_loss += loss.item()
            if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
        train_loss /= len(train_loader)

        model.eval()
        val_loss, correct, total = 0.0, 0, 0
        print(f"Epoch {epoch + 1}/{num_epochs}: Validating...")
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc="Validation Batches", leave=False):
                inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_loss /= len(val_loader)
        val_accuracy = correct / total

        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy * 100:.2f}%")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), model_save_path)
            print(f"Model saved at epoch {epoch + 1} with Val Accuracy: {val_accuracy * 100:.2f}%")

        if early_stopping(val_loss):
            print("Early stopping triggered.")
            break

train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, model_save_path, checkpoint_path)


Epoch 1/50: Training...


                                                          

TorchRuntimeError: Failed running call_function <function batch_norm at 0x0000021D2A254160>(*(FakeTensor(..., device='cuda:0', size=(64, 32, 112, 112)), FakeTensor(..., size=(32,)), FakeTensor(..., size=(32,)), Parameter(FakeTensor(..., size=(32,))), Parameter(FakeTensor(..., size=(32,))), True, 0.1, 1e-05), **{}):
Unhandled FakeTensor Device Propagation for aten.add.Tensor, found two different devices cuda:0, cpu

from user code:
   File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torchvision\models\efficientnet.py", line 343, in forward
    return self._forward_impl(x)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torchvision\models\efficientnet.py", line 333, in _forward_impl
    x = self.features(x)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\nn\modules\container.py", line 250, in forward
    input = module(input)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\nn\modules\container.py", line 250, in forward
    input = module(input)
  File "c:\Users\Tuf\anaconda3\envs\DeepLearn\lib\site-packages\torch\nn\modules\batchnorm.py", line 193, in forward
    return F.batch_norm(

Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information


You can suppress this exception and fall back to eager by setting:
    import torch._dynamo
    torch._dynamo.config.suppress_errors = True


In [None]:
# Testing Function
def test_model(model, test_loader, emotion_classes, model_save_path):
    model.load_state_dict(torch.load(model_save_path))
    model.eval()
    correct = [0] * len(emotion_classes)
    total = [0] * len(emotion_classes)
    print("Testing the model...")
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc="Testing Progress"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            for i in range(len(labels)):
                label = labels[i].item()
                total[label] += 1
                correct[label] += (predicted[i] == label).item()

    overall_accuracy = sum(correct) / sum(total)
    print(f"Overall Test Accuracy: {overall_accuracy * 100:.2f}%")

test_model(model, test_loader, emotion_classes, model_save_path)