In [None]:
import kagglehub

# Download latest version
# The value returned by dataset_download is kept in `path` and printed below
# as the location of the dataset files.
path = kagglehub.dataset_download("ananthu017/emotion-detection-fer")

print("Path to dataset files:", path)

In [None]:
import os
# Prefer the directory reported by the kagglehub download cell so the notebook
# is not tied to the Kaggle filesystem layout; fall back to the standard Kaggle
# input mount if that location is not a directory.
data_path = path if os.path.isdir(path) else '/kaggle/input/emotion-detection-fer'

# Keep only sub-directories (the dataset splits): the next cell calls
# os.listdir on every entry, which would fail on a stray file. Sorting makes
# the listing order deterministic.
data = sorted(
    entry for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
)
print(f"Total folders in this Dataset: {len(data)}")
print(f"Files in this Dataset: {data}")

In [None]:
# For every split folder, show its class sub-folders and the number of
# entries inside each of them.
for split in data:
    split_path = os.path.join(data_path, split)
    classes = os.listdir(split_path)
    print(f"{split} classes:", classes)

    counts = dict(
        (cls, len(os.listdir(os.path.join(split_path, cls))))
        for cls in classes
    )
    print(f"{split} samples per class:", counts)

In [None]:
import matplotlib.pyplot as plt
import random
from glob import glob
from PIL import Image

train_dir = os.path.join(data_path, "train")

example_path = glob(os.path.join(train_dir, "*", "*.png"))
print("Total training images:", len(example_path))

# Group the image paths by class (the name of the parent folder) so that
# exactly one example per class can be drawn.
paths_by_class = {}
for candidate in example_path:
    class_name = os.path.basename(os.path.dirname(candidate))
    paths_by_class.setdefault(class_name, []).append(candidate)
if not paths_by_class:
    # Fail with a readable message instead of an IndexError from random.choice.
    raise FileNotFoundError(f"No .png images found under {train_dir}")

# One random image from each class, classes in alphabetical order
class_labels = sorted(paths_by_class)
fig, axes = plt.subplots(1, len(class_labels), figsize=(22, 8), squeeze=False)
axes = axes.ravel()  # always a flat array, even when there is a single class
for ax, label in zip(axes, class_labels):
    img_path = random.choice(paths_by_class[label])
    # Context manager closes the file handle; convert() returns an
    # independent in-memory image.
    with Image.open(img_path) as raw_img:
        img = raw_img.convert("L")  # grayscale
    ax.imshow(img, cmap="gray")
    ax.set_title(label)
    ax.axis("off")
plt.show()
# Size of the last image shown
print("Image size (W x H):", img.size)

### Data Preprocessing

In [None]:
import torch
import numpy as np
import pandas as pd

from torch.utils.data import Subset, DataLoader, Dataset
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from sklearn.model_selection import StratifiedShuffleSplit

In [None]:
# Transformations
def _gray_48_steps():
    """Steps shared by both pipelines: single-channel conversion and 48x48 resize."""
    return [
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((48, 48)),
    ]


# Training pipeline adds random flip/rotation augmentation before tensor conversion.
train_tf = transforms.Compose(
    _gray_48_steps()
    + [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
    ]
)

# Evaluation pipeline: no augmentation.
eval_tf = transforms.Compose(_gray_48_steps() + [transforms.ToTensor()])

# Train Dataset
train_root = os.path.join(data_path, "train")
train_dataset = datasets.ImageFolder(train_root, transform=train_tf)
print(f"Train set size: {len(train_dataset)}")

# Test Dataset
test_root = os.path.join(data_path, "test")
test_dataset = datasets.ImageFolder(test_root, transform=eval_tf)
print(f"Test set size: {len(test_dataset)}")

# Class names
class_names = train_dataset.classes

In [None]:
# Mini-batch size passed to the DataLoaders created in this cell.
BATCH_SIZE=64

class OneHotImageFolder(Dataset):
    """Dataset wrapper that re-encodes integer class labels as one-hot vectors.

    Samples are read from the wrapped dataset as ``(image, label)`` pairs; the
    image is passed through untouched and the label is replaced by a float
    tensor of length ``num_classes`` with a 1 at the label's index.

    Args:
        image_folder_dataset: indexable dataset yielding ``(image, label)``.
        num_classes: width of the one-hot vector.
    """

    def __init__(self, image_folder_dataset, num_classes):
        self.num_classes = num_classes
        self.dataset = image_folder_dataset

    def __len__(self):
        # Same length as the wrapped dataset.
        return len(self.dataset)

    def __getitem__(self, idx):
        image, class_index = self.dataset[idx]
        index_tensor = torch.tensor(class_index)
        encoded = torch.nn.functional.one_hot(index_tensor, num_classes=self.num_classes)
        # one_hot yields integers; the training code expects float targets.
        return image, encoded.float()

# Number of classes, taken from the class folders ImageFolder discovered so the
# one-hot width always matches the dataset instead of relying on a literal.
num_classes = len(class_names)

# Wrap train and test datasets
train_dataset_oh = OneHotImageFolder(train_dataset, num_classes=num_classes)
test_dataset_oh  = OneHotImageFolder(test_dataset, num_classes=num_classes)

# Pinned host memory only helps host-to-GPU copies, so enable it only when
# CUDA is actually available.
PIN_MEMORY = torch.cuda.is_available()

# Data Loaders (only the training data is shuffled)
train_loader = DataLoader(train_dataset_oh, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=PIN_MEMORY)
test_loader  = DataLoader(test_dataset_oh, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=PIN_MEMORY)

### Model

In [None]:
import torch
import torch.nn as nn

class EmotionManipulator(nn.Module):
    """Four-stage convolutional classifier for single-channel 48x48 images.

    Each stage is conv3x3 -> ReLU -> conv3x3 -> ReLU -> (optional BatchNorm)
    -> 2x2 max-pool -> dropout, halving the spatial size (48 -> 24 -> 12 -> 6
    -> 3). Only the second and fourth stages carry a BatchNorm layer. The
    256x3x3 feature map is flattened and mapped to ``num_classes`` logits.

    Args:
        num_classes: number of output logits.
        dropout_p: dropout probability used in every stage and in the head.
    """

    def __init__(self, num_classes=7, dropout_p=0.3):
        super().__init__()

        # (in_channels, out_channels, has_batchnorm) for each stage, in order.
        stage_specs = (
            (1, 32, False),     # 48x48 -> 24x24
            (32, 64, True),     # 24x24 -> 12x12
            (64, 128, False),   # 12x12 -> 6x6
            (128, 256, True),   # 6x6   -> 3x3
        )

        layers = []
        for in_ch, out_ch, has_bn in stage_specs:
            layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            if has_bn:
                layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.MaxPool2d(2))
            layers.append(nn.Dropout(p=dropout_p))
        self.features = nn.Sequential(*layers)

        # --- Fully Connected Layers ---
        head = [
            nn.Flatten(),
            nn.Linear(256 * 3 * 3, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(p=dropout_p),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return raw logits of shape (batch, num_classes)."""
        return self.classifier(self.features(x))


# Smoke test: build the CNN and pass a random batch through it to confirm the
# output has one row per sample and one column per class.
num_classes = 7
model = EmotionManipulator(num_classes=num_classes)
dummy_input = torch.randn(4, 1, 48, 48)  # batch of 4 single-channel 48x48 inputs
output = model(dummy_input)
print(output.shape)  # torch.Size([4, 7])

# Counts every parameter of the model, whether or not it requires gradients.
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

In [None]:
import torchvision.models as models

class EmotionManipulatorResnet(nn.Module):
    """ResNet backbone adapted to grayscale input with an MLP classification head.

    The torchvision backbone's first convolution is replaced by a 1-channel
    convolution whose weights are the RGB-averaged original weights. The
    backbone (everything except its final ``fc`` layer) is frozen, optionally
    leaving the last residual blocks of ``layer4`` trainable.

    Args:
        num_classes: number of output logits.
        backbone: name of a constructor in ``torchvision.models``
            (e.g. ``'resnet18'``).
        pretrained: load the ``IMAGENET1K_V1`` weights when True.
        dropout_p: dropout probability in the classifier head.
        fine_tune_layers: how many residual blocks at the end of ``layer4``
            stay trainable; 0 freezes the whole backbone. Values larger than
            the number of blocks unfreeze all of ``layer4``.

    NOTE: frozen BatchNorm layers still update their running statistics while
    the module is in train mode; only their affine parameters are frozen.
    """

    def __init__(self, num_classes=7, backbone='resnet18', pretrained=True, dropout_p=0.4, fine_tune_layers=1):
        super().__init__()

        # Load pretrained ResNet
        resnet = getattr(models, backbone)(weights='IMAGENET1K_V1' if pretrained else None)

        # Modify first conv layer -> grayscale input: average the RGB filters
        # into a single input channel, keeping the original conv geometry.
        rgb_conv = resnet.conv1
        gray_conv = nn.Conv2d(
            1,
            rgb_conv.out_channels,
            kernel_size=rgb_conv.kernel_size,
            stride=rgb_conv.stride,
            padding=rgb_conv.padding,
            bias=False,
        )
        with torch.no_grad():
            gray_conv.weight.copy_(rgb_conv.weight.mean(dim=1, keepdim=True))
        resnet.conv1 = gray_conv

        # Width of the pooled feature vector (512 for resnet18/34, larger for
        # the bottleneck variants), read before the fc layer is dropped.
        feature_dim = resnet.fc.in_features

        # Extract all layers except final FC:
        # conv1, bn1, relu, maxpool, layer1..layer4, avgpool
        self.feature_extractor = nn.Sequential(*list(resnet.children())[:-1])

        # Freeze the whole backbone first.
        for param in self.feature_extractor.parameters():
            param.requires_grad = False

        if fine_tune_layers > 0:
            # The last child is the parameter-free avgpool, so layer4 sits at
            # index -2. Unfreeze its last `fine_tune_layers` residual blocks.
            layer4 = self.feature_extractor[-2]
            for block in list(layer4.children())[-fine_tune_layers:]:
                for param in block.parameters():
                    param.requires_grad = True

        # Richer classifier head
        self.classifier = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout_p),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_p),

            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout_p * 0.8),

            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        """Return raw logits of shape (batch, num_classes)."""
        backbone_trainable = any(p.requires_grad for p in self.feature_extractor.parameters())
        if backbone_trainable:
            # Run under the caller's grad mode so an outer torch.no_grad()
            # (e.g. during evaluation) is respected rather than overridden.
            x = self.feature_extractor(x)
        else:
            # Fully frozen backbone: skip building an autograd graph for it.
            with torch.no_grad():
                x = self.feature_extractor(x)
        x = torch.flatten(x, 1)
        out = self.classifier(x)
        return out

# Smoke test for the ResNet variant: instantiate it (pretrained defaults to
# True, so the IMAGENET1K_V1 weights are requested) and run a random batch.
num_classes = 7
model = EmotionManipulatorResnet(num_classes=num_classes, fine_tune_layers=2)
dummy_input = torch.randn(4, 1, 48, 48)
output = model(dummy_input)
print(output.shape)  # torch.Size([4, 7])

# Only parameters with requires_grad=True are counted here.
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")

In [None]:
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Two conv3x3+ReLU layers with a skip connection, then 2x2 max-pool and dropout.

    The main path is conv -> ReLU -> conv -> ReLU -> (optional BatchNorm). The
    skip path is the identity when the channel counts match, otherwise a 1x1
    convolution. Both paths are summed, passed through ReLU, pooled (halving
    height and width) and finally dropped out.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the block.
        dropout_p: dropout probability applied after pooling.
        use_batchnorm: whether the main path ends with BatchNorm2d.
    """

    def __init__(self, in_channels, out_channels, dropout_p=0.3, use_batchnorm=True):
        super().__init__()

        first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.conv_block = nn.Sequential(
            first_conv,
            nn.ReLU(inplace=False),
            second_conv,
            nn.ReLU(inplace=False),
        )

        if use_batchnorm:
            self.bn = nn.BatchNorm2d(out_channels)
        else:
            self.bn = nn.Identity()

        # A 1x1 projection is only needed when the skip path must change width.
        if in_channels == out_channels:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1)

        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Apply the block; spatial dimensions are halved by the max-pool."""
        skip = self.shortcut(x)
        main = self.bn(self.conv_block(x))
        merged = torch.relu(main + skip)
        return self.dropout(self.pool(merged))



class EmotionManipulatorResidual(nn.Module):
    """Residual variant of the CNN: four ResidualBlock stages plus a dense head.

    The stages widen the channels 1 -> 32 -> 64 -> 128 -> 256 while halving
    the spatial size each time (48 -> 24 -> 12 -> 6 -> 3); BatchNorm is enabled
    only in the second and fourth stage. The flattened 256x3x3 map is mapped
    to ``num_classes`` logits.

    Args:
        num_classes: number of output logits.
        dropout_p: dropout probability for every stage and for the head.
    """

    def __init__(self, num_classes=7, dropout_p=0.3):
        super().__init__()

        # --- Residual Feature Extractor ---
        # (in_channels, out_channels, use_batchnorm) per stage, in order.
        stage_specs = (
            (1, 32, False),     # 48x48 -> 24x24
            (32, 64, True),     # 24x24 -> 12x12
            (64, 128, False),   # 12x12 -> 6x6
            (128, 256, True),   # 6x6   -> 3x3
        )
        stages = [
            ResidualBlock(in_ch, out_ch, dropout_p, use_batchnorm=with_bn)
            for in_ch, out_ch, with_bn in stage_specs
        ]
        self.features = nn.Sequential(*stages)

        # --- Classifier ---
        head = [
            nn.Flatten(),
            nn.Linear(256 * 3 * 3, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(p=dropout_p),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return raw logits of shape (batch, num_classes)."""
        return self.classifier(self.features(x))


# Smoke test for the residual variant: run a random batch and report the
# output shape and the total parameter count.
model = EmotionManipulatorResidual(num_classes=7)
dummy_input = torch.randn(4, 1, 48, 48)
output = model(dummy_input)

print(output.shape)  # torch.Size([4, 7])
# Counts every parameter of the model, whether or not it requires gradients.
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters (Residual): {total_params:,}")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Labels are expected as one-hot (or score) rows: accuracy compares the
    argmax of the model output with the argmax of the label row.

    Args:
        model: module mapping an image batch to (batch, num_classes) outputs.
        train_loader: iterable of ``(images, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping the model's parameters.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples seen, where each
        batch loss is weighted by its batch size.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    progress = tqdm(train_loader, desc='Training', leave=False)
    for batch_images, batch_labels in progress:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)  # shape: (batch, num_classes)

        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the epoch mean is per-sample.
        loss_sum += batch_loss.item() * batch_images.size(0)

        # Accuracy: predicted class index vs. index of the hot label entry.
        predicted = torch.argmax(logits, dim=1)
        expected = torch.argmax(batch_labels, dim=1)
        n_correct += (predicted == expected).sum().item()
        n_seen += batch_labels.size(0)

        progress.set_postfix({
            'loss': f"{loss_sum/n_seen:.4f}",
            'acc': f"{100.*n_correct/n_seen:.2f}%"
        })

    return loss_sum / n_seen, 100. * n_correct / n_seen

In [None]:
def evaluate(model, val_loader, criterion, device):
    """Compute loss and accuracy over ``val_loader`` without updating the model.

    The model is switched to eval mode and all forward passes run under
    ``torch.no_grad()``. Labels are expected as one-hot (or score) rows:
    accuracy compares the argmax of the output with the argmax of the label.

    Args:
        model: module mapping an image batch to (batch, num_classes) outputs.
        val_loader: iterable of ``(images, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples seen, where each
        batch loss is weighted by its batch size.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        progress = tqdm(val_loader, desc='Evaluating', leave=False)
        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)

            # Weight the batch loss by its size so the mean is per-sample.
            loss_sum += batch_loss.item() * batch_images.size(0)

            predicted = torch.argmax(logits, dim=1)
            expected = torch.argmax(batch_labels, dim=1)
            n_correct += (predicted == expected).sum().item()
            n_seen += batch_labels.size(0)

            progress.set_postfix({
                'loss': f"{loss_sum/n_seen:.4f}",
                'acc': f"{100.*n_correct/n_seen:.2f}%"
            })

    return loss_sum / n_seen, 100. * n_correct / n_seen

In [None]:
import os
import torch

def compute_class_weights(data_path: str, split: str = "train") -> torch.Tensor:
    """
    Compute inverse-frequency class weights from dataset folders.

    Every sub-directory of ``<data_path>/<split>`` is treated as one class and
    the regular files inside it as its samples. Non-directory entries in the
    split folder are ignored. Classes are ordered alphabetically.

    Args:
        data_path (str): Base path containing 'train' and 'test' folders.
        split (str): Split to compute weights from ('train' recommended).

    Returns:
        torch.Tensor: Float tensor with one weight per class (higher for rarer
        classes), scaled so the weights sum to the number of classes.

    Raises:
        ValueError: If the split has no class folders, or a class folder has
            no files (its inverse frequency would be infinite).
    """
    split_path = os.path.join(data_path, split)

    # Only directories are classes; a stray file here would otherwise make
    # os.listdir fail below. Sorting ensures consistent ordering.
    classes = sorted(
        entry for entry in os.listdir(split_path)
        if os.path.isdir(os.path.join(split_path, entry))
    )
    if not classes:
        raise ValueError(f"No class folders found in {split_path}")

    # Count sample files per class
    sample_counts = []
    for cls in classes:
        cls_path = os.path.join(split_path, cls)
        sample_counts.append(
            sum(1 for name in os.listdir(cls_path) if os.path.isfile(os.path.join(cls_path, name)))
        )

    empty_classes = [cls for cls, count in zip(classes, sample_counts) if count == 0]
    if empty_classes:
        raise ValueError(f"Class folders without samples in {split_path}: {empty_classes}")

    # Compute inverse-frequency weights
    class_counts = torch.tensor(sample_counts, dtype=torch.float)
    weights = 1.0 / class_counts
    weights = weights / weights.sum() * len(classes)  # normalized for stability

    print(f"Class sample counts: {dict(zip(classes, class_counts.tolist()))}")
    print(f"Computed class weights: {weights.tolist()}")
    return weights

In [None]:
def train_model(model, train_loader, test_loader, epochs=10, lr=1e-3, device='cuda', patience=3,
                data_path="/kaggle/input/emotion-detection-fer",
                checkpoint_path='best_emotion_model.pth', criterion=None):
    """Train ``model`` with Adam, checkpoint on best accuracy, stop early on plateau.

    Args:
        model: network producing (batch, num_classes) logits.
        train_loader: loader of ``(images, one_hot_labels)`` training batches.
        test_loader: loader evaluated after every epoch. It drives both the
            checkpointing and the early stopping.
            NOTE(review): if this is the final test split, the reported best
            accuracy is selected on test data; a separate validation split
            would avoid that.
        epochs: maximum number of epochs.
        lr: Adam learning rate (weight decay is fixed at 1e-4).
        device: device used for the model and the batches.
        patience: number of consecutive epochs without accuracy improvement
            after which training stops.
        data_path: dataset root whose 'train' folder is used to derive class
            weights when no ``criterion`` is given.
        checkpoint_path: file the best ``state_dict`` is written to.
        criterion: optional loss to use instead of the default.

    Returns:
        dict with per-epoch lists under 'train_loss', 'train_acc',
        'test_loss' and 'test_acc'.
    """
    model = model.to(device)

    if criterion is None:
        # Classes are mutually exclusive, so use softmax cross-entropy with
        # per-class weights. CrossEntropyLoss accepts the float one-hot rows
        # produced by the loaders as class-probability targets.
        # (BCEWithLogitsLoss' pos_weight is a per-class positive/negative
        # ratio for multi-label problems, not an inverse-frequency weight.)
        class_weights = compute_class_weights(data_path, split="train").to(device)
        criterion = nn.CrossEntropyLoss(weight=class_weights)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)

    history = {
        'train_loss': [], 'train_acc': [],
        'test_loss': [], 'test_acc': []
    }

    # -inf so the first epoch always produces a checkpoint, even at 0% accuracy.
    best_acc = float('-inf')
    patience_counter = 0

    for epoch in range(epochs):
        print(f"\nEpoch [{epoch+1}/{epochs}]")
        print("-"*50)

        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)

        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Test Loss:  {test_loss:.4f} | Test Acc:  {test_acc:.2f}%")

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['test_loss'].append(test_loss)
        history['test_acc'].append(test_acc)

        # Save best model
        if test_acc > best_acc:
            best_acc = test_acc
            patience_counter = 0
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Best model saved with accuracy: {best_acc:.2f}%")
        else:
            patience_counter += 1
            print(f"No improvement for {patience_counter} epoch(s).")

        # Early stopping
        if patience_counter >= patience:
            print(f"\nEarly stopping triggered after {patience_counter} epochs with no improvement.")
            break

    return history

In [None]:
# Train the plain CNN for up to 50 epochs; uses the GPU when CUDA is
# available and the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = EmotionManipulator(num_classes=7)
history = train_model(model, train_loader, test_loader, epochs=50, lr=3e-4, device=device)

In [None]:
import matplotlib.pyplot as plt

def plot_training_curves(history):
    """Plot train/test loss and accuracy per epoch in two side-by-side panels.

    Args:
        history: dict with equally long lists under 'train_loss', 'test_loss',
            'train_acc' and 'test_acc' (one entry per epoch).
    """
    epochs = range(1, len(history['train_loss']) + 1)

    _, panels = plt.subplots(1, 2, figsize=(12, 5))

    # (train key, train label, test key, test label, y-axis label, title)
    panel_specs = (
        ('train_loss', 'Train Loss', 'test_loss', 'Test Loss',
         'Loss', 'Training vs Testing Loss'),
        ('train_acc', 'Train Accuracy', 'test_acc', 'Test Accuracy',
         'Accuracy (%)', 'Training vs Testing Accuracy'),
    )

    for ax, spec in zip(panels, panel_specs):
        train_key, train_label, test_key, test_label, y_label, title = spec
        ax.plot(epochs, history[train_key], label=train_label, marker='o')
        ax.plot(epochs, history[test_key], label=test_label, marker='o')
        ax.set_xlabel('Epochs')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.5)

    plt.tight_layout()
    plt.show()

# Call after training: `history` is the dict returned by train_model for the
# EmotionManipulator run.
plot_training_curves(history)

In [None]:
# Train the ResNet-based variant with the same settings and plot its curves.
# NOTE(review): train_model saves its best checkpoint to
# 'best_emotion_model.pth', the same file the other training runs write to —
# confirm that overwriting it is intended.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_resnet = EmotionManipulatorResnet(num_classes=7)
history_resnet = train_model(model_resnet, train_loader, test_loader, epochs=50, lr=3e-4, device=device)
plot_training_curves(history_resnet)

In [None]:
# Train the residual variant with the same settings and plot its curves.
# NOTE(review): train_model saves its best checkpoint to
# 'best_emotion_model.pth', the same file the other training runs write to —
# confirm that overwriting it is intended.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_residual = EmotionManipulatorResidual(num_classes=7)
history_residual = train_model(model_residual, train_loader, test_loader, epochs=50, lr=3e-4, device=device)
plot_training_curves(history_residual)

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

def plot_confusion_matrix(model, dataloader, device='cuda', class_names=None, title="Confusion Matrix"):
    """
    Plots the confusion matrix for a trained model on a given dataloader.
    Works for multi-class (not multi-label) emotion classification.

    Args:
        model: trained network producing (batch, num_classes) outputs; it must
            already live on ``device``.
        dataloader: yields ``(images, labels)`` batches. Labels may be class
            indices of shape (batch,) or one-hot rows of shape
            (batch, num_classes); one-hot rows are reduced with argmax.
        device: device the image batches are moved to.
        class_names: optional list of class names used as tick labels; when
            given, the matrix always has one row/column per name, even for
            classes that never occur in the data.
        title: figure title; the overall accuracy is appended below it.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)

            outputs = model(images)
            # For multi-class case
            preds = torch.argmax(outputs, dim=1)

            # sklearn needs class indices on both sides: collapse one-hot rows.
            if labels.ndim > 1:
                labels = torch.argmax(labels, dim=1)

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    # Compute confusion matrix (fixed label set when class names are known)
    label_ids = list(range(len(class_names))) if class_names is not None else None
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    acc = np.trace(cm) / np.sum(cm) * 100

    plt.figure(figsize=(7, 6))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(cmap='Blues', values_format='d', ax=plt.gca(), colorbar=False)
    plt.title(f"{title}\nAccuracy: {acc:.2f}%")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# Use the device the model was trained on (falls back to CPU without CUDA)
# and label the axes with the dataset's class names.
plot_confusion_matrix(model, test_loader, device, class_names=class_names,
                      title="Confusion Matrix - EmotionManipulator")

In [None]:
# Use the device the model was trained on (falls back to CPU without CUDA)
# and label the axes with the dataset's class names.
plot_confusion_matrix(model_resnet, test_loader, device, class_names=class_names,
                      title="Confusion Matrix - EmotionManipulatorResnet")

In [None]:
# Use the device the model was trained on (falls back to CPU without CUDA)
# and label the axes with the dataset's class names.
plot_confusion_matrix(model_residual, test_loader, device, class_names=class_names,
                      title="Confusion Matrix - EmotionManipulatorResidual")