<a href="https://colab.research.google.com/github/ekvirika/Facial-Expression-Recognition/blob/main/notebooks/05_resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; the Kaggle credential cell further down
# copies kaggle.json from a folder under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install required packages
!pip install wandb torch torchvision pandas numpy matplotlib seaborn scikit-learn

# Set up Kaggle API
!pip install kaggle

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
# Install the Kaggle API token: copy kaggle.json from the mounted Google Drive
# into ~/.kaggle, then restrict the file to owner read/write (mode 600).
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Download the dataset
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge
!unzip -q challenges-in-representation-learning-facial-expression-recognition-challenge.zip


Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 84% 241M/285M [00:00<00:00, 438MB/s]
100% 285M/285M [00:00<00:00, 434MB/s]


In [5]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from sklearn.metrics import classification_report, confusion_matrix
import wandb
from datetime import datetime
from tqdm import tqdm

In [7]:
class FER2013Dataset(Dataset):
    """Map-style dataset over a table of FER2013-style rows.

    Each row must provide:
      * ``pixels``  - a whitespace-separated string of 48*48 grayscale values
      * ``emotion`` - the class label (anything ``int()`` accepts)

    Args:
        data: Table of samples; rows are fetched with ``data.iloc[idx]``.
        transform: Optional callable applied to the RGB PIL image of a sample.
    """

    # Side lengths the flat pixel list is reshaped to.
    IMAGE_SIZE = (48, 48)

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _pixels_to_array(pixel_string):
        """Decode a whitespace-separated pixel string into a 48x48 uint8 array."""
        flat = np.array(pixel_string.split(), dtype=np.uint8)
        return flat.reshape(FER2013Dataset.IMAGE_SIZE)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        ``image`` is an RGB PIL image, or whatever ``transform`` returns for it
        when a transform was supplied; ``label`` is a plain ``int``.
        """
        row = self.data.iloc[idx]
        gray = self._pixels_to_array(row['pixels'])

        # Build the PIL image through torchvision (already imported as
        # `transforms`) rather than through PIL's `Image`, which this notebook
        # never imports. The single-channel image is then replicated to RGB.
        image = transforms.ToPILImage()(gray).convert('RGB')

        if self.transform:
            image = self.transform(image)

        label = int(row['emotion'])
        return image, label

# Basic Block for ResNet

In [6]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus an additive shortcut.

    Args:
        in_planes: Number of channels of the incoming feature map.
        planes: Number of channels produced by both 3x3 convolutions.
        stride: Stride of the first convolution and of the projection shortcut.
    """

    # The block outputs ``planes * expansion`` channels.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # Empty Sequential (identity) when input and output shapes already
        # agree; otherwise a strided 1x1 convolution + batch norm projection.
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply conv-bn-relu, conv-bn, add the shortcut of ``x``, then relu."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        summed = self.bn2(self.conv2(hidden))
        summed += self.shortcut(x)
        return F.relu(summed)

# Bottleneck Block for deeper ResNets


In [None]:
class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    Args:
        in_planes: Number of channels of the incoming feature map.
        planes: Width of the two inner convolutions; the block emits
            ``planes * expansion`` channels.
        stride: Stride of the 3x3 convolution and of the projection shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Empty Sequential (identity) when input and output shapes already
        # agree; otherwise a strided 1x1 convolution + batch norm projection.
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Run the three conv-bn stages, add the shortcut of ``x``, then relu."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        summed = self.bn3(self.conv3(hidden))
        summed += self.shortcut(x)
        return F.relu(summed)

In [None]:
# ResNet Architecture

In [None]:
class ResNet(nn.Module):
    """ResNet-style classifier assembled from a configurable residual block.

    Args:
        block: Block class; it must expose an ``expansion`` attribute and be
            constructible as ``block(in_planes, planes, stride)``.
        num_blocks: Four integers, the number of blocks in each stage.
        num_classes: Size of the output layer.
        dropout_rate: Dropout probability applied before the output layer.
    """

    def __init__(self, block, num_blocks, num_classes=7, dropout_rate=0.5):
        super().__init__()
        # Running channel count; _make_layer updates it as stages are built.
        self.in_planes = 64

        # Stem: strided 7x7 convolution, batch norm, then 3x3 max pooling.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first starts with stride 2.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        # Head: global average pooling, dropout, linear classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest stride 1."""
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produces.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the ``(batch, num_classes)`` output of the final linear layer."""
        features = self.maxpool(F.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(self.dropout(pooled))



# Different ResNet configurations

In [None]:
# Different ResNet configurations
def ResNet18(num_classes=7, dropout_rate=0.5):
    """Build a ResNet from BasicBlock with stage depths [2, 2, 2, 2]."""
    return ResNet(
        block=BasicBlock,
        num_blocks=[2, 2, 2, 2],
        num_classes=num_classes,
        dropout_rate=dropout_rate,
    )

def ResNet34(num_classes=7, dropout_rate=0.5):
    """Build a ResNet from BasicBlock with stage depths [3, 4, 6, 3]."""
    return ResNet(
        block=BasicBlock,
        num_blocks=[3, 4, 6, 3],
        num_classes=num_classes,
        dropout_rate=dropout_rate,
    )

def ResNet50(num_classes=7, dropout_rate=0.5):
    """Build a ResNet from Bottleneck with stage depths [3, 4, 6, 3]."""
    return ResNet(
        block=Bottleneck,
        num_blocks=[3, 4, 6, 3],
        num_classes=num_classes,
        dropout_rate=dropout_rate,
    )

def ResNet101(num_classes=7, dropout_rate=0.5):
    """Build a ResNet from Bottleneck with stage depths [3, 4, 23, 3]."""
    return ResNet(
        block=Bottleneck,
        num_blocks=[3, 4, 23, 3],
        num_classes=num_classes,
        dropout_rate=dropout_rate,
    )

# Training Loop

In [None]:

# Training function
def train_model(model, train_loader, val_loader, config, device):
    """Train ``model`` with early stopping and return the best validation accuracy.

    Each epoch runs one pass over ``train_loader`` and one over ``val_loader``,
    logs the metrics to the active wandb run, and writes the weights to
    ``best_model_<wandb run name>.pth`` whenever validation accuracy improves.

    Args:
        model: Network to train. Only the batches are moved to ``device`` here,
            so the model must already be on it.
        train_loader: Iterable of ``(data, target)`` training batches.
        val_loader: Iterable of ``(data, target)`` validation batches.
        config: Mapping with the required keys ``optimizer`` ('adam' or 'sgd';
            any other value selects AdamW), ``learning_rate``, ``weight_decay``
            and ``epochs``. Optional keys, whose defaults reproduce the values
            that used to be hard-coded: ``momentum`` (0.9, SGD only),
            ``lr_step_size`` (10), ``lr_gamma`` (0.1), ``max_patience`` (10).
        device: Device the batches are moved to.

    Returns:
        The highest validation accuracy observed, in percent.
    """
    criterion = nn.CrossEntropyLoss()
    learning_rate = config['learning_rate']
    weight_decay = config['weight_decay']

    if config['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                               weight_decay=weight_decay)
    elif config['optimizer'] == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                              momentum=config.get('momentum', 0.9),
                              weight_decay=weight_decay)
    else:
        # Every other optimizer name (including 'adamw') falls through to AdamW.
        optimizer = optim.AdamW(model.parameters(), lr=learning_rate,
                                weight_decay=weight_decay)

    # Learning rate scheduler: multiply the LR by gamma every step_size epochs.
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config.get('lr_step_size', 10),
        gamma=config.get('lr_gamma', 0.1),
    )

    best_val_acc = 0.0
    patience = 0  # consecutive epochs without a validation improvement
    max_patience = config.get('max_patience', 10)

    for epoch in range(config['epochs']):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        with tqdm(train_loader, desc=f'Epoch {epoch+1}/{config["epochs"]} - Training') as pbar:
            for data, target in pbar:
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = output.max(1)
                train_total += target.size(0)
                train_correct += predicted.eq(target).sum().item()

                pbar.set_postfix({
                    'Loss': f'{loss.item():.4f}',
                    'Acc': f'{100.*train_correct/train_total:.2f}%'
                })

        train_acc = 100. * train_correct / train_total
        # Mean of the per-batch losses (not weighted by batch size).
        avg_train_loss = train_loss / len(train_loader)

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                _, predicted = output.max(1)
                val_total += target.size(0)
                val_correct += predicted.eq(target).sum().item()

        val_acc = 100. * val_correct / val_total
        avg_val_loss = val_loss / len(val_loader)

        # Log to wandb
        wandb.log({
            'epoch': epoch + 1,
            'train_loss': avg_train_loss,
            'train_accuracy': train_acc,
            'val_loss': avg_val_loss,
            'val_accuracy': val_acc,
            'learning_rate': optimizer.param_groups[0]['lr']
        })

        print(f'Epoch {epoch+1}: Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%')

        # Early stopping bookkeeping: a strict improvement resets the counter
        # and refreshes the checkpoint.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience = 0
            # Save best model
            torch.save(model.state_dict(), f'best_model_{wandb.run.name}.pth')
        else:
            patience += 1

        if patience >= max_patience:
            print(f'Early stopping at epoch {epoch+1}')
            break

        scheduler.step()

    return best_val_acc


# Evaluation function

In [None]:

def evaluate_model(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and compute classification metrics.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_loader: Iterable of ``(data, target)`` batches.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(accuracy, report, cm, all_preds, all_targets)``:
        accuracy in percent, the ``classification_report`` dictionary, the
        confusion matrix (one row and one column per emotion label), and the
        flat lists of predicted and true labels.
    """
    # Emotion labels, indexed by class id.
    emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    class_ids = list(range(len(emotion_labels)))

    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = output.max(1)

            all_preds.extend(predicted.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    # Calculate metrics
    accuracy = sum(p == t for p, t in zip(all_preds, all_targets)) / len(all_preds) * 100

    # Passing the class ids explicitly keeps the report and the matrix aligned
    # with emotion_labels even when a class occurs in neither the targets nor
    # the predictions. Without `labels`, classification_report rejects the
    # seven target_names and confusion_matrix comes out smaller than 7x7.
    report = classification_report(
        all_targets,
        all_preds,
        labels=class_ids,
        target_names=emotion_labels,
        output_dict=True,
        zero_division=0,  # same values as the default, without the warnings
    )

    # Confusion matrix
    cm = confusion_matrix(all_targets, all_preds, labels=class_ids)

    return accuracy, report, cm, all_preds, all_targets

In [None]:
def create_data_transforms(config):
    """Build the training and evaluation image pipelines.

    Both pipelines resize to 224x224, convert to a tensor and normalise each
    channel; the training pipeline inserts random flip / rotation / colour
    jitter between the resize and the tensor conversion.

    Args:
        config: Experiment configuration; not read by this function.

    Returns:
        Tuple ``(train_transform, val_transform)``.
    """
    target_size = (224, 224)
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    def _pipeline(augmentations):
        # Resize -> (optional augmentations) -> tensor -> per-channel normalise.
        return transforms.Compose([
            transforms.Resize(target_size),
            *augmentations,
            transforms.ToTensor(),
            transforms.Normalize(mean=channel_mean, std=channel_std),
        ])

    train_transform = _pipeline([
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
    ])
    val_transform = _pipeline([])

    return train_transform, val_transform

In [None]:

def run_experiment(config):
    """Run one experiment: train, evaluate on the test split, log to wandb.

    Args:
        config: Mapping with the keys ``model`` ('resnet18', 'resnet34',
            'resnet50' or 'resnet101'), ``batch_size`` and ``dropout``, plus
            the keys ``train_model`` reads.

    Returns:
        None. If ``fer2013.csv`` is missing, a hint is printed and the function
        returns early.

    Raises:
        ValueError: If ``config['model']`` is not a known model name.
    """
    model_builders = {
        'resnet18': ResNet18,
        'resnet34': ResNet34,
        'resnet50': ResNet50,
        'resnet101': ResNet101,
    }
    # Reject unknown names before a wandb run is started, instead of failing
    # later on an unbound `model` variable.
    if config['model'] not in model_builders:
        raise ValueError(
            f"Unknown model {config['model']!r}; expected one of {sorted(model_builders)}"
        )

    # Initialize wandb
    wandb.init(project="facial-expression-recognition", config=config)

    # The finally clause closes the wandb run on every exit path: normal
    # completion, the missing-data early return, and any exception.
    try:
        # Set device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f'Using device: {device}')

        # Load data (assuming fer2013.csv is available)
        # NOTE(review): the download cell only unzips the competition archive;
        # confirm that fer2013.csv really ends up in the working directory.
        print("Loading data...")
        try:
            data = pd.read_csv('fer2013.csv')
        except FileNotFoundError:
            print("Please download fer2013.csv from Kaggle FER2013 dataset")
            return

        # Split data by the 'Usage' column
        train_data = data[data['Usage'] == 'Training']
        val_data = data[data['Usage'] == 'PublicTest']
        test_data = data[data['Usage'] == 'PrivateTest']

        # Create transforms
        train_transform, val_transform = create_data_transforms(config)

        # Create datasets (validation and test share the non-augmenting pipeline)
        train_dataset = FER2013Dataset(train_data, train_transform)
        val_dataset = FER2013Dataset(val_data, val_transform)
        test_dataset = FER2013Dataset(test_data, val_transform)

        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=config['batch_size'],
                                  shuffle=True, num_workers=2)
        val_loader = DataLoader(val_dataset, batch_size=config['batch_size'],
                                shuffle=False, num_workers=2)
        test_loader = DataLoader(test_dataset, batch_size=config['batch_size'],
                                 shuffle=False, num_workers=2)

        # Create model
        model = model_builders[config['model']](dropout_rate=config['dropout'])
        model = model.to(device)

        # Log model info
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

        wandb.log({
            'total_parameters': total_params,
            'trainable_parameters': trainable_params
        })

        print(f'Model: {config["model"]}')
        print(f'Total parameters: {total_params:,}')
        print(f'Trainable parameters: {trainable_params:,}')

        # Train model
        best_val_acc = train_model(model, train_loader, val_loader, config, device)

        # Load best model and evaluate; map_location places the checkpoint
        # tensors on the device the model is on.
        checkpoint_path = f'best_model_{wandb.run.name}.pth'
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        test_acc, report, cm, preds, targets = evaluate_model(model, test_loader, device)

        # Log final results
        wandb.log({
            'best_val_accuracy': best_val_acc,
            'test_accuracy': test_acc,
            'precision_macro': report['macro avg']['precision'],
            'recall_macro': report['macro avg']['recall'],
            'f1_macro': report['macro avg']['f1-score']
        })

        print(f'Best Validation Accuracy: {best_val_acc:.2f}%')
        print(f'Test Accuracy: {test_acc:.2f}%')

        # Create confusion matrix plot
        plt.figure(figsize=(10, 8))
        emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=emotion_labels, yticklabels=emotion_labels)
        plt.title(f'Confusion Matrix - {config["model"]}')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')

        # Log confusion matrix to wandb
        wandb.log({"confusion_matrix": wandb.Image(plt)})
        plt.close()
    finally:
        wandb.finish()

# Different experiment configurations

In [None]:
# Different experiment configurations
def _experiment(model, batch_size, learning_rate, optimizer, dropout, weight_decay, epochs=50):
    """Assemble one run configuration; the key order fixes how the config prints."""
    return {
        'model': model,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'epochs': epochs,
        'optimizer': optimizer,
        'dropout': dropout,
        'weight_decay': weight_decay,
    }


experiments = [
    # ResNet18 experiments
    _experiment(model='resnet18', batch_size=64, learning_rate=0.001,
                optimizer='adam', dropout=0.3, weight_decay=1e-4),
    _experiment(model='resnet18', batch_size=32, learning_rate=0.01,
                optimizer='sgd', dropout=0.5, weight_decay=1e-4),

    # ResNet34 experiments
    _experiment(model='resnet34', batch_size=64, learning_rate=0.001,
                optimizer='adam', dropout=0.3, weight_decay=1e-4),
    _experiment(model='resnet34', batch_size=64, learning_rate=0.0005,
                optimizer='adamw', dropout=0.4, weight_decay=1e-3),

    # ResNet50 experiments
    _experiment(model='resnet50', batch_size=32, learning_rate=0.001,
                optimizer='adam', dropout=0.5, weight_decay=1e-4),

    # High dropout (overfitting prevention)
    _experiment(model='resnet34', batch_size=64, learning_rate=0.001,
                optimizer='adam', dropout=0.7, weight_decay=1e-3),

    # Low dropout (potential overfitting)
    _experiment(model='resnet34', batch_size=64, learning_rate=0.001,
                optimizer='adam', dropout=0.1, weight_decay=1e-5),
]

# Run all experiments, one wandb run per configuration
banner = '=' * 50
for i, config in enumerate(experiments):
    print(f"\n{banner}")
    print(f"Running Experiment {i+1}/{len(experiments)}")
    print(f"Config: {config}")
    print(f"{banner}\n")

    run_experiment(config)