# Deep Learning Emotion Recognition System - Complete Pipeline

This notebook demonstrates the complete workflow:
1. Environment setup
2. Data exploration
3. Model architecture overview
4. Training demonstration
5. Evaluation and metrics
6. Inference examples
7. Ablation study results
8. Comparison with the base paper

## 1. Environment Setup

In [None]:
import sys
import os
sys.path.append('..')

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2

from models import get_model, EMOTION_LABELS
from utils import get_data_loaders, plot_confusion_matrix, compute_metrics

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's 'whitegrid' style to the plots drawn below.
sns.set_style('whitegrid')

# Pick the compute device once: CUDA when a GPU is usable, CPU otherwise.
cuda_ready = torch.cuda.is_available()
device = torch.device('cuda' if cuda_ready else 'cpu')
print(f"Using device: {device}")
if cuda_ready:
    # Report the name and total memory of GPU index 0.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

## 2. Data Exploration

In [None]:
DATA_DIR = '../data'

# Without the dataset directory nothing is loaded; later cells check for the
# loader variables before using them.
if not os.path.exists(DATA_DIR):
    print("⚠️ Dataset not found. Please download AffectNet+ and place in data/ directory")
else:
    # The helper's four return values are unpacked as the train/val/test
    # loaders followed by the class weights.
    loaders_and_weights = get_data_loaders(DATA_DIR, batch_size=16, num_workers=2)
    train_loader, val_loader, test_loader, class_weights = loaders_and_weights

    print(f"Dataset loaded successfully!")
    print(f"Training batches: {len(train_loader)}")
    print(f"Validation batches: {len(val_loader)}")
    print(f"Test batches: {len(test_loader)}")

In [None]:
# Bar chart of the per-class weights (skipped when the data cell loaded nothing).
if locals().get('class_weights') is not None:
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(EMOTION_LABELS, class_weights, color='skyblue', edgecolor='navy')
    ax.set_xlabel('Emotion Class', fontsize=12)
    ax.set_ylabel('Class Weight', fontsize=12)
    ax.set_title('Class Weights for Handling Imbalanced Data', fontsize=14, fontweight='bold')
    # Tilt the class names so they do not overlap.
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()

In [None]:
# Visualize sample images from the first training batch
if 'train_loader' in locals():
    images, labels = next(iter(train_loader))

    # Per-channel statistics used to undo input normalisation for display.
    # NOTE(review): assumes get_data_loaders normalises with these (ImageNet)
    # mean/std values - confirm against the transform pipeline.
    norm_mean = np.array([0.485, 0.456, 0.406])
    norm_std = np.array([0.229, 0.224, 0.225])

    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()

    # Blank every panel up front so slots beyond the batch size do not render
    # as empty frames with ticks.
    for panel in axes:
        panel.axis('off')

    for i in range(min(len(axes), len(images))):
        # CHW tensor -> HWC array, then de-normalise and clamp to [0, 1] for imshow.
        img = images[i].permute(1, 2, 0).cpu().numpy()
        img = np.clip(img * norm_std + norm_mean, 0, 1)

        # Convert the 0-dim label tensor to a plain int before indexing the label list.
        label_name = EMOTION_LABELS[int(labels[i])]

        axes[i].imshow(img)
        axes[i].set_title(f"Label: {label_name}", fontsize=12, fontweight='bold')

    plt.suptitle('Sample Training Images', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()

## 3. Model Architecture Overview

In [None]:
# Build the full model from one keyword bundle and move it to the selected device.
model_config = dict(
    model_type='full',
    num_classes=8,
    backbone='efficientnet_b4',
    pretrained=True,
    lstm_hidden=256,
    lstm_layers=2,
    dropout=0.5,
)
model = get_model(**model_config).to(device)

# Gather (element count, requires_grad) for every parameter in a single pass.
param_info = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in param_info)
trainable_params = sum(count for count, needs_grad in param_info if needs_grad)

rule = "=" * 80
print(rule)
print("MODEL ARCHITECTURE")
print(rule)
print(model)
print(rule)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
# Size estimate counts 4 bytes per parameter (i.e. 32-bit weights).
print(f"Model size: ~{total_params * 4 / 1e6:.2f} MB")
print(rule)

In [None]:
# Test forward pass on a single random input
dummy_input = torch.randn(1, 3, 224, 224).to(device)

# Switch to inference mode first: in training mode dropout makes the printed
# probabilities nondeterministic and batch-norm layers would update their
# running statistics from this noise input (and can reject a batch of one).
# The training cell calls model.train() itself, so this has no lasting effect.
model.eval()

with torch.no_grad():
    output = model(dummy_input)
    probs = torch.softmax(output, dim=1)

print(f"Input shape: {dummy_input.shape}")
print(f"Output shape: {output.shape}")
print(f"Output probabilities sum: {probs.sum().item():.4f}")
print(f"\nSample output probabilities:")
# One line per class: left-aligned label, probability as a percentage.
for emotion, prob in zip(EMOTION_LABELS, probs[0]):
    print(f"  {emotion:<12}: {prob.item()*100:>6.2f}%")

## 4. Training Demonstration

For demonstration, we'll train for just 2 epochs. For full training, use the `training/train.py` script.

In [None]:
from models import WeightedCrossEntropyLoss
from utils import AverageMeter
from tqdm.notebook import tqdm

# Setup training (demo only - a couple of epochs)
if 'train_loader' in locals():
    # Single source of truth for the demo length: the loop bound and the log
    # messages below all derive from it, so they cannot drift apart.
    NUM_DEMO_EPOCHS = 2

    criterion = WeightedCrossEntropyLoss(class_weights, device=device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)

    # One entry is appended per epoch for each key.
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    print(f"Starting demo training ({NUM_DEMO_EPOCHS} epochs)...")
    print("For full training, use: python training/train.py --epochs 50")

    for epoch in range(1, NUM_DEMO_EPOCHS + 1):
        # ---- Training pass ----
        model.train()
        # NOTE(review): AverageMeter.update(value, n) is assumed to keep a
        # sample-weighted running mean exposed as .avg - confirm in utils.
        losses = AverageMeter()
        accuracies = AverageMeter()

        for images, labels in tqdm(train_loader, desc=f'Epoch {epoch} [Train]'):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Batch accuracy in percent; weighted by batch size in the meter.
            predicted = outputs.argmax(dim=1)
            accuracy = (predicted == labels).float().mean().item() * 100

            losses.update(loss.item(), images.size(0))
            accuracies.update(accuracy, images.size(0))

        history['train_loss'].append(losses.avg)
        history['train_acc'].append(accuracies.avg)

        # ---- Validation pass (no gradient tracking, no weight updates) ----
        model.eval()
        val_losses = AverageMeter()
        val_accuracies = AverageMeter()

        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f'Epoch {epoch} [Val]'):
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                predicted = outputs.argmax(dim=1)
                accuracy = (predicted == labels).float().mean().item() * 100

                val_losses.update(loss.item(), images.size(0))
                val_accuracies.update(accuracy, images.size(0))

        history['val_loss'].append(val_losses.avg)
        history['val_acc'].append(val_accuracies.avg)

        print(f"\nEpoch {epoch}/{NUM_DEMO_EPOCHS}:")
        print(f"  Train Loss: {losses.avg:.4f}, Train Acc: {accuracies.avg:.2f}%")
        print(f"  Val Loss: {val_losses.avg:.4f}, Val Acc: {val_accuracies.avg:.2f}%")
else:
    print("⚠️ Skipping training demo - dataset not loaded")

In [None]:
# Plot training history (only when the training cell produced one)
if 'history' in locals():
    # Training logs number epochs from 1, so plot against 1..N instead of the
    # 0-based list indices matplotlib would use by default.
    epoch_axis = list(range(1, len(history['train_loss']) + 1))

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: loss curves.
    ax1.plot(epoch_axis, history['train_loss'], 'b-', label='Training Loss', linewidth=2, marker='o')
    ax1.plot(epoch_axis, history['val_loss'], 'r-', label='Validation Loss', linewidth=2, marker='s')
    ax1.set_title('Loss Curve', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Right panel: accuracy curves.
    ax2.plot(epoch_axis, history['train_acc'], 'b-', label='Training Accuracy', linewidth=2, marker='o')
    ax2.plot(epoch_axis, history['val_acc'], 'r-', label='Validation Accuracy', linewidth=2, marker='s')
    ax2.set_title('Accuracy Curve', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy (%)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # Epochs are whole numbers; avoid fractional ticks such as 1.25 or 1.5.
    for panel in (ax1, ax2):
        panel.set_xticks(epoch_axis)

    plt.tight_layout()
    plt.show()

## 5. Evaluation and Metrics

After full training, evaluate on the test set. Here we demonstrate with the validation set.

In [None]:
# Score the current model on the validation split
if 'val_loader' not in locals():
    print("⚠️ Skipping evaluation - dataset not loaded")
else:
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc='Evaluating'):
            images = images.to(device)
            outputs = model(images)
            # Index of the highest-scoring class for each sample.
            predicted = outputs.argmax(dim=1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.numpy())

    # Flatten the collected per-sample values into arrays for the metric helper.
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    metrics = compute_metrics(all_labels, all_preds, average='macro')

    rule = "=" * 60
    print("\n" + rule)
    print("EVALUATION RESULTS (Validation Set)")
    print(rule)
    print(f"Accuracy:  {metrics['accuracy']*100:.2f}%")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall:    {metrics['recall']:.4f}")
    print(f"F1-Score:  {metrics['f1']:.4f}")
    print(rule)

In [None]:
# Confusion matrix (row-normalised) for the predictions collected above
if 'all_preds' in locals():
    from sklearn.metrics import confusion_matrix

    # Pin the class order so the matrix always has one row/column per entry of
    # EMOTION_LABELS. Without `labels=`, a class that appears in neither the
    # true nor the predicted labels is dropped and the tick labels no longer
    # line up with the cells.
    class_ids = np.arange(len(EMOTION_LABELS))
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

    # Divide each row by its number of true samples. Rows with no true samples
    # stay at 0 instead of producing NaN and a divide-by-zero warning.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm,
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    )

    plt.figure(figsize=(12, 10))
    sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues',
                xticklabels=EMOTION_LABELS, yticklabels=EMOTION_LABELS,
                cbar_kws={'label': 'Proportion'})
    plt.title('Confusion Matrix (Normalized)', fontsize=16, fontweight='bold')
    plt.ylabel('True Label', fontsize=12)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

## 6. Inference Example

In [None]:
# Predict on the first validation batch (which samples appear depends on the
# loader's ordering; nothing is sampled randomly in this cell)
if 'val_loader' in locals():
    images, labels = next(iter(val_loader))
    images = images.to(device)

    model.eval()
    with torch.no_grad():
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)
        confidences, predicted = probs.max(1)

    # Bring the results back to the CPU so they can be compared with the
    # CPU-side labels without mixing devices.
    confidences = confidences.cpu()
    predicted = predicted.cpu()

    # Per-channel statistics used to undo input normalisation for display.
    # NOTE(review): assumes get_data_loaders normalises with these (ImageNet)
    # mean/std values - confirm against the transform pipeline.
    norm_mean = np.array([0.485, 0.456, 0.406])
    norm_std = np.array([0.229, 0.224, 0.225])

    fig, axes = plt.subplots(2, 4, figsize=(20, 10))
    axes = axes.flatten()

    # Blank every panel up front so slots beyond the batch size do not render
    # as empty frames with ticks.
    for panel in axes:
        panel.axis('off')

    for i in range(min(len(axes), len(images))):
        # CHW tensor -> HWC array, then de-normalise and clamp to [0, 1] for imshow.
        img = images[i].permute(1, 2, 0).cpu().numpy()
        img = np.clip(img * norm_std + norm_mean, 0, 1)

        # Plain ints make both the list lookups and the equality test unambiguous.
        true_idx = int(labels[i])
        pred_idx = int(predicted[i])
        true_label = EMOTION_LABELS[true_idx]
        pred_label = EMOTION_LABELS[pred_idx]
        confidence = confidences[i].item()

        # Green title for a correct prediction, red for a miss.
        color = 'green' if true_idx == pred_idx else 'red'

        axes[i].imshow(img)
        axes[i].set_title(f"True: {true_label}\nPred: {pred_label}\nConf: {confidence*100:.1f}%",
                         fontsize=11, fontweight='bold', color=color)

    plt.suptitle('Sample Predictions', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
else:
    print("⚠️ Skipping inference demo - dataset not loaded")

## 7. Ablation Study Results

After running the ablation study, the results will be displayed here.

In [None]:
# Load ablation results if available
ablation_results_path = '../results/ablation/ablation_results.csv'

if os.path.exists(ablation_results_path):
    df_ablation = pd.read_csv(ablation_results_path)
    print("\n" + "="*80)
    print("ABLATION STUDY RESULTS")
    print("="*80)
    print(df_ablation.to_string(index=False))
    print("="*80)

    # Visualize ablation results
    fig, ax = plt.subplots(figsize=(12, 6))

    x = np.arange(len(df_ablation))
    width = 0.6

    # The accuracy column may be read as strings such as "83.50%" or, when the
    # CSV stores bare numbers, as floats. Going through str() handles both;
    # calling .strip() directly on a float would raise AttributeError.
    accuracies = [float(str(acc).strip('%')) for acc in df_ablation['Accuracy (%)']]

    bars = ax.bar(x, accuracies, width, color=['#FF6B6B', '#4ECDC4', '#45B7D1'])

    ax.set_xlabel('Model Configuration', fontsize=12)
    ax.set_ylabel('Accuracy (%)', fontsize=12)
    ax.set_title('Ablation Study - Impact of Architecture Components', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(df_ablation['Configuration'], rotation=15, ha='right')
    ax.grid(True, alpha=0.3, axis='y')

    # Write each accuracy value centred just above its bar.
    for bar, acc in zip(bars, accuracies):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
               f'{acc:.2f}%', ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    plt.show()
else:
    print("⚠️ Ablation results not found. Run: python training/ablation_study.py")

## 8. Comparison with Base Paper

In [None]:
# Build the comparison table row by row, then pivot it into column lists.
comparison_columns = [
    'Model', 'Architecture', 'Dataset', 'Loss Function',
    'Accuracy (%)', 'F1-Score', 'Novelty',
]
comparison_rows = [
    ('DCD-DAN (2025)', 'CNN + Cross-Domain Dual Attention', 'AffectNet+',
     'Standard CE', '83.50', '0.830', 'Cross-domain learning'),
    ('Our Model (Proposed)', 'CNN + Dual Attention + BiLSTM', 'AffectNet+',
     'Class-Weighted CE', 'TBD (after training)', 'TBD (after training)',
     'BiLSTM temporal modeling + class weights'),
]
# Transpose rows -> {column name: [value per model]}.
comparison_data = {
    column: list(values)
    for column, values in zip(comparison_columns, zip(*comparison_rows))
}

df_comparison = pd.DataFrame(comparison_data)

banner = "=" * 100
print("\n" + banner)
print("COMPARISON WITH BASE PAPER")
print(banner)
print(df_comparison.to_string(index=False))
print(banner)
print("\nNote: Update 'TBD' values after completing full training (50 epochs)")

## Summary

This notebook demonstrated:
- ✅ Data loading and exploration
- ✅ Model architecture (CNN + Dual Attention + BiLSTM)
- ✅ Training pipeline (demo with 2 epochs)
- ✅ Evaluation metrics and confusion matrix
- ✅ Inference on sample images
- ✅ Ablation study visualization

### Next Steps:
1. **Full Training**: Run `python training/train.py --epochs 50` for complete training
2. **Evaluation**: Evaluate on test set with `python training/evaluate.py`
3. **Ablation Study**: Compare architectures with `python training/ablation_study.py`
4. **Real-time Demo**: Test webcam inference with `python inference/webcam_demo.py`

### Publication Checklist:
- [ ] Train model to 85%+ accuracy
- [ ] Complete ablation study
- [ ] Generate all visualizations (confusion matrix, ROC curves)
- [ ] Document results in comparison table
- [ ] Test real-time webcam demo
- [ ] Write paper with methodology and results