In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, datasets, models
from retinaface import RetinaFace
import cv2
import numpy as np
from PIL import Image
import os
from glob import glob
from sklearn.model_selection import StratifiedShuffleSplit
from tqdm import tqdm
import pickle

# ============================================================================
# STEP 1: ONE-TIME PREPROCESSING - Run this ONCE and save results
# ============================================================================

def preprocess_and_save_faces(data_path, output_path, target_size=(48, 48), skip_detection=True):
    """
    Detect faces once and save cropped faces to disk.
    This should be run ONCE before training.
    
    Args:
        skip_detection: If True, skips face detection entirely (fastest, for pre-cropped datasets like FER2013).
    """
    os.makedirs(output_path, exist_ok=True)
    
    if skip_detection:
        print("✓ Skipping face detection (dataset already contains faces)")
        print("✓ This will take 2-3 minutes for the entire dataset\n")
    
    # Initialize face detector if needed
    face_cascade = None
    if not skip_detection:
        print("✓ Using OpenCV Haar Cascade for face detection")
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
    
    for split in ['train', 'test']:
        split_path = os.path.join(data_path, split)
        output_split_path = os.path.join(output_path, split)
        
        classes = os.listdir(split_path)
        
        for emotion_class in classes:
            class_path = os.path.join(split_path, emotion_class)
            output_class_path = os.path.join(output_split_path, emotion_class)
            os.makedirs(output_class_path, exist_ok=True)
            
            image_files = glob(os.path.join(class_path, "*.png"))
            
            print(f"Processing {split}/{emotion_class}: {len(image_files)} images")
            
            successful = 0
            for img_path in tqdm(image_files, desc=f"{split}/{emotion_class}"):
                try:
                    img = Image.open(img_path).convert("RGB")
                    img_np = np.array(img)
                    
                    if skip_detection:
                        # Just resize - no detection needed
                        crop = cv2.resize(img_np, target_size)
                    else:
                        # OpenCV Haar Cascade detection
                        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
                        faces = face_cascade.detectMultiScale(
                            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
                        )
                        
                        if len(faces) > 0:
                            # Use the largest face
                            x, y, w, h = max(faces, key=lambda f: f[2] * f[3])
                            crop = img_np[y:y+h, x:x+w]
                            crop = cv2.resize(crop, target_size)
                        else:
                            crop = cv2.resize(img_np, target_size)  # Fallback
                    
                    # Save cropped face
                    crop_pil = Image.fromarray(crop)
                    output_file = os.path.join(output_class_path, os.path.basename(img_path))
                    crop_pil.save(output_file)
                    successful += 1
                    
                except Exception as e:
                    pass  # Skip problematic images
            
            print(f"✓ Successfully processed {successful}/{len(image_files)} images\n")
    
    print("✓ Preprocessing complete! Cropped faces saved to:", output_path)


def extract_and_save_features(preprocessed_path, output_file, device='cpu'):
    """
    Extract features from all preprocessed faces ONCE and save to disk.
    This should be run ONCE after preprocessing faces.
    """
    # Load pre-trained feature extractor
    feature_extractor = models.efficientnet_b0(
        weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1
    )
    feature_extractor = nn.Sequential(*list(feature_extractor.children())[:-1])  # Remove classifier
    feature_extractor.eval()
    feature_extractor.to(device)
    
    # Transformation for feature extraction
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # EfficientNet input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    features_dict = {'train': [], 'test': []}
    
    for split in ['train', 'test']:
        dataset = datasets.ImageFolder(
            os.path.join(preprocessed_path, split),
            transform=transform
        )
        dataloader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=4)
        
        split_features = []
        split_labels = []
        
        print(f"\nExtracting features for {split} set...")
        with torch.no_grad():
            for images, labels in tqdm(dataloader):
                images = images.to(device)
                features = feature_extractor(images)
                features = features.squeeze(-1).squeeze(-1)  # Remove spatial dimensions
                
                split_features.append(features.cpu())
                split_labels.append(labels)
        
        features_dict[split] = {
            'features': torch.cat(split_features),
            'labels': torch.cat(split_labels),
            'classes': dataset.classes
        }
    
    # Save features
    torch.save(features_dict, output_file)
    print(f"\nFeatures saved to: {output_file}")
    print(f"Feature dimension: {features_dict['train']['features'].shape[1]}")


# ============================================================================
# STEP 2: FAST TRAINING DATASET - Uses pre-extracted features
# ============================================================================

class PreExtractedFeatureDataset(Dataset):
    """
    Fast dataset that loads pre-extracted features from memory/disk.
    """
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]


# ============================================================================
# STEP 3: TRAINING FUNCTIONS
# ============================================================================

class FER_EfficientNetClassifier(nn.Module):
    def __init__(self, feature_dim, num_classes=7):
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )
    
    def forward(self, x):
        return self.classifier(x)


def train_model(model, train_loader, val_loader, criterion, optimizer, 
                num_epochs, device='cpu', patience=5):
    """
    Fast training function with early stopping.
    """
    model.to(device)
    best_val_acc = 0.0
    patience_counter = 0
    
    for epoch in range(num_epochs):
        print(f"\n--- Epoch {epoch+1}/{num_epochs} ---")
        
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        
        train_bar = tqdm(train_loader, desc='Training')
        for features, labels in train_bar:
            features, labels = features.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * features.size(0)
            _, predicted = torch.max(outputs, 1)
            train_correct += (predicted == labels).sum().item()
            train_total += labels.size(0)
            
            train_bar.set_postfix({
                'Loss': f'{loss.item():.4f}',
                'Acc': f'{train_correct/train_total:.4f}'
            })
        
        train_loss /= train_total
        train_acc = train_correct / train_total
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for features, labels in tqdm(val_loader, desc='Validation'):
                features, labels = features.to(device), labels.to(device)
                outputs = model(features)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item() * features.size(0)
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_total += labels.size(0)
        
        val_loss /= val_total
        val_acc = val_correct / val_total
        
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc*100:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc*100:.2f}%")
        
        # Early stopping
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
            print(f"✓ New best model saved! (Val Acc: {val_acc*100:.2f}%)")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"\nEarly stopping triggered after {epoch+1} epochs")
                break
    
    return model


# ============================================================================
# STEP 4: MAIN TRAINING SCRIPT
# ============================================================================

def main():
    # Paths
    DATA_PATH = "C:/adam/AMIT_Diploma/grad_project/archive (1)"
    PREPROCESSED_PATH = "C:/adam/AMIT_Diploma/grad_project/preprocessed_faces"
    FEATURES_FILE = "C:/adam/AMIT_Diploma/grad_project/extracted_features.pt"
    
    # ========================================================================
    # OPTION A: First time setup (run once)
    # ========================================================================
    # FASTEST (5-8 minutes) - Skip face detection for FER2013
    print("Step 1: Preprocessing faces...")
    preprocess_and_save_faces(DATA_PATH, PREPROCESSED_PATH, skip_detection=True)
    # 
    print("\nStep 2: Extracting features...")
    extract_and_save_features(PREPROCESSED_PATH, FEATURES_FILE)
    
    # ========================================================================
    # OPTION B: Fast training (run every time after preprocessing)
    # ========================================================================
    print("Loading pre-extracted features...")
    features_dict = torch.load(FEATURES_FILE)
    
    # Get feature dimension
    feature_dim = features_dict['train']['features'].shape[1]
    print(f"Feature dimension: {feature_dim}")
    
    # Split train into train/val
    train_features = features_dict['train']['features']
    train_labels = features_dict['train']['labels']
    
    # Stratified split
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
    train_idx, val_idx = next(splitter.split(
        np.arange(len(train_labels)),
        train_labels.numpy()
    ))
    
    # Create datasets
    train_dataset = PreExtractedFeatureDataset(
        train_features[train_idx],
        train_labels[train_idx]
    )
    val_dataset = PreExtractedFeatureDataset(
        train_features[val_idx],
        train_labels[val_idx]
    )
    test_dataset = PreExtractedFeatureDataset(
        features_dict['test']['features'],
        features_dict['test']['labels']
    )
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
    
    print(f"Train samples: {len(train_dataset)}")
    print(f"Val samples: {len(val_dataset)}")
    print(f"Test samples: {len(test_dataset)}")
    
    # Initialize model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = FER_EfficientNetClassifier(feature_dim=feature_dim, num_classes=7)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Train
    print("\nStarting training...")
    trained_model = train_model(
        model, train_loader, val_loader, criterion, optimizer,
        num_epochs=50, device=device, patience=5
    )
    
    print("\nTraining complete!")


if __name__ == "__main__":
    main()

Step 1: Preprocessing faces...
✓ Skipping face detection (dataset already contains faces)
✓ This will take 2-3 minutes for the entire dataset

Processing train/angry: 3995 images


train/angry: 100%|██████████| 3995/3995 [00:02<00:00, 1530.10it/s]


✓ Successfully processed 3995/3995 images

Processing train/disgusted: 436 images


train/disgusted: 100%|██████████| 436/436 [00:00<00:00, 1618.93it/s]


✓ Successfully processed 436/436 images

Processing train/fearful: 4097 images


train/fearful: 100%|██████████| 4097/4097 [00:02<00:00, 1591.59it/s]


✓ Successfully processed 4097/4097 images

Processing train/happy: 7215 images


train/happy: 100%|██████████| 7215/7215 [00:04<00:00, 1605.17it/s]


✓ Successfully processed 7215/7215 images

Processing train/neutral: 4965 images


train/neutral: 100%|██████████| 4965/4965 [00:03<00:00, 1605.28it/s]


✓ Successfully processed 4965/4965 images

Processing train/sad: 4830 images


train/sad: 100%|██████████| 4830/4830 [00:02<00:00, 1634.31it/s]


✓ Successfully processed 4830/4830 images

Processing train/surprised: 3171 images


train/surprised: 100%|██████████| 3171/3171 [00:02<00:00, 1506.70it/s]


✓ Successfully processed 3171/3171 images

Processing test/angry: 958 images


test/angry: 100%|██████████| 958/958 [00:00<00:00, 1608.85it/s]


✓ Successfully processed 958/958 images

Processing test/disgusted: 111 images


test/disgusted: 100%|██████████| 111/111 [00:00<00:00, 1662.91it/s]


✓ Successfully processed 111/111 images

Processing test/fearful: 1024 images


test/fearful: 100%|██████████| 1024/1024 [00:00<00:00, 1540.41it/s]


✓ Successfully processed 1024/1024 images

Processing test/happy: 1774 images


test/happy: 100%|██████████| 1774/1774 [00:01<00:00, 1611.56it/s]


✓ Successfully processed 1774/1774 images

Processing test/neutral: 1233 images


test/neutral: 100%|██████████| 1233/1233 [00:00<00:00, 1629.32it/s]


✓ Successfully processed 1233/1233 images

Processing test/sad: 1247 images


test/sad: 100%|██████████| 1247/1247 [00:00<00:00, 1606.02it/s]


✓ Successfully processed 1247/1247 images

Processing test/surprised: 831 images


test/surprised: 100%|██████████| 831/831 [00:00<00:00, 1571.64it/s]


✓ Successfully processed 831/831 images

✓ Preprocessing complete! Cropped faces saved to: C:/adam/AMIT_Diploma/grad_project/preprocessed_faces

Step 2: Extracting features...

Extracting features for train set...


100%|██████████| 898/898 [06:31<00:00,  2.29it/s]



Extracting features for test set...


100%|██████████| 225/225 [01:43<00:00,  2.18it/s]



Features saved to: C:/adam/AMIT_Diploma/grad_project/extracted_features.pt
Feature dimension: 1280
Loading pre-extracted features...
Feature dimension: 1280
Train samples: 25838
Val samples: 2871
Test samples: 7178

Starting training...

--- Epoch 1/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 169.43it/s, Loss=1.4350, Acc=0.4709]
Validation: 100%|██████████| 45/45 [00:00<00:00, 868.39it/s]


Train Loss: 1.4026, Train Acc: 47.09%
Val Loss: 1.2686, Val Acc: 51.27%
✓ New best model saved! (Val Acc: 51.27%)

--- Epoch 2/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 177.68it/s, Loss=1.1193, Acc=0.5351]
Validation: 100%|██████████| 45/45 [00:00<00:00, 1188.55it/s]


Train Loss: 1.2340, Train Acc: 53.51%
Val Loss: 1.2233, Val Acc: 54.75%
✓ New best model saved! (Val Acc: 54.75%)

--- Epoch 3/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 169.02it/s, Loss=1.2269, Acc=0.5679]
Validation: 100%|██████████| 45/45 [00:00<00:00, 1006.51it/s]


Train Loss: 1.1505, Train Acc: 56.79%
Val Loss: 1.1925, Val Acc: 56.36%
✓ New best model saved! (Val Acc: 56.36%)

--- Epoch 4/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 171.90it/s, Loss=1.2097, Acc=0.5974]
Validation: 100%|██████████| 45/45 [00:00<00:00, 900.52it/s]


Train Loss: 1.0805, Train Acc: 59.74%
Val Loss: 1.1746, Val Acc: 56.67%
✓ New best model saved! (Val Acc: 56.67%)

--- Epoch 5/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 167.14it/s, Loss=1.0714, Acc=0.6215]
Validation: 100%|██████████| 45/45 [00:00<00:00, 894.83it/s]


Train Loss: 1.0183, Train Acc: 62.15%
Val Loss: 1.1720, Val Acc: 57.26%
✓ New best model saved! (Val Acc: 57.26%)

--- Epoch 6/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 149.95it/s, Loss=0.8093, Acc=0.6435]
Validation: 100%|██████████| 45/45 [00:00<00:00, 861.98it/s]


Train Loss: 0.9570, Train Acc: 64.35%
Val Loss: 1.1687, Val Acc: 57.30%
✓ New best model saved! (Val Acc: 57.30%)

--- Epoch 7/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 166.46it/s, Loss=0.6403, Acc=0.6658]
Validation: 100%|██████████| 45/45 [00:00<00:00, 795.54it/s]


Train Loss: 0.8948, Train Acc: 66.58%
Val Loss: 1.1896, Val Acc: 57.09%

--- Epoch 8/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 163.66it/s, Loss=1.1001, Acc=0.6862]
Validation: 100%|██████████| 45/45 [00:00<00:00, 1025.33it/s]


Train Loss: 0.8401, Train Acc: 68.62%
Val Loss: 1.2040, Val Acc: 58.03%
✓ New best model saved! (Val Acc: 58.03%)

--- Epoch 9/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 159.51it/s, Loss=0.7680, Acc=0.7085]
Validation: 100%|██████████| 45/45 [00:00<00:00, 918.44it/s]


Train Loss: 0.7850, Train Acc: 70.85%
Val Loss: 1.2352, Val Acc: 57.96%

--- Epoch 10/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 162.17it/s, Loss=0.9557, Acc=0.7347]
Validation: 100%|██████████| 45/45 [00:00<00:00, 1114.68it/s]


Train Loss: 0.7243, Train Acc: 73.47%
Val Loss: 1.2654, Val Acc: 58.13%
✓ New best model saved! (Val Acc: 58.13%)

--- Epoch 11/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 164.88it/s, Loss=0.9868, Acc=0.7491]
Validation: 100%|██████████| 45/45 [00:00<00:00, 1001.28it/s]


Train Loss: 0.6829, Train Acc: 74.91%
Val Loss: 1.3018, Val Acc: 57.61%

--- Epoch 12/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 173.72it/s, Loss=0.6481, Acc=0.7668]
Validation: 100%|██████████| 45/45 [00:00<00:00, 894.85it/s]


Train Loss: 0.6319, Train Acc: 76.68%
Val Loss: 1.3171, Val Acc: 59.04%
✓ New best model saved! (Val Acc: 59.04%)

--- Epoch 13/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 173.65it/s, Loss=1.0868, Acc=0.7833]
Validation: 100%|██████████| 45/45 [00:00<00:00, 964.28it/s]


Train Loss: 0.5839, Train Acc: 78.33%
Val Loss: 1.3885, Val Acc: 59.00%

--- Epoch 14/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 159.67it/s, Loss=0.5608, Acc=0.7979]
Validation: 100%|██████████| 45/45 [00:00<00:00, 963.36it/s]


Train Loss: 0.5470, Train Acc: 79.79%
Val Loss: 1.4616, Val Acc: 58.38%

--- Epoch 15/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 159.01it/s, Loss=0.4056, Acc=0.8117]
Validation: 100%|██████████| 45/45 [00:00<00:00, 789.61it/s]


Train Loss: 0.5108, Train Acc: 81.17%
Val Loss: 1.4421, Val Acc: 58.86%

--- Epoch 16/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 147.11it/s, Loss=0.7739, Acc=0.8248]
Validation: 100%|██████████| 45/45 [00:00<00:00, 725.95it/s]


Train Loss: 0.4744, Train Acc: 82.48%
Val Loss: 1.4996, Val Acc: 58.73%

--- Epoch 17/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 169.75it/s, Loss=0.6071, Acc=0.8357]
Validation: 100%|██████████| 45/45 [00:00<00:00, 1212.79it/s]


Train Loss: 0.4530, Train Acc: 83.57%
Val Loss: 1.5198, Val Acc: 59.60%
✓ New best model saved! (Val Acc: 59.60%)

--- Epoch 18/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 168.83it/s, Loss=0.5182, Acc=0.8442]
Validation: 100%|██████████| 45/45 [00:00<00:00, 995.26it/s]


Train Loss: 0.4240, Train Acc: 84.42%
Val Loss: 1.5653, Val Acc: 59.63%
✓ New best model saved! (Val Acc: 59.63%)

--- Epoch 19/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 170.40it/s, Loss=0.7136, Acc=0.8583]
Validation: 100%|██████████| 45/45 [00:00<00:00, 913.88it/s]


Train Loss: 0.3939, Train Acc: 85.83%
Val Loss: 1.5887, Val Acc: 59.84%
✓ New best model saved! (Val Acc: 59.84%)

--- Epoch 20/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 161.14it/s, Loss=0.3815, Acc=0.8618]
Validation: 100%|██████████| 45/45 [00:00<00:00, 966.39it/s]


Train Loss: 0.3829, Train Acc: 86.18%
Val Loss: 1.5961, Val Acc: 58.38%

--- Epoch 21/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 157.49it/s, Loss=0.1870, Acc=0.8728]
Validation: 100%|██████████| 45/45 [00:00<00:00, 925.40it/s]


Train Loss: 0.3516, Train Acc: 87.28%
Val Loss: 1.6710, Val Acc: 58.86%

--- Epoch 22/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 165.49it/s, Loss=0.2505, Acc=0.8736]
Validation: 100%|██████████| 45/45 [00:00<00:00, 786.93it/s]


Train Loss: 0.3455, Train Acc: 87.36%
Val Loss: 1.7234, Val Acc: 59.74%

--- Epoch 23/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 165.15it/s, Loss=0.4206, Acc=0.8826]
Validation: 100%|██████████| 45/45 [00:00<00:00, 1026.79it/s]


Train Loss: 0.3218, Train Acc: 88.26%
Val Loss: 1.7652, Val Acc: 57.65%

--- Epoch 24/50 ---


Training: 100%|██████████| 404/404 [00:02<00:00, 154.47it/s, Loss=0.4428, Acc=0.8879]
Validation: 100%|██████████| 45/45 [00:00<00:00, 817.39it/s]

Train Loss: 0.3121, Train Acc: 88.79%
Val Loss: 1.7921, Val Acc: 58.13%

Early stopping triggered after 24 epochs

Training complete!



