# Facial Detection Model (Swim Transomer)

# Split facial Dataset to train / val

In [2]:
# Split facial dataset be train and val

import os
import shutil
import random

# Set the paths
dataset_path = "FYP DATA/DATA_FINE_TUNING(Facial)"  # Change this to your actual dataset folder
train_path = "train"
val_path = "val"

# Define the split ratio
split_ratio = 0.8

# Ensure train and val directories exist
os.makedirs(train_path, exist_ok=True)
os.makedirs(val_path, exist_ok=True)

# Get all class folders (angry, fear, sad, happy)
categories = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]

for category in categories:
    category_path = os.path.join(dataset_path, category)
    images = os.listdir(category_path)
    random.shuffle(images)

    # Split into training and validation sets
    split_index = int(len(images) * split_ratio)
    train_images = images[:split_index]
    val_images = images[split_index:]

    # Create class subdirectories in train/ and val/
    os.makedirs(os.path.join(train_path, category), exist_ok=True)
    os.makedirs(os.path.join(val_path, category), exist_ok=True)

    # Move images to respective folders
    for img in train_images:
        shutil.move(os.path.join(category_path, img), os.path.join(train_path, category, img))

    for img in val_images:
        shutil.move(os.path.join(category_path, img), os.path.join(val_path, category, img))

print("Dataset split completed successfully!")


Dataset split completed successfully!


# Create csv file for train and val 

In [5]:
import os
import pandas as pd

# Define dataset directories
train_dir = "FYP DATA/DATA_FINE_TUNING(Facial)/train"
val_dir = "FYP DATA/DATA_FINE_TUNING(Facial)/val"

# Emotion categories and label mapping
categories = ["angry", "fear", "happy", "sad"]
label_map = {category: i for i, category in enumerate(categories)}  # {'angry': 0, 'fear': 1, 'happy': 2, 'sad': 3}

def create_csv(data_dir, output_csv):
    data = []
    for category in categories:
        category_path = os.path.join(data_dir, category)
        if os.path.exists(category_path):
            for img_file in os.listdir(category_path):
                if img_file.endswith(('.jpg', '.png', '.jpeg')):  # Ensure only image files are included
                    data.append([f"{category}/{img_file}", label_map[category]])

    df = pd.DataFrame(data, columns=["image_path", "label"])
    df.to_csv(output_csv, index=False)
    print(f"{output_csv} created successfully!")

# Generate CSV files
create_csv(train_dir, "FYP DATA/DATA_FINE_TUNING(Facial)/train.csv")
create_csv(val_dir, "FYP DATA/DATA_FINE_TUNING(Facial)/val.csv")


FYP DATA/DATA_FINE_TUNING(Facial)/train.csv created successfully!
FYP DATA/DATA_FINE_TUNING(Facial)/val.csv created successfully!


# Train Facial Emotion Detection Model

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from timm import create_model
from PIL import Image
import pandas as pd
import os

# Custom Dataset Class
class EmotionDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)  # CSV with 'image_path' and 'label' columns
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        try:
            img_path = os.path.join(self.root_dir, self.data.iloc[idx]['image_path'])
            image = Image.open(img_path).convert('RGB')
            label = self.data.iloc[idx]['label']
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
            raise  # Re-raise to stop execution and debug

# Data Transforms
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
}

# Load Datasets
train_dataset = EmotionDataset(
    csv_file='FYP DATA/DATA_FINE_TUNING(Facial)/train.csv',
    root_dir='FYP DATA/DATA_FINE_TUNING(Facial)/train',
    transform=data_transforms['train']
)
val_dataset = EmotionDataset(
    csv_file='FYP DATA/DATA_FINE_TUNING(Facial)/val.csv',
    root_dir='FYP DATA/DATA_FINE_TUNING(Facial)/val',
    transform=data_transforms['val']
)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=0)

# Load Pre-trained Swin Transformer (Swin-B)
model = create_model('swin_base_patch4_window7_224', pretrained=True, num_classes=4)  # 4 emotions
print("Model Loaded")

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Using device: {device}")

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-2)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)  # 50 epochs

# Training Loop
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=50):
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        print(f"Starting epoch {epoch+1}/{num_epochs}")
        for i, (images, labels) in enumerate(train_loader):
            print(f"Batch {i}: Loaded data")
            images, labels = images.to(device), labels.to(device)
            print(f"Batch {i}: Moved to {device}")

            optimizer.zero_grad()
            outputs = model(images)
            print(f"Batch {i}: Forward pass completed")
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            print(f"Batch {i}: Backward pass completed")

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * correct / total
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.2f}%')

        # Validation
        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for j, (images, labels) in enumerate(val_loader):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_acc = 100 * val_correct / val_total
        print(f'Validation Acc: {val_acc:.2f}%')

        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'best_swin_emotion_model.pth')
            print(f"Saved best model with Val Acc: {best_acc:.2f}%")

        scheduler.step()

    print(f'Best Validation Accuracy: {best_acc:.2f}%')

print('Training Start')
# Run Training
train_model(model, train_loader, val_loader, criterion, optimizer, scheduler)

# Inference Example
def predict_emotion(model, image_path):
    model.eval()
    image = Image.open(image_path).convert('RGB')
    image = data_transforms['val'](image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
        emotions = ['angry', 'fear', 'happy', 'sad']
        return emotions[predicted.item()]

# Test on a single image (uncomment to use)
# print(predict_emotion(model, 'path/to/test_image.jpg'))

Model Loaded
Using device: cuda
Training Start
Starting epoch 1/50
Batch 0: Loaded data
Batch 0: Moved to cuda
Batch 0: Forward pass completed
Batch 0: Backward pass completed
Batch 1: Loaded data
Batch 1: Moved to cuda
Batch 1: Forward pass completed
Batch 1: Backward pass completed
Batch 2: Loaded data
Batch 2: Moved to cuda
Batch 2: Forward pass completed
Batch 2: Backward pass completed
Batch 3: Loaded data
Batch 3: Moved to cuda
Batch 3: Forward pass completed
Batch 3: Backward pass completed
Batch 4: Loaded data
Batch 4: Moved to cuda
Batch 4: Forward pass completed
Batch 4: Backward pass completed
Batch 5: Loaded data
Batch 5: Moved to cuda
Batch 5: Forward pass completed
Batch 5: Backward pass completed
Batch 6: Loaded data
Batch 6: Moved to cuda
Batch 6: Forward pass completed
Batch 6: Backward pass completed
Batch 7: Loaded data
Batch 7: Moved to cuda
Batch 7: Forward pass completed
Batch 7: Backward pass completed
Batch 8: Loaded data
Batch 8: Moved to cuda
Batch 8: Forward 

KeyboardInterrupt: 

# Evaluation of Facial Detection Model

In [6]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from timm import create_model
from PIL import Image
import pandas as pd
import os
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import numpy as np

# Custom Dataset Class (unchanged)
class EmotionDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        try:
            img_path = os.path.join(self.root_dir, self.data.iloc[idx]['image_path'])
            image = Image.open(img_path).convert('RGB')
            label = self.data.iloc[idx]['label']
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
            raise

# Data Transforms (unchanged)
data_transforms = {
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
}

# Load Validation Dataset
val_dataset = EmotionDataset(
    csv_file='FYP DATA/DATA_FINE_TUNING(Facial)/val.csv',
    root_dir='FYP DATA/DATA_FINE_TUNING(Facial)/val',
    transform=data_transforms['val']
)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=0)

# Load Pre-trained Swin Transformer Model
model = create_model('swin_base_patch4_window7_224', pretrained=False, num_classes=4)  # 4 emotions
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(f"Using device: {device}")

# Load the saved .pth model weights
model_path = 'best_swin_emotion_model.pth'  # Update this path if needed
model.load_state_dict(torch.load(model_path))
print(f"Loaded model weights from {model_path}")

# Evaluation Function
def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for i, (images, labels) in enumerate(data_loader):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            
            # Collect predictions and labels
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            print(f"Processed batch {i+1}/{len(data_loader)}")

    # Convert to numpy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds) * 100
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')
    conf_matrix = confusion_matrix(all_labels, all_preds)

    # Print results
    print("\nEvaluation Results:")
    print(f"Accuracy: {accuracy:.2f}%")
    print(f"Precision (weighted): {precision:.4f}")
    print(f"Recall (weighted): {recall:.4f}")
    print(f"F1-Score (weighted): {f1:.4f}")
    print("\nConfusion Matrix:")
    print(conf_matrix)

    # Optional: Map emotions to confusion matrix
    emotions = ['angry', 'fear', 'happy', 'sad']
    print("\nConfusion Matrix (with labels):")
    for i, row in enumerate(conf_matrix):
        print(f"{emotions[i]}: {row}")

    return accuracy, precision, recall, f1, conf_matrix

# Run Evaluation
print("Starting model evaluation...")
accuracy, precision, recall, f1, conf_matrix = evaluate_model(model, val_loader, device)

# Optional: Save results to a file
with open('evaluation_results.txt', 'w') as f:
    f.write(f"Accuracy: {accuracy:.2f}%\n")
    f.write(f"Precision (weighted): {precision:.4f}\n")
    f.write(f"Recall (weighted): {recall:.4f}\n")
    f.write(f"F1-Score (weighted): {f1:.4f}\n")
    f.write("\nConfusion Matrix:\n")
    f.write(str(conf_matrix))
    print("Results saved to 'evaluation_results.txt'")

Using device: cuda


  model.load_state_dict(torch.load(model_path))


Loaded model weights from best_swin_emotion_model.pth
Starting model evaluation...
Processed batch 1/572
Processed batch 2/572
Processed batch 3/572
Processed batch 4/572
Processed batch 5/572
Processed batch 6/572
Processed batch 7/572
Processed batch 8/572
Processed batch 9/572
Processed batch 10/572
Processed batch 11/572
Processed batch 12/572
Processed batch 13/572
Processed batch 14/572
Processed batch 15/572
Processed batch 16/572
Processed batch 17/572
Processed batch 18/572
Processed batch 19/572
Processed batch 20/572
Processed batch 21/572
Processed batch 22/572
Processed batch 23/572
Processed batch 24/572
Processed batch 25/572
Processed batch 26/572
Processed batch 27/572
Processed batch 28/572
Processed batch 29/572
Processed batch 30/572
Processed batch 31/572
Processed batch 32/572
Processed batch 33/572
Processed batch 34/572
Processed batch 35/572
Processed batch 36/572
Processed batch 37/572
Processed batch 38/572
Processed batch 39/572
Processed batch 40/572
Proces

In [12]:
# Inference Example
def predict_emotion(model, image_path):
    model.eval()
    image = Image.open(image_path).convert('RGB')
    image = data_transforms['val'](image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
        emotions = ['angry', 'fear', 'happy', 'sad']
        return emotions[predicted.item()]

# Test on a single image (uncomment to use)
print(predict_emotion(model, 'FYP DATA/TestImage/happyface.jpg'))

happy


# Emotion Detection Model (Non Facial)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from sklearn.metrics import classification_report # evaluate the performance
import timm
import numpy as np
from torch.optim.lr_scheduler import CosineAnnealingLR #Learning rate scheduler with cosine decay
from torch.cuda.amp import autocast, GradScaler # handle mixed precesion training, optimizing GPU memory and speed
from PIL import Image

# Enhanced preprocessing and augmentation using torchvision
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224), # crop and resize image
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(15), 
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), #random color change
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05), scale=(0.95, 1.05)),
    transforms.RandomApply([transforms.GaussianBlur(3)], p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.2)
])

# basic transformation for validation and testing
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load datasets
train_dataset = datasets.ImageFolder('DATA_FINE_TUNING(NonFacial)/train', transform=train_transform)
val_dataset = datasets.ImageFolder('DATA_FINE_TUNING(NonFacial)/val', transform=val_transform)
test_dataset = datasets.ImageFolder('DATA_FINE_TUNING(NonFacial)/test', transform=val_transform)

# Enhanced data loading (create loader)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

# model architecture
class EmotionClassifier(nn.Module):
    def __init__(self, num_classes):
        super(EmotionClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b2', pretrained=True) 
        n_features = self.model.classifier.in_features
        
        self.model.classifier = nn.Sequential(
            nn.Dropout(0.1), #reduce overfitting
            nn.Linear(n_features, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(0.1),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        return self.model(x)

# Initialize model and training components
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EmotionClassifier(num_classes=5).to(device)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler = CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6) # gradually decrease lr during training
scaler = GradScaler()

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=50):
    best_accuracy = 0.0
    patience = 5
    patience_counter = 0
    history = {'train_loss': [], 'train_acc': [], 'val_acc': []}
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad() # clear previous gradient
            
            with autocast():   # used form mixed-precision
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            scaler.scale(loss).backward() #backpropagation 
            scaler.step(optimizer)
            scaler.update()#optimizer update
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
            #every 20 step, loss and accuracy will be printed
            if i % 20 == 19:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], '
                      f'Loss: {running_loss/20:.4f}, Acc: {100.*correct/total:.2f}%')
                running_loss = 0.0
        
        epoch_acc = correct / total
        history['train_loss'].append(running_loss / len(train_loader))
        history['train_acc'].append(epoch_acc)
        
        scheduler.step()
        
        # Validation(if validate accuracy increase, model state save)
        val_accuracy = validate_model(model, val_loader) 
        history['val_acc'].append(val_accuracy)
        print(f'Validation Accuracy: {val_accuracy:.4f}')
        
        # implement early stopping (if no improve acc, stop)
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'accuracy': val_accuracy,
                'history': history
            }, 'Model/NonFacialEmotionModelV2.pth')
            patience_counter = 0
        else:
            patience_counter += 1
            
        if patience_counter >= patience:
            print("Early stopping triggered")
            break
    
    return history

def validate_model(model, loader, set_name="Validation"):
    model.eval()
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad(): #disable gradient calculation, speeding inference
        #for each batch, prediction and true labels are compared
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            with autocast():
                outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = correct / total
    print(f"{set_name} Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=loader.dataset.classes))
    return accuracy

# Train the model
history = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler)

# Evaluate on test set
print("Evaluating on test set...")
test_accuracy = validate_model(model, test_loader, "Test")
print(f'Final Test Accuracy: {test_accuracy:.4f}')

# Improved inference function
def predict_emotion(image_path, model, device):
    model.eval()
    
    # Load and preprocess image
    image = Image.open(image_path).convert('RGB')
    image_tensor = val_transform(image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        with autocast():
            outputs = model(image_tensor)
            probabilities = torch.softmax(outputs, dim=1)
            confidence, predicted = torch.max(probabilities, 1)
            
            # Get top-3 predictions
            top3_prob, top3_indices = torch.topk(probabilities, 3)
            top3_prob = top3_prob.squeeze().cpu().numpy()
            top3_indices = top3_indices.squeeze().cpu().numpy()
    
    return {
        'predicted_class': predicted.item(),
        'confidence': confidence.item(),
        'top3_predictions': [(train_dataset.classes[idx], prob) 
                           for idx, prob in zip(top3_indices, top3_prob)]
    }

# Example usage
def process_image(image_path):
    model.eval()
    results = predict_emotion(image_path, model, device)
    
    print(f"Predicted emotion: {train_dataset.classes[results['predicted_class']]}")
    print(f"Confidence: {results['confidence']:.2f}")
    print("\nTop 3 predictions:")
    for emotion, prob in results['top3_predictions']:
        print(f"{emotion}: {prob:.2f}")

# Test the model on an image
image_path = 'TestImage/happynoface.jpg'
process_image(image_path)

# Combine the two model (facial and non-facial model) with using joblib

In [14]:
import torch
from torch import nn
from torchvision import transforms
import numpy as np
from PIL import Image
from deepface import DeepFace
import cv2
from torch.cuda.amp import autocast
import joblib
import os
import timm
from timm import create_model  # For Swin Transformer

# Assuming EmotionClassifier is a custom class; replace if needed
class EmotionClassifier(nn.Module):
    def __init__(self, num_classes):
        super(EmotionClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b2', pretrained=True)
        n_features = self.model.classifier.in_features
        
        self.model.classifier = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(n_features, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(0.1),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        return self.model(x)

class EmotionDetectionSystem:
    def __init__(self):
        # Use CPU for simplicity (consistent with your setup)
        self.device = torch.device("cpu")
        
        # Initialize transformations (same as training)
        self.val_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        
        # Initialize models
        self.facial_model = None  # Now a PyTorch model
        self.general_model = None
        self.emotion_class_facial = ['angry', 'fear', 'happy', 'sad']  # Matches your Swin model
        self.emotion_class_general = ['angry', 'awe', 'fear', 'happy', 'sad']
        
    def load_models(self, facial_model_path='best_swin_emotion_model.pth',
                    general_model_path='FYP Final Version/Model/NonFacialEmotionModelV2.pth'):
        """Load both facial and general emotion models (both PyTorch)"""
        # Load facial emotion model (Swin Transformer)
        self.facial_model = create_model('swin_base_patch4_window7_224', pretrained=False, num_classes=4)
        self.facial_model.to(self.device)
        checkpoint = torch.load(facial_model_path, map_location=torch.device('cpu'))
        self.facial_model.load_state_dict(checkpoint)  # Direct state_dict loading
        self.facial_model.eval()
        print(f"Loaded facial model from {facial_model_path}")

        # Load general emotion model (unchanged)
        self.general_model = EmotionClassifier(num_classes=5)
        self.general_model.to(self.device)
        checkpoint = torch.load(general_model_path, map_location=torch.device('cpu'))
        self.general_model.load_state_dict(checkpoint['model_state_dict'])
        self.general_model.eval()
        print(f"Loaded general model from {general_model_path}")
        
    def save_system(self, save_path='Model/Emotion_Detection_System.joblib'):
        """Save the entire system using joblib"""
        system_state = {
            'facial_model_state': self.facial_model.state_dict(),
            'general_model_state': self.general_model.state_dict(),
            'emotion_class_facial': self.emotion_class_facial,
            'emotion_class_general': self.emotion_class_general,
            'transform_state': None  # val_transform doesn’t have state_dict
        }
        joblib.dump(system_state, save_path)
        print(f"System saved to {save_path}")
        
    def load_system(self, load_path='Emotion_Detection_System.joblib'):
        """Load the entire system from joblib"""
        system_state = joblib.load(load_path)
        
        # Recreate and load facial model
        self.facial_model = create_model('swin_base_patch4_window7_224', pretrained=False, num_classes=4)
        self.facial_model.to(self.device)
        cpu_state_dict = {k: v.to('cpu') if v.is_cuda else v for k, v in system_state['facial_model_state'].items()}
        self.facial_model.load_state_dict(cpu_state_dict)
        self.facial_model.eval()
        
        # Recreate and load general model
        self.general_model = EmotionClassifier(num_classes=5)
        self.general_model.to(self.device)
        cpu_state_dict = {k: v.to('cpu') if v.is_cuda else v for k, v in system_state['general_model_state'].items()}
        self.general_model.load_state_dict(cpu_state_dict)
        self.general_model.eval()
        
        # Load other attributes
        self.emotion_class_facial = system_state['emotion_class_facial']
        self.emotion_class_general = system_state['emotion_class_general']
        
        print("System loaded successfully")

    def detect_face_emotion(self, image_path):
        """Detect emotion using the facial model (PyTorch Swin Transformer)"""
        image = Image.open(image_path).convert('RGB')
        image_tensor = self.val_transform(image).unsqueeze(0).to(self.device)
        
        with torch.no_grad():
            with autocast(enabled=False):  # No CUDA, so autocast is unnecessary but kept for consistency
                outputs = self.facial_model(image_tensor)
                probabilities = torch.softmax(outputs, dim=1)
                confidence, predicted = torch.max(probabilities, 1)
        
        return self.emotion_class_facial[predicted.item()], confidence.item()

    def detect_general_emotion(self, image_path):
        """Detect emotion using general model (unchanged)"""
        image = Image.open(image_path).convert('RGB')
        image_tensor = self.val_transform(image).unsqueeze(0).to(self.device)
        
        with torch.no_grad():
            with autocast(enabled=False):
                outputs = self.general_model(image_tensor)
                probabilities = torch.softmax(outputs, dim=1)
                confidence, predicted = torch.max(probabilities, 1)
        
        return self.emotion_class_general[predicted.item()], confidence.item()

    def is_faceV2(self, image_path):
        """Check if the image contains a face using DeepFace (unchanged)"""
        try:
            result = DeepFace.extract_faces(image_path, detector_backend='ssd')
            return len(result) > 0
        except:
            return False

    def predict_emotion(self, image_path):
        """Main prediction method combining both approaches"""
        has_face = self.is_faceV2(image_path)
        
        if has_face:
            emotion, confidence = self.detect_face_emotion(image_path)
            return {
                'type': 'facial',
                'emotion': emotion,
                'confidence': float(confidence)
            }
        else:
            emotion, confidence = self.detect_general_emotion(image_path)
            return {
                'type': 'general',
                'emotion': emotion,
                'confidence': float(confidence)
            }

# Example usage
if __name__ == "__main__":
    system = EmotionDetectionSystem()
    system.load_models(facial_model_path='best_swin_emotion_model.pth',
                      general_model_path='FYP Final Version/Model/NonFacialEmotionModelV2.pth')
    
    # Test prediction
    image_path = "FYP DATA/TestImage/sadface3.jpg"
    result = system.predict_emotion(image_path)
    print(f"Prediction: {result}")

Loaded facial model from best_swin_emotion_model.pth
Loaded general model from FYP Final Version/Model/NonFacialEmotionModelV2.pth
Prediction: {'type': 'facial', 'emotion': 'sad', 'confidence': 0.9969106316566467}


In [3]:
import torch
import tensorflow as tf
from torch import nn
from torchvision import transforms
import numpy as np
from PIL import Image
from deepface import DeepFace
import cv2
from torch.cuda.amp import autocast
import joblib
import os
from mtcnn import MTCNN
import timm
#from EmotionClassifier import EmotionClassifier

# Assuming EmotionClassifier is a custom class; replace if needed
class EmotionClassifier(nn.Module):
    def __init__(self, num_classes):
        super(EmotionClassifier, self).__init__()
        self.model = timm.create_model('efficientnet_b2', pretrained=True)
        n_features = self.model.classifier.in_features
        
        self.model.classifier = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(n_features, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(0.1),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        return self.model(x)
    

class EmotionDetectionSystem:
    def __init__(self):
        # Always use CPU in Docker/Streamlit Cloud for simplicity
        self.device = torch.device("cpu")  # Force CPU usage
        
        # Initialize transformations
        self.val_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        
        # Initialize models
        self.facial_model = None
        self.general_model = None
        self.emotion_class_facial = ['angry', 'fear', 'happy', 'sad']
        self.emotion_class_general = ['angry', 'awe', 'fear', 'happy', 'sad']
        
    def load_models(self, facial_model_path='Model/FacialEmotionModel.h5',
                    general_model_path='Model/NonFacialEmotionModelV2.pth'):
        """Load both facial and general emotion models"""
        # Load facial emotion model (TensorFlow/Keras)
        self.facial_model = tf.keras.models.load_model(facial_model_path)
        
        # Load general emotion model (PyTorch)
        self.general_model = EmotionClassifier(num_classes=5)
        self.general_model.to(self.device)
        # Force loading to CPU regardless of where it was saved
        checkpoint = torch.load(general_model_path, map_location=torch.device('cpu'))
        self.general_model.load_state_dict(checkpoint['model_state_dict'])
        self.general_model.eval()
        
    def save_system(self, save_path='Model/Emotion_Detection_System.joblib'):
        """Save the entire system using joblib"""
        system_state = {
            'facial_model_weights': self.facial_model.get_weights(),
            'general_model_state': self.general_model.state_dict(),
            'emotion_class_facial': self.emotion_class_facial,
            'emotion_class_general': self.emotion_class_general,
            'transform_state': self.val_transform.state_dict() if hasattr(self.val_transform, 'state_dict') else None
        }
        joblib.dump(system_state, save_path)
        print(f"System saved to {save_path}")
        
    def load_system(self, load_path='Model/Emotion_Detection_System.joblib'):
        """Load the entire system from joblib"""
        system_state = joblib.load(load_path)
        
        # Recreate and load facial model
        self.facial_model = tf.keras.models.load_model('Model/FacialEmotionModel.h5')
        self.facial_model.set_weights(system_state['facial_model_weights'])
        
        # Recreate and load general model
        self.general_model = EmotionClassifier(num_classes=5)
        self.general_model.to(self.device)
        
        # Remap the general_model_state to CPU
        state_dict = system_state['general_model_state']
        # Convert CUDA tensors to CPU if necessary
        cpu_state_dict = {k: v.to('cpu') if v.is_cuda else v for k, v in state_dict.items()}
        self.general_model.load_state_dict(cpu_state_dict)
        self.general_model.eval()
        
        # Load other attributes
        self.emotion_class_facial = system_state['emotion_class_facial']
        self.emotion_class_general = system_state['emotion_class_general']
        
        print("System loaded successfully")

    def detect_face_emotion(self, image_path):
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB as done previously
        img = cv2.resize(img, (160, 160)) / 255.0
        predictions = self.facial_model.predict(np.expand_dims(img, axis=0))
        emotion = np.argmax(predictions)
        return self.emotion_class_facial[emotion]

    def detect_general_emotion(self, image_path):
        """Detect emotion using general model"""
        image = Image.open(image_path).convert('RGB')
        image_tensor = self.val_transform(image).unsqueeze(0).to(self.device)
        
        with torch.no_grad():
            with autocast(enabled=False):  # Disable autocast since no CUDA
                outputs = self.general_model(image_tensor)
                probabilities = torch.softmax(outputs, dim=1)
                confidence, predicted = torch.max(probabilities, 1)
        
        return self.emotion_class_general[predicted.item()], confidence.item()

    def is_face(self, image_path):
        """Check if the image contains a face using MTCNN"""
        try:
            detector = MTCNN()
            image = Image.open(image_path).convert('RGB')
            faces = detector.detect_faces(np.array(image))
            return len(faces) > 0
        except Exception as e:
            print(f"Error in Face Detection: {str(e)}")
            return False
        
    def is_faceV2(self, image_path):
        try:
            # 使用 DeepFace 检测人脸
            result = DeepFace.extract_faces(image_path, detector_backend='ssd')
            return len(result) > 0
        except:
            return False

    def predict_emotion(self, image_path):
        """Main prediction method combining both approaches"""
        has_face = self.is_faceV2(image_path)
        
        if has_face:
            emotion = self.detect_face_emotion(image_path)
            return {
                'type': 'facial',
                'emotion': emotion,
                'confidence': None  # Facial model doesn't provide confidence
            }
        else:
            emotion, confidence = self.detect_general_emotion(image_path)
            return {
                'type': 'general',
                'emotion': emotion,
                'confidence': float(confidence)
            }
        
# Example usage
if __name__ == "__main__":
    system = EmotionDetectionSystem()
    system.load_models(facial_model_path='FYP Final Version/Model/FacialEmotionModel.h5',
                      general_model_path='FYP Final Version/Model/NonFacialEmotionModelV2.pth')
    
    # Test prediction
    image_path = "FYP DATA/TestImage/sadface3.jpg"
    result = system.predict_emotion(image_path)
    print(f"Prediction: {result}")

Prediction: {'type': 'facial', 'emotion': 'happy', 'confidence': None}
