In [1]:
import cv2
import os
import torch
import torch.nn as nn
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

In [1]:
import os

# Root directory holding the extracted frames; it is walked recursively by count_frames below.
# NOTE(review): absolute, machine-specific Windows path — adjust before running on another machine.
frames_directory = r"C:\Users\dell\Desktop\gradproject\AI04-Grad\datasets\CREMA-D\frames"

# Count the number of files in the directory
def count_frames(directory):
    """Return the total number of files found anywhere under ``directory``.

    Every file in the directory itself and in all nested subdirectories is
    counted, whatever its extension. A path that does not exist yields 0
    because ``os.walk`` produces no entries for it.
    """
    return sum(len(file_names) for _root, _dirs, file_names in os.walk(directory))

# Tally every file below the frames directory and report the total
total_frames = count_frames(directory=frames_directory)
print(f"Total number of frames: {total_frames}")


Total number of frames: 564916


In [5]:
# Source directory of the CREMA-D videos and destination root for the extracted frames.
# NOTE(review): both are absolute, machine-specific Windows paths — adjust before running elsewhere.
video_dir = r"C:\Users\dell\Desktop\gradproject\AI04-Grad\datasets\CREMA-D\crema-d videos"
frame_dir = r"C:\Users\dell\Desktop\gradproject\AI04-Grad\datasets\CREMA-D\frames"

# Extract frames
def extract_frames(video_dir, frame_dir):
    """Dump every frame of each ``.mp4`` in ``video_dir`` to JPEG files.

    For a video ``<name>.mp4`` the frames are written to
    ``<frame_dir>/<name>/frame_0000.jpg``, ``frame_0001.jpg``, ... in the
    order they are read. Entries of ``video_dir`` whose name does not end in
    ``.mp4`` are ignored.

    Args:
        video_dir: Directory containing the source videos.
        frame_dir: Root directory for the output; created if missing.

    Prints one summary line per video with the number of frames actually
    written, preceded by a warning line when some writes failed.
    """
    os.makedirs(frame_dir, exist_ok=True)
    for video_file in os.listdir(video_dir):
        if not video_file.endswith(".mp4"):
            continue

        video_path = os.path.join(video_dir, video_file)
        video_name = os.path.splitext(video_file)[0]
        output_folder = os.path.join(frame_dir, video_name)
        os.makedirs(output_folder, exist_ok=True)

        cap = cv2.VideoCapture(video_path)
        frame_index = 0  # position of the frame in the video; used for the file name
        frame_count = 0  # frames that were successfully written to disk
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_path = os.path.join(output_folder, f"frame_{frame_index:04d}.jpg")
                # cv2.imwrite reports a failed write by returning False rather
                # than raising, so only count the frame when it was saved.
                if cv2.imwrite(frame_path, frame):
                    frame_count += 1
                frame_index += 1
        finally:
            # Release the capture handle even if reading or writing raised.
            cap.release()

        if frame_count != frame_index:
            print(f"Warning: {frame_index - frame_count} frames of {video_file} could not be written")
        print(f"Extracted {frame_count} frames from {video_file}")

# Run the extraction: one output folder of JPEG frames per source video
extract_frames(video_dir=video_dir, frame_dir=frame_dir)


Extracted 77 frames from 1002_DFA_ANG_XX.mp4
Extracted 76 frames from 1002_DFA_DIS_XX.mp4
Extracted 82 frames from 1002_DFA_FEA_XX.mp4
Extracted 66 frames from 1002_DFA_HAP_XX.mp4
Extracted 75 frames from 1002_DFA_NEU_XX.mp4
Extracted 71 frames from 1002_DFA_SAD_XX.mp4
Extracted 82 frames from 1002_IEO_ANG_HI.mp4
Extracted 72 frames from 1002_IEO_ANG_LO.mp4
Extracted 83 frames from 1002_IEO_ANG_MD.mp4
Extracted 90 frames from 1002_IEO_DIS_HI.mp4
Extracted 86 frames from 1002_IEO_DIS_LO.mp4
Extracted 78 frames from 1002_IEO_DIS_MD.mp4
Extracted 96 frames from 1002_IEO_FEA_HI.mp4
Extracted 78 frames from 1002_IEO_FEA_LO.mp4
Extracted 58 frames from 1002_IEO_FEA_MD.mp4
Extracted 75 frames from 1002_IEO_HAP_HI.mp4
Extracted 85 frames from 1002_IEO_HAP_LO.mp4
Extracted 68 frames from 1002_IEO_HAP_MD.mp4
Extracted 77 frames from 1002_IEO_NEU_XX.mp4
Extracted 82 frames from 1002_IEO_SAD_HI.mp4
Extracted 68 frames from 1002_IEO_SAD_LO.mp4
Extracted 71 frames from 1002_IEO_SAD_MD.mp4
Extracted 

In [2]:
class CREMADataset(Dataset):
    """Frame-level dataset over a directory of per-video frame folders.

    Expected layout: ``frame_dir/<video_name>/<frame>.jpg``. The third
    ``_``-separated field of ``<video_name>`` (e.g. ``ANG`` in
    ``1002_DFA_ANG_XX``) is the emotion code that determines the label.
    Each ``.jpg`` becomes one ``(image, label)`` sample.
    """

    # Emotion code taken from the folder name -> integer class index
    EMOTION_MAP = {"ANG": 0, "DIS": 1, "FEA": 2, "HAP": 3, "NEU": 4, "SAD": 5, "SUR": 6}

    def __init__(self, frame_dir, transform=None):
        """Index every ``.jpg`` found one level below ``frame_dir``.

        Args:
            frame_dir: Root directory containing one sub-folder per video.
            transform: Optional callable applied to the RGB PIL image in
                ``__getitem__``.
        """
        self.frame_dir = frame_dir
        self.transform = transform
        self.data = []  # list of (frame_path, label) tuples, in sorted path order

        # os.listdir returns entries in arbitrary order; sorting keeps sample
        # indices (and any seeded split built on them) stable across runs.
        for video_folder in sorted(os.listdir(frame_dir)):
            video_path = os.path.join(frame_dir, video_folder)
            if not os.path.isdir(video_path):
                # A stray file next to the video folders would make the inner
                # os.listdir raise; skip it instead.
                continue
            # The label depends only on the folder name, so compute it once.
            label = self.get_label_from_video(video_folder)
            for frame_file in sorted(os.listdir(video_path)):
                if frame_file.endswith(".jpg"):
                    self.data.append((os.path.join(video_path, frame_file), label))

    def __len__(self):
        """Return the number of indexed frames."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened, converted to RGB and, when a transform was
        given, passed through it.
        """
        frame_path, label = self.data[idx]
        # The context manager closes the file as soon as the RGB copy exists.
        with Image.open(frame_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

    def get_label_from_video(self, video_name):
        """Map a video/folder name such as ``1002_DFA_ANG_XX`` to its class index.

        Returns -1 when the name has fewer than three ``_``-separated fields
        or when its emotion code is not in ``EMOTION_MAP``.
        NOTE(review): -1 is not a valid class index for a classification
        loss; callers should filter such samples before training.
        """
        parts = video_name.split('_')
        if len(parts) < 3:
            return -1
        return self.EMOTION_MAP.get(parts[2], -1)


In [4]:
# Define the frame directory again in the same script
frame_dir = r"C:\Users\dell\Desktop\gradproject\AI04-Grad\datasets\CREMA-D\frames"

# Continue with your dataset and DataLoader creation
import os
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset, DataLoader
from torchvision import transforms

# Transformations for images: resize to 224x224, convert to a tensor and
# normalise per channel (these mean/std values are the ImageNet statistics
# documented for torchvision's pretrained models).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset
dataset = CREMADataset(frame_dir=frame_dir, transform=transform)

# Split by VIDEO, not by frame. Consecutive frames of one clip are nearly
# identical, so a per-frame random split puts look-alike images in train,
# validation and test at once and inflates the reported accuracy.
indices = list(range(len(dataset)))
frames_by_video = {}
for sample_index, sample in enumerate(dataset.data):
    # sample is (frame_path, label); the parent folder identifies the video
    frames_by_video.setdefault(os.path.dirname(sample[0]), []).append(sample_index)
video_folders = sorted(frames_by_video)  # sorted so the seeded split is reproducible

# 60% / 20% / 20% of the videos for train / validation / test
train_videos, val_test_videos = train_test_split(video_folders, test_size=0.4, random_state=42)
val_videos, test_videos = train_test_split(val_test_videos, test_size=0.5, random_state=42)

train_indices = [i for video in train_videos for i in frames_by_video[video]]
val_test_indices = [i for video in val_test_videos for i in frames_by_video[video]]
val_indices = [i for video in val_videos for i in frames_by_video[video]]
test_indices = [i for video in test_videos for i in frames_by_video[video]]

# Every frame must land in exactly one split
assert len(train_indices) + len(val_indices) + len(test_indices) == len(dataset)
assert set(train_videos).isdisjoint(val_videos) and set(train_videos).isdisjoint(test_videos)

# Create subsets
train_subset = Subset(dataset, train_indices)
val_subset = Subset(dataset, val_indices)
test_subset = Subset(dataset, test_indices)

# Create DataLoaders
# On Windows, worker processes are spawned and must re-import the dataset
# class; a class defined in a notebook cell cannot be re-imported there, so
# load in the main process on that platform.
num_workers = 0 if os.name == "nt" else 4
# Pinned host memory only helps host-to-GPU copies; skip it without CUDA.
pin_memory = torch.cuda.is_available()
loader_options = dict(batch_size=16, num_workers=num_workers, pin_memory=pin_memory)

train_loader = DataLoader(train_subset, shuffle=True, **loader_options)
val_loader = DataLoader(val_subset, shuffle=False, **loader_options)
test_loader = DataLoader(test_subset, shuffle=False, **loader_options)


In [16]:
def pretrained_emotion_model(model_type, num_classes, device):
    """Build a torchvision backbone with pretrained weights and a new output layer.

    Args:
        model_type: ``"resnet"`` (``models.resnet50``) or ``"mobilenet"``
            (``models.mobilenet_v2``).
        num_classes: Output size of the replacement final linear layer.
        device: Device the finished model is moved to.

    Returns:
        The model after ``.to(device)``.

    Raises:
        ValueError: If ``model_type`` is neither ``"resnet"`` nor ``"mobilenet"``.
    """
    # Reject unknown architectures before any weights are loaded
    if model_type not in ("resnet", "mobilenet"):
        raise ValueError("Invalid model_type. Choose 'resnet' or 'mobilenet'.")

    if model_type == "resnet":
        net = models.resnet50(pretrained=True)
        head_inputs = net.fc.in_features
        net.fc = nn.Linear(head_inputs, num_classes)
    else:
        net = models.mobilenet_v2(pretrained=True)
        head_inputs = net.last_channel
        # Index 1 of the classifier is the layer being swapped for the new head
        net.classifier[1] = nn.Linear(head_inputs, num_classes)

    return net.to(device)

# Use the GPU when one is available, then build the 7-class ResNet on that device
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = pretrained_emotion_model(model_type="resnet", num_classes=7, device=device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\dell/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:50<00:00, 2.04MB/s]


In [17]:
# Cross-entropy loss on the model outputs; Adam updates every model parameter
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
import torch
import os
from tqdm import tqdm

# Directory for the per-epoch checkpoints, relative to the current working directory
checkpoint_dir = "./checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)  # exist_ok: no error when the folder is already there

# Function to save a checkpoint
def save_checkpoint(epoch, model, optimizer, val_loss, checkpoint_dir):
    """Write a training checkpoint for ``epoch`` into ``checkpoint_dir``.

    The file is named ``model_epoch_<epoch + 1>.pth`` and stores
    ``epoch + 1``, the model and optimizer state dicts, and ``val_loss``.
    The destination path is printed once the file has been saved.
    """
    checkpoint_path = os.path.join(checkpoint_dir, f"model_epoch_{epoch+1}.pth")

    # Collect everything that goes into the checkpoint file
    state = {
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'val_loss': val_loss,
    }
    torch.save(state, checkpoint_path)

    print(f"Checkpoint saved at {checkpoint_path}")

# Training loop with checkpoint saving.
# Each epoch: one pass over train_loader with parameter updates, one pass over
# val_loader without gradient tracking, a printed summary, then a checkpoint.
num_epochs = 10

for epoch in range(num_epochs):
    # Training mode for the optimisation pass
    model.train()
    running_loss = 0.0  # sum of the per-batch training losses for this epoch
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        # Move the batch to the same device as the model
        images, labels = images.to(device), labels.to(device)

        # Zero gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validation: evaluation mode, no parameter updates
    model.eval()
    val_loss = 0.0  # sum of the per-batch validation losses
    correct = 0     # number of validation samples predicted correctly
    total = 0       # number of validation samples seen

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            val_loss += criterion(outputs, labels).item()
            # torch.max over dim 1 returns (values, indices); the indices are the predicted classes
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # NOTE: raises ZeroDivisionError if val_loader yielded no samples (total == 0)
    val_accuracy = correct / total
    # Both losses are reported as the mean over batches (sum divided by the number of batches)
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {val_accuracy*100:.2f}%")

    # Save checkpoint after every epoch, recording the mean per-batch validation loss
    save_checkpoint(epoch, model, optimizer, val_loss/len(val_loader), checkpoint_dir)


Epoch 1/10:   0%|          | 0/21185 [00:00<?, ?it/s]

In [None]:
# Accuracy on the test split: evaluation mode, gradient tracking switched off
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Indices of the per-row maximum are the predicted classes
        _, predicted = torch.max(outputs, 1)
        correct += int((predicted == labels).sum())
        total += labels.shape[0]

test_accuracy = correct / total
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


In [None]:
# Save only the model's state_dict (not the optimizer) to a file in the current working directory
torch.save(model.state_dict(), "emotion_recognition_model.pth")


In [None]:
def predict_emotion(model, image_path, transform, device):
    """Classify a single image file and return the predicted emotion name.

    Args:
        model: Network called on a one-image batch; switched to eval mode here.
        image_path: Path of the image to open (converted to RGB).
        transform: Callable that turns the PIL image into a tensor.
        device: Device the input batch is moved to before the forward pass.

    Returns:
        One of "Angry", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise".
    """
    # Class index -> human-readable emotion name
    emotion_map = {0: "Angry", 1: "Disgust", 2: "Fear", 3: "Happy", 4: "Neutral", 5: "Sad", 6: "Surprise"}

    model.eval()
    rgb_image = Image.open(image_path).convert("RGB")
    batch = transform(rgb_image).unsqueeze(0)  # add a leading batch dimension
    batch = batch.to(device)

    with torch.no_grad():
        scores = model(batch)
    # torch.max over dim 1 gives (values, indices); keep the winning index
    predicted = torch.max(scores, 1)[1]

    return emotion_map[predicted.item()]

# Example usage: classify one frame with the trained model and the same preprocessing
image_path = "path_to_frame.jpg"  # Replace with actual frame path
predicted_emotion = predict_emotion(model=model, image_path=image_path, transform=transform, device=device)
print(f"Predicted Emotion: {predicted_emotion}")
