In [None]:
import PIL.Image as Image
import pandas as pd
import numpy as np
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from efficientnet_pytorch import EfficientNet

epochs = 7
batch_size_train = 32
batch_size_test = 64
lr = 0.001
weight_decay = 0.001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform = T.Compose([
    T.Grayscale(num_output_channels=3),  # Convert to RGB
    T.ToTensor(),
    T.Resize([224, 224]),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


class MyDataset(Dataset):
    def __init__(self, transform=None, string="train"):
        self.imgs_path = "/kaggle/input/face-expression-recognition-dataset/images/" + string + "/"
        file_list = glob.glob(self.imgs_path + "*")
        self.data = []
        for class_path in file_list:
            class_name = class_path.split("/")[-1]
            for img_path in glob.glob(class_path + "/*.jpg"):
                self.data.append([img_path, class_name])
        self.class_map = {"angry": 0, "disgust": 1, "fear": 2, "happy": 3, "neutral": 4, "sad": 5, "surprise": 6}
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, class_name = self.data[idx]
        img = Image.open(img_path)
        class_id = self.class_map[class_name]
        class_id = torch.tensor(class_id)
        if self.transform:
            img = self.transform(img)
        return img, class_id

def get_model(num_classes=7):
    model = EfficientNet.from_pretrained('efficientnet-b0')
    in_features = model._fc.in_features
    model._fc = nn.Linear(in_features, num_classes)
    return model

def train(model, train_loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100. * correct / total
    return train_acc, train_loss

def test(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    test_loss = running_loss / len(test_loader)
    test_acc = 100. * correct / total
    return test_acc, test_loss

train_dataset = MyDataset(transform, "train")
train_loader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True)
test_dataset = MyDataset(transform, "validation")
test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False)

model = get_model(num_classes=7).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

for epoch in range(1, epochs + 1):
    print(f"Epoch: {epoch}/{epochs}")
    train_acc, train_loss = train(model, train_loader, optimizer, criterion, device)
    print("\tTraining Loss: {:.4f} | Accuracy: {:.2f}%".format(train_loss, train_acc))

    test_acc, test_loss = test(model, test_loader, criterion, device)
    print("\tValidation Loss: {:.4f} | Accuracy: {:.2f}%".format(test_loss, test_acc))

### Camera Input + Face Detection 

In [1]:
import cv2
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import normalize
from torchvision import transforms as T
import PIL.Image as Image
import pandas as pd
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms as T

# Load the pre-trained model
model = torch.load("model_efficient_2.pt", map_location=torch.device('cpu'))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Function to process each frame and detect emotions
def detect_emotion(face_image):
    # Convert NumPy array (OpenCV image) to PIL Image
    pil_image = Image.fromarray(face_image)
    
    # Pre-process the face image
    transform = T.Compose([
        T.Grayscale(num_output_channels=3),
        T.Resize([224, 224]),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    image_tensor = transform(pil_image).unsqueeze(0).to(device)
    
    # Predict emotion
    with torch.no_grad():
        outputs = F.softmax(model(image_tensor), dim=1)
    
    probabilities, predicted = torch.max(outputs, 1)
    emotion = classes[predicted.item()]
    
    return emotion, probabilities.item()

# Load the pre-defined class labels
classes = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]

# Initialize variables for storing detected emotions
detected_emotions = []

# Open the built-in camera
cap = cv2.VideoCapture(0)  # Use default camera (0)

# Get the camera frame properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = 30  # Adjust according to your camera

# Define the codec and create VideoWriter object
output_video_path = "output_video.avi"
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Initialize face detector
face_detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Process each frame from the camera
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    
    # Convert frame to grayscale for face detection
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    # Detect faces in the frame
    faces = face_detector.detectMultiScale(gray_frame, 1.1, 4)
    
    # Process each detected face
    for (x, y, w, h) in faces:
        # Extract the face region from the frame
        face_image = frame[y:y+h, x:x+w]
        
        # Detect emotion for the face image
        emotion, probability = detect_emotion(face_image)
        
        # Display emotion and probability in the corner of the face bounding box
        cv2.putText(frame, f"{emotion} - {probability:.2f}", (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        
        # Draw a rectangle around the face
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
    
    # Write the frame into the output video
    out.write(frame)
    
    # Display the frame
    cv2.imshow('Frame', frame)
    
    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release video capture and writer
cap.release()
out.release()
cv2.destroyAllWindows()

### Video Input + Face Detection

In [3]:
import cv2
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import normalize
from torchvision import transforms as T

# Load the pre-trained model
model = torch.load("model_efficient_2.pt", map_location=torch.device('cpu'))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Load the pre-defined class labels
classes = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]

def detect_emotion(face_image):
    # Convert NumPy array (OpenCV image) to PIL Image
    pil_image = Image.fromarray(face_image)
    
    # Pre-process the face image
    transform = T.Compose([
        T.Grayscale(num_output_channels=3),
        T.Resize([224, 224]),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    image_tensor = transform(pil_image).unsqueeze(0).to(device)
    
    # Predict emotion
    with torch.no_grad():
        outputs = F.softmax(model(image_tensor), dim=1)
    
    probabilities, predicted = torch.max(outputs, 1)
    emotion = classes[predicted.item()]
    
    return emotion, probabilities.item()

# Initialize variables for storing detected emotions
detected_emotions = []

# Initialize face detector
face_detector1 = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Open the input video
input_video_path = "Angry_Video.mp4"
cap = cv2.VideoCapture(input_video_path)

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Define the codec and create VideoWriter object
output_video_path = "output_video.avi"
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(output_video_path, fourcc, fps*8, (frame_width, frame_height))

# Process each frame of the video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    
    # Convert frame to grayscale for face detection
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    # Detect faces in the frame
    faces = face_detector1.detectMultiScale(gray, 1.1, 4)
    
    # Process each detected face
    for (x, y, w, h) in faces:
        # Draw a rectangle around the face
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        
        # Extract the face region from the frame
        face_image = frame[y:y+h, x:x+w]
        
        # Detect emotion for the face image
        emotion, probability = detect_emotion(face_image)
        
        # Display emotion and probability in the corner of the face bounding box
        cv2.putText(frame, f"{emotion} - {probability:.2f}", (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        
        # Store detected emotions
        detected_emotions.append(emotion)
    
    # Write the frame into the output video
    out.write(frame)

    # Display the frame
    cv2.imshow('Frame', frame)
    
    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release video capture and writer
cap.release()
out.release()
cv2.destroyAllWindows()

# Display detected emotions at the end of the video
print("Detected Emotions:")
for emotion in detected_emotions:
    print(emotion)

# Print the most occurred emotion
most_occurred_emotion = max(set(detected_emotions), key=detected_emotions.count)
print("Predominant emotion:", most_occurred_emotion)


Detected Emotions:
neutral
neutral
sad
sad
neutral
neutral
sad
sad
sad
sad
fear
fear
fear
fear
sad
fear
sad
fear
fear
fear
fear
fear
fear
fear
fear
fear
sad
sad
sad
sad
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
neutral
sad
sad
sad
sad
fear
sad
fear
fear
fear
sad
sad
sad
sad
sad
sad
sad
sad
sad
neutral
neutral
neutral
neutral
neutral
neutral
surprise
fear
sad
fear
fear
fear
fear
fear
fear
sad
fear
fear
fear
fear
fear
fear
fear
fear
fear
fear
fear
fear
fear
sad
sad
fear
fear
fear
fear
sad
fear
sad
sad
sad
Predominant emotion: fear


### Image Input + Face Detection

In [1]:
import cv2
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import normalize
from torchvision import transforms as T

# Load the pre-trained model
model = torch.load("model_efficient_2.pt", map_location=torch.device('cpu'))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Load the pre-defined class labels
classes = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]

# Function to detect emotion in the input image
def detect_emotion(image_path, output_path):
    # Load the input image
    frame = cv2.imread(image_path)
    
    # Convert the image to grayscale for face detection
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    # Detect faces in the image
    face_detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    faces = face_detector.detectMultiScale(gray, 1.1, 4)
    
    # Process each detected face
    for (x, y, w, h) in faces:
        # Extract the face region from the image
        face_image = frame[y:y+h, x:x+w]
        
        # Pre-process the face image
        transform = T.Compose([
            T.ToPILImage(),
            T.Grayscale(num_output_channels=3),
            T.Resize([224, 224]),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        image_tensor = transform(face_image).unsqueeze(0).to(device)
        
        # Predict emotion
        with torch.no_grad():
            outputs = F.softmax(model(image_tensor), dim=1)
        
        probabilities, predicted = torch.max(outputs, 1)
        emotion = classes[predicted.item()]
        
        # Display emotion and probability on the image
        cv2.putText(frame, f"{emotion} - {probabilities.item():.2f}", (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
        
        # Draw a rectangle around the face
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
    
    # Save the output image
    cv2.imwrite(output_path, frame)
    print(f"Output image saved at {output_path}")

# Input image path
input_image_path = "Harry_sad.jpg"

# Output image path
output_image_path = "output_image.jpg"

# Detect emotion in the input image and save the output image
detect_emotion(input_image_path, output_image_path)

Output image saved at output_image.jpg
