Recognizes the reference face in a live video stream by comparing each detected face against the reference images one by one

In [None]:
import cv2
import torch
import torch.nn as nn
import numpy as np
from torchvision import transforms, models
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
from scipy.spatial.distance import cosine


#  CONFIGURATIONS & LOADING MODELS
# Run every model on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

#  Face detector (MTCNN); keep_all=True so detection is not limited to one face.
mtcnn = MTCNN(device=device, keep_all=True)

#  Face embedding network (FaceNet) with the "casia-webface" weights,
#  switched to evaluation mode before being moved to the target device.
facenet = InceptionResnetV1(pretrained="casia-webface")
facenet = facenet.eval().to(device)

#  Fine-tuned emotion classifier (EfficientNet-B2): checkpoint path and labels.
#  Output index i of the classifier is reported as emotion_classes[i].
model_save_path = "efficientnet_b2_emotion_model.pth"
emotion_classes = [
    "Anger",
    "Disgust",
    "Fear",
    "Happy",
    "Neutral",
    "Sad",
    "Surprise",
]

def load_emotion_model(model_path):
    """Build the EfficientNet-B2 emotion classifier and load its fine-tuned weights.

    Args:
        model_path: Path to a checkpoint containing the model's ``state_dict``.

    Returns:
        The model, moved to ``device`` and switched to evaluation mode.
    """
    print("Loading emotion detection model...")
    # ``weights=None`` is the current spelling of the deprecated
    # ``pretrained=False``: build the architecture without downloading weights.
    model = models.efficientnet_b2(weights=None)
    # Stem convolution and classifier head are rebuilt so the module layout
    # matches the checkpoint (one output per entry in ``emotion_classes``).
    model.features[0][0] = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.4),
        nn.Linear(model.classifier[1].in_features, len(emotion_classes)),
    )
    # ``weights_only=True`` restricts unpickling to tensors and plain
    # containers, so a tampered checkpoint cannot execute arbitrary code; it
    # also silences the FutureWarning emitted by a bare ``torch.load``.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()
    print(" Emotion model loaded!")
    return model

emotion_model = load_emotion_model(model_save_path)


#  LOAD REFERENCE FACE (YOUR FACE)

def get_face_embedding(image):
    """Return the FaceNet embedding of the first face detected in ``image``.

    Args:
        image: Image passed straight to ``mtcnn`` (callers in this file pass
            PIL images).

    Returns:
        A flat 512-element NumPy array, or ``None`` when no face is detected
        or the network output does not have the expected size.
    """
    img_cropped = mtcnn(image)
    if img_cropped is None:
        return None
    if img_cropped.ndim == 3:
        img_cropped = img_cropped.unsqueeze(0)
    # ``mtcnn`` is created with keep_all=True, so several faces can come back
    # stacked along dim 0. Embedding all of them and flattening produced
    # N * 512 values, which failed the size check below and discarded the
    # image; keep only the first detected face instead.
    img_cropped = img_cropped[:1].to(device)
    # Inference only: skip autograd bookkeeping instead of detaching afterwards.
    with torch.no_grad():
        embedding = facenet(img_cropped).cpu().numpy().flatten()
    return embedding if embedding.shape[0] == 512 else None

def load_reference_face(reference_images):
    """Compute one face embedding per usable reference image.

    Args:
        reference_images: Iterable of image file paths.

    Returns:
        A list with one embedding for every image in which a face embedding
        could be computed, or ``None`` when no image produced one.
    """
    embeddings = []
    for image_path in reference_images:
        try:
            # The context manager closes the file handle; convert("RGB") reads
            # the pixel data and normalises grayscale / RGBA / palette images
            # to the three channels the detector and embedder are fed.
            with Image.open(image_path) as source:
                image = source.convert("RGB")
        except OSError as err:
            # A missing or corrupt file should not abort the whole load; it is
            # skipped just like an image without a detectable face.
            print(f"Skipping unreadable reference image {image_path}: {err}")
            continue
        embedding = get_face_embedding(image)
        if embedding is not None:
            embeddings.append(embedding)
    return embeddings if embeddings else None

#  Load YOUR reference images
reference_images = [
    "./face-resources/me1.jpg",
    "./face-resources/me2.jpg",
    "./face-resources/me3.jpg",
    "./face-resources/me4.jpg",
    "./face-resources/me5.jpg",
]
reference_embeddings = load_reference_face(reference_images)

if reference_embeddings is None:
    print("No valid reference face found! Exiting...")
    # ``exit()` is a helper injected by the ``site`` module for interactive
    # sessions and is not guaranteed to exist; raise SystemExit directly and
    # report the failure with a non-zero status.
    raise SystemExit(1)
else:
    print(f" Loaded {len(reference_embeddings)} reference embeddings!")


#  IMAGE PROCESSING FUNCTIONS

def preprocess_face(face_image):
    """Convert a BGR face crop into a normalised input batch for the emotion model.

    Args:
        face_image: Face crop as an OpenCV-style BGR array.

    Returns:
        A tensor on ``device`` with a leading batch dimension of 1, resized to
        260x260 and normalised per channel.
    """
    # OpenCV stores channels as BGR; PIL and the transforms below expect RGB.
    rgb_pixels = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_pixels)

    pipeline = transforms.Compose([
        transforms.Resize((260, 260)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    tensor = pipeline(pil_image)
    batch = tensor.unsqueeze(0)  # add the batch dimension the model expects
    return batch.to(device)

def predict_emotion(face_image):
    """Classify the emotion shown in a face crop.

    Args:
        face_image: Face crop accepted by ``preprocess_face``.

    Returns:
        ``(label, probability)``: the entry of ``emotion_classes`` with the
        highest softmax probability, and that probability as a Python float.
    """
    batch = preprocess_face(face_image)
    # Pure inference, so gradient tracking is switched off.
    with torch.no_grad():
        logits = emotion_model(batch)
        probs = logits.softmax(dim=1)
        best_idx = probs.argmax(dim=1).item()
        best_prob = probs[0, best_idx].item()
    return emotion_classes[best_idx], best_prob


#  LIVE WEBCAM GUI FUNCTION

# Rectangle of the on-screen "Quit" button as (left, top, right, bottom).
quit_button_coords = (10, 10, 100, 50)
# Set to True by the mouse callback once the button has been clicked.
quit_program = False


def check_quit_click(event, x, y, flags, param):
    """Mouse callback: request shutdown when the Quit button is left-clicked.

    Args:
        event: OpenCV mouse event code.
        x: Horizontal position of the pointer.
        y: Vertical position of the pointer.
        flags: Unused; part of the OpenCV callback signature.
        param: Unused; part of the OpenCV callback signature.
    """
    global quit_program
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    left, top, right, bottom = quit_button_coords
    inside_button = left <= x <= right and top <= y <= bottom
    if inside_button:
        quit_program = True

def recognize_face(face_image, threshold=0.5):
    """Decide whether a face crop matches any of the reference embeddings.

    Args:
        face_image: Face crop sliced from an OpenCV frame (BGR channel order).
        threshold: Cosine similarity that the best match must exceed to count
            as the reference person. Defaults to 0.5.

    Returns:
        ``(is_match, best_similarity)``. ``(False, -1)`` is returned when no
        embedding could be computed for the crop.
    """
    # The crop comes from an OpenCV frame (BGR) while the reference embeddings
    # were computed from PIL images (RGB). Without this conversion the red and
    # blue channels are swapped and the embeddings are not comparable.
    rgb_face = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    face_embedding = get_face_embedding(Image.fromarray(rgb_face))
    if face_embedding is None:
        return False, -1
    # Keep the highest similarity over all reference embeddings.
    best_similarity = -1
    for ref_embedding in reference_embeddings:
        similarity = 1 - cosine(face_embedding, ref_embedding)
        if similarity > best_similarity:
            best_similarity = similarity
    return best_similarity > threshold, best_similarity


#  LIVE WEBCAM EMOTION DETECTION (ONLY FOR YOUR FACE)

def run_live_recognition():
    """Run the webcam loop: detect faces, recognise the reference face, show its emotion.

    Each frame is read from camera 0, faces are located with ``mtcnn``, and
    every face is compared with the reference embeddings. A match is drawn in
    green with its predicted emotion; any other face is drawn in red as
    "Not You". The loop ends when 'q' is pressed, the on-screen Quit button is
    clicked, or a frame cannot be read.
    """
    global quit_program
    # A previous run leaves the flag set to True; clear it so the function can
    # be called again without returning immediately.
    quit_program = False

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Cannot access webcam.")
        return

    window_name = "Face Recognition & Emotion Detection"
    try:
        cv2.namedWindow(window_name)
        cv2.setMouseCallback(window_name, check_quit_click)

        while not quit_program:
            ret, frame = cap.read()
            if not ret:
                print("Error: Cannot read frame from webcam.")
                break

            frame_height, frame_width = frame.shape[:2]
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, _ = mtcnn.detect(rgb_frame)

            if boxes is not None:
                for box in boxes:
                    x1, y1, x2, y2 = [int(coord) for coord in box]
                    # Boxes can extend past the frame. A negative start index
                    # would wrap around in NumPy slicing and yield a wrong or
                    # empty crop, so clamp to the frame first.
                    x1, y1 = max(x1, 0), max(y1, 0)
                    x2, y2 = min(x2, frame_width), min(y2, frame_height)
                    if x2 <= x1 or y2 <= y1:
                        continue
                    face_image = frame[y1:y2, x1:x2]
                    # Keep the label on screen when the face touches the top edge.
                    label_y = max(y1 - 10, 15)

                    is_match, similarity = recognize_face(face_image)

                    if is_match:
                        emotion, confidence = predict_emotion(face_image)
                        print(f"Emotion: {emotion} (Confidence: {confidence:.2f})")

                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                        label = f"YOU - {emotion} ({confidence:.2f})"
                        cv2.putText(frame, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                    else:
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                        cv2.putText(frame, "Not You", (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

            # Quit button
            cv2.rectangle(frame, quit_button_coords[:2], quit_button_coords[2:], (0, 0, 255), -1)
            cv2.putText(frame, "Quit", (quit_button_coords[0] + 10, quit_button_coords[1] + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

            cv2.imshow(window_name, frame)

            if cv2.waitKey(1) & 0xFF == ord('q') or quit_program:
                print("Quitting...")
                break
    finally:
        # Release the camera and close the window even if a model call raises,
        # so the webcam is not left locked.
        cap.release()
        cv2.destroyAllWindows()

#  Run the application
#  The webcam loop starts only when this file is executed directly,
#  not when it is imported as a module.
if __name__ == "__main__":
    run_live_recognition()

Loading emotion detection model...


  model.load_state_dict(torch.load(model_path, map_location=device))


✅ Emotion model loaded!
✅ Loaded 5 reference embeddings!
🎭 Emotion: Neutral (Confidence: 0.99)
🎭 Emotion: Neutral (Confidence: 0.99)
🎭 Emotion: Neutral (Confidence: 0.99)
🎭 Emotion: Neutral (Confidence: 0.97)
🎭 Emotion: Neutral (Confidence: 0.99)
🛑 Quitting...
