Recognizes the reference face in the live video (reference images are loaded from the collected reference frames), without emotion recognition

In [None]:
import cv2
import torch
import numpy as np
import os
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
from scipy.spatial.distance import cosine


#  CONFIGURATIONS & LOADING MODELS
# Run both models on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#  Load MTCNN for Face Detection
# keep_all=True asks the detector for every face it finds (per the
# facenet_pytorch docs) rather than a single one.
# NOTE(review): no margin/padding argument is passed here; the only padding in
# this script is applied later to the detected boxes via padding_ratio.
mtcnn = MTCNN(keep_all=True, device=device)

#  Load Face Recognition Model (FaceNet)
# Put in eval mode and moved to the selected device; used for inference only.
facenet = InceptionResnetV1(pretrained="casia-webface").eval().to(device)

#  Reference Face Directory
# Folder scanned for .jpg/.jpeg/.png reference images of the target person.
reference_folder = "./reference-face-frames-collect"

#  Padding for better cropping
# Fraction of the detected box width/height added on each side before cropping.
padding_ratio = 0.3  # extra padding to include more face details

#  Similarity threshold for recognition
# A face counts as a match only when its cosine similarity to the reference
# embedding is strictly greater than this value. Raising it makes matching
# stricter (fewer false positives, more false negatives); lowering it does the
# opposite.
similarity_threshold = 0.6  # Decrease this if false negatives happen


#  LOAD REFERENCE IMAGES & COMPUTE AVERAGE EMBEDDING

def get_face_embedding(image):
    """Detect a face in ``image`` and return its FaceNet embedding.

    Args:
        image: Image accepted by the module-level ``mtcnn`` detector
            (the callers in this file pass PIL images).

    Returns:
        A 1-D NumPy array of length 512, or ``None`` when no face is detected
        or the model output does not have the expected size.
    """
    faces = mtcnn(image)
    if faces is None:
        return None
    # A single cropped face comes back without a batch dimension.
    if faces.ndim == 3:
        faces = faces.unsqueeze(0)
    if faces.shape[0] == 0:
        return None

    # The detector is created with keep_all=True, so several faces may be
    # returned. Embed only the first one (the largest with MTCNN's default
    # ordering -- TODO confirm for the installed facenet_pytorch version);
    # otherwise the flattened output has N*512 values and every multi-face
    # image would be rejected by the size check below.
    face = faces[:1].to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        embedding = facenet(face).cpu().numpy().flatten()
    return embedding if embedding.shape[0] == 512 else None

def load_reference_embeddings(reference_folder):
    """Build one averaged face embedding from the images in a folder.

    Every ``.jpg``/``.jpeg``/``.png`` file directly inside ``reference_folder``
    is opened, converted to RGB and passed to ``get_face_embedding``. Files
    that cannot be read, or in which no usable face is found, are skipped.

    Args:
        reference_folder: Path of the directory holding the reference images.

    Returns:
        The element-wise mean of all collected embeddings as a NumPy array, or
        ``None`` when the folder does not exist or yields no embedding.
    """
    # A missing folder is treated like "no references" so the caller's
    # existing None handling applies instead of an uncaught exception.
    if not os.path.isdir(reference_folder):
        return None

    embeddings = []
    # Sorted so the processing order does not depend on the filesystem.
    for file in sorted(os.listdir(reference_folder)):
        if not file.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        image_path = os.path.join(reference_folder, file)
        try:
            # The context manager closes the file handle; convert() forces
            # three channels so RGBA / grayscale / palette images are handled
            # the same way as ordinary RGB photos.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
        except OSError as err:
            # Covers unreadable and corrupt files (Pillow's
            # UnidentifiedImageError derives from OSError).
            print(f"Skipping unreadable reference image {image_path}: {err}")
            continue

        embedding = get_face_embedding(image)
        if embedding is not None:
            embeddings.append(embedding)

    if not embeddings:
        return None

    #  Compute the average embedding
    return np.mean(embeddings, axis=0)

#  Build the averaged reference embedding once, at start-up
reference_embedding = load_reference_embeddings(reference_folder)

# Without a reference there is nothing to compare live faces against, so stop.
if reference_embedding is not None:
    print(f" Loaded reference embeddings (Averaged).")
else:
    print("No valid reference faces found! Exiting...")
    exit()


#  LIVE WEBCAM FACE RECOGNITION (WITH IMPROVED ACCURACY)

def recognize_face(face_image):
    """Compare a cropped face with the averaged reference embedding.

    Args:
        face_image: Array holding the cropped face; it is wrapped with
            ``Image.fromarray`` before being embedded.

    Returns:
        ``(is_match, similarity)``. ``similarity`` is the cosine similarity to
        ``reference_embedding`` and ``is_match`` tells whether it exceeds
        ``similarity_threshold``. When no embedding can be computed the result
        is ``(False, -1)``.
    """
    probe = get_face_embedding(Image.fromarray(face_image))
    if probe is not None:
        # scipy's cosine() is a distance, so 1 - distance gives the similarity.
        score = 1 - cosine(probe, reference_embedding)
        return score > similarity_threshold, score
    return False, -1


#  LIVE WEBCAM DETECTION & DISPLAY
def _expand_box(box, frame_width, frame_height):
    """Return ``box`` grown by ``padding_ratio`` per side, clipped to the frame."""
    x1, y1, x2, y2 = [int(coord) for coord in box]
    pad_w = int((x2 - x1) * padding_ratio)
    pad_h = int((y2 - y1) * padding_ratio)
    return (
        max(0, x1 - pad_w),
        max(0, y1 - pad_h),
        min(frame_width, x2 + pad_w),
        min(frame_height, y2 + pad_h),
    )


def run_live_recognition():
    """Run face recognition on the default webcam until 'q' is pressed.

    Each frame is converted to RGB, faces are located with ``mtcnn.detect``,
    every padded face crop is scored by ``recognize_face`` and the frame is
    shown with a green "YOU (<score>)" box for matches or a red "Not You" box
    otherwise. The camera and the display window are released when the loop
    ends for any reason, including an exception or Ctrl-C.

    Returns:
        None. Returns early (after printing an error) if the webcam cannot be
        opened.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Cannot access webcam.")
        return

    print("Starting real-time face recognition...")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Cannot read frame from webcam.")
                break

            frame_height, frame_width, _ = frame.shape
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, _ = mtcnn.detect(rgb_frame)

            if boxes is not None:
                for box in boxes:
                    #  Add padding for better recognition
                    x1, y1, x2, y2 = _expand_box(box, frame_width, frame_height)

                    # Crop from the RGB frame: the reference images are
                    # embedded as RGB, and recognize_face() wraps the array
                    # with Image.fromarray without any channel conversion, so
                    # a BGR crop would be embedded with red and blue swapped.
                    face_image = rgb_frame[y1:y2, x1:x2]
                    if face_image.shape[0] == 0 or face_image.shape[1] == 0:
                        continue

                    is_match, similarity = recognize_face(face_image)

                    # Drawing happens on the original BGR frame, which is
                    # what cv2.imshow expects.
                    if is_match:
                        box_color = (0, 255, 0)
                        text_color = (255, 255, 255)
                        label = f"YOU ({similarity:.2f})"
                    else:
                        box_color = text_color = (0, 0, 255)
                        label = "Not You"

                    # Keep the label inside the image when the box touches
                    # the top edge of the frame.
                    label_y = max(y1 - 10, 15)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
                    cv2.putText(frame, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2)

            cv2.imshow("Live Face Recognition", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("Quitting...")
                break
    finally:
        # Always free the camera and close the window, even on an error or
        # KeyboardInterrupt, so the device is not left locked.
        cap.release()
        cv2.destroyAllWindows()

#  Run the application
# Start the webcam loop only when this file is executed directly, not when it
# is imported.
if __name__ == "__main__":
    run_live_recognition()


✅ Loaded reference embeddings (Averaged).
🚀 Starting real-time face recognition...


KeyboardInterrupt: 

: 