Recognizes the reference face from the live video (Load reference images from the collected references) (withoput the emotion recognmition)

In [1]:
import cv2
import torch
import numpy as np
import os
from torchvision import transforms, models
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
from scipy.spatial.distance import cosine

# --------------------------------------
# ✅ CONFIGURATIONS & LOADING MODELS
# --------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Load MTCNN for Face Detection (WITH PADDING)
mtcnn = MTCNN(keep_all=True, device=device)

# ✅ Load Face Recognition Model (FaceNet)
facenet = InceptionResnetV1(pretrained="casia-webface").eval().to(device)

# ✅ Load Fine-tuned Emotion Detection Model (EfficientNet-B2)
emotion_classes = ["Anger", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]
model_save_path = "efficientnet_b2_emotion_model.pth"

def load_emotion_model(model_path):
    print("Loading emotion detection model...")
    model = models.efficientnet_b2(pretrained=False)
    model.features[0][0] = torch.nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
    model.classifier = torch.nn.Sequential(
        torch.nn.Dropout(p=0.4),  
        torch.nn.Linear(model.classifier[1].in_features, len(emotion_classes)),
    )
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()
    print("✅ Emotion model loaded!")
    return model

emotion_model = load_emotion_model(model_save_path)

# ✅ Reference Face Directory
reference_folder = "./reference-face-frames-collect"

# ✅ Padding for face recognition (NOT for emotion detection)
padding_ratio = 0.3  

# ✅ Higher similarity threshold for better recognition
similarity_threshold = 0.6  

# ✅ Quit Button Configuration
quit_program = False
quit_button_coords = (10, 10, 100, 50)  # (x1, y1, x2, y2)

# --------------------------------------
# ✅ LOAD REFERENCE IMAGES & COMPUTE AVERAGE EMBEDDING
# --------------------------------------
def get_face_embedding(image):
    """Detects face and returns its embedding"""
    img_cropped = mtcnn(image)
    if img_cropped is None:
        return None
    if img_cropped.ndim == 3:
        img_cropped = img_cropped.unsqueeze(0)
    img_cropped = img_cropped.to(device)
    embedding = facenet(img_cropped).detach().cpu().numpy().flatten()
    return embedding if embedding.shape[0] == 512 else None

def load_reference_embeddings(reference_folder):
    """Loads reference images, extracts embeddings, and returns an averaged embedding"""
    embeddings = []
    for file in os.listdir(reference_folder):
        if file.lower().endswith((".jpg", ".jpeg", ".png")):  # Consider image files only
            image_path = os.path.join(reference_folder, file)
            image = Image.open(image_path)
            embedding = get_face_embedding(image)
            if embedding is not None:
                embeddings.append(embedding)

    if len(embeddings) == 0:
        return None
    
    # ✅ Compute the average embedding (for better consistency)
    avg_embedding = np.mean(embeddings, axis=0)
    return avg_embedding

# ✅ Load reference embeddings
reference_embedding = load_reference_embeddings(reference_folder)

if reference_embedding is None:
    print("❌ No valid reference faces found! Exiting...")
    exit()
else:
    print(f"✅ Loaded reference embeddings (Averaged).")

# --------------------------------------
# ✅ EMOTION DETECTION FUNCTION
# --------------------------------------
def preprocess_face_for_emotion(face_image):
    """Preprocesses face for emotion model (WITHOUT PADDING)"""
    transform = transforms.Compose([
        transforms.Resize((260, 260)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    pil_image = Image.fromarray(cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB))
    return transform(pil_image).unsqueeze(0).to(device)

def predict_emotion(face_image):
    """Predicts emotion from face image (without padding)"""
    processed_image = preprocess_face_for_emotion(face_image)
    with torch.no_grad():
        outputs = emotion_model(processed_image)
        probabilities = torch.softmax(outputs, dim=1)
        predicted_class_idx = torch.argmax(probabilities, dim=1).item()
        return emotion_classes[predicted_class_idx], probabilities[0, predicted_class_idx].item()

# --------------------------------------
# ✅ FACE RECOGNITION FUNCTION
# --------------------------------------
def recognize_face(face_image):
    """Checks if the detected face matches the reference face"""
    face_embedding = get_face_embedding(Image.fromarray(face_image))
    if face_embedding is None:
        return False, -1

    # ✅ Compute direct cosine similarity
    similarity = 1 - cosine(face_embedding, reference_embedding)
    
    return similarity > similarity_threshold, similarity

# ✅ Quit Button Click Handling
def check_quit_click(event, x, y, flags, param):
    global quit_program
    if event == cv2.EVENT_LBUTTONDOWN:
        if quit_button_coords[0] <= x <= quit_button_coords[2] and quit_button_coords[1] <= y <= quit_button_coords[3]:
            quit_program = True  # Set flag to exit program

# --------------------------------------
# ✅ LIVE WEBCAM DETECTION & DISPLAY (FACE + EMOTION)
# --------------------------------------
def run_live_recognition():
    global quit_program

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("❌ Error: Cannot access webcam.")
        return

    cv2.namedWindow("Live Face Recognition & Emotion Detection")
    cv2.setMouseCallback("Live Face Recognition & Emotion Detection", check_quit_click)

    print("🚀 Starting real-time face recognition & emotion detection...")

    while not quit_program:
        ret, frame = cap.read()
        if not ret:
            print("❌ Error: Cannot read frame from webcam.")
            break

        frame_height, frame_width, _ = frame.shape
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes, _ = mtcnn.detect(rgb_frame)

        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = [int(coord) for coord in box]

                # ✅ Add padding for better recognition (not for emotion detection)
                box_width = x2 - x1
                box_height = y2 - y1

                pad_w = int(box_width * padding_ratio)
                pad_h = int(box_height * padding_ratio)

                x1_pad = max(0, x1 - pad_w)
                y1_pad = max(0, y1 - pad_h)
                x2_pad = min(frame_width, x2 + pad_w)
                y2_pad = min(frame_height, y2 + pad_h)

                face_image = frame[y1_pad:y2_pad, x1_pad:x2_pad]  # Padded for recognition
                cropped_face = frame[y1:y2, x1:x2]  # Exact crop for emotion detection

                if cropped_face.shape[0] > 0 and cropped_face.shape[1] > 0:
                    is_match, similarity = recognize_face(face_image)

                    if is_match:
                        emotion, confidence = predict_emotion(cropped_face)
                        label = f"YOU - {emotion} ({confidence:.2f})"
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        # Draw Quit Button
        cv2.rectangle(frame, quit_button_coords[:2], quit_button_coords[2:], (0, 0, 255), -1)
        cv2.putText(frame, "Quit", (quit_button_coords[0] + 10, quit_button_coords[1] + 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        cv2.imshow("Live Face Recognition & Emotion Detection", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("🛑 Quitting...")
            break

    cap.release()
    cv2.destroyAllWindows()

# ✅ Run the application
if __name__ == "__main__":
    run_live_recognition()


  from .autonotebook import tqdm as notebook_tqdm
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(cached_file)


Loading emotion detection model...


  model.load_state_dict(torch.load(model_path, map_location=device))


✅ Emotion model loaded!
✅ Loaded reference embeddings (Averaged).
🚀 Starting real-time face recognition & emotion detection...
