In [1]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras import backend, layers, metrics
import os
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model, Sequential
import tensorflow as tf
from tensorflow.keras.utils import plot_model
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
def get_encoder(input_shape):
    """Build the image-embedding model.

    An ImageNet-initialised Xception backbone (no classification top, global
    average pooling) is stacked with Flatten -> Dense(512, relu) ->
    BatchNormalization -> Dense(256, relu) and a final L2 normalisation along
    axis 1. Every backbone layer except the last 27 is frozen.

    Args:
        input_shape: shape passed to Xception as ``input_shape``.

    Returns:
        A ``Sequential`` model named ``"Encode_Model"``.
    """
    backbone = Xception(
        weights='imagenet',
        include_top=False,
        pooling='avg',
        input_shape=input_shape,
    )

    # Freeze everything except the final 27 backbone layers.
    for frozen_layer in backbone.layers[:-27]:
        frozen_layer.trainable = False

    embedding_head = [
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(256, activation="relu"),
        layers.Lambda(lambda features: tf.math.l2_normalize(features, axis=1)),
    ]
    return Sequential([backbone, *embedding_head], name="Encode_Model")

In [3]:
class DistanceLayer(layers.Layer):
    """Layer returning squared Euclidean distances between embeddings.

    For anchor, positive and negative inputs it yields the pair
    (‖f(A) - f(P)‖², ‖f(A) - f(N)‖²), each summed over the last axis.
    """

    def __init__(self, **kwargs):
        super(DistanceLayer, self).__init__(**kwargs)

    def call(self, anchor, positive, negative):
        # Element-wise differences first, then squared sums over the feature axis.
        positive_gap = anchor - positive
        negative_gap = anchor - negative
        ap_distance = tf.reduce_sum(tf.square(positive_gap), axis=-1)
        an_distance = tf.reduce_sum(tf.square(negative_gap), axis=-1)
        return ap_distance, an_distance


def get_siamese_network(input_shape = (128, 128, 3)):
    """Build the three-input Siamese network around one shared encoder.

    Args:
        input_shape: shape of each of the anchor / positive / negative inputs.

    Returns:
        A ``Model`` named ``"Siamese_Network"`` whose inputs are
        ``[anchor, positive, negative]`` and whose outputs are the tuple
        returned by ``DistanceLayer`` (anchor-positive and anchor-negative
        squared distances).
    """
    encoder = get_encoder(input_shape)

    # Input layers for the three images
    anchor_input   = layers.Input(input_shape, name="Anchor_Input")
    positive_input = layers.Input(input_shape, name="Positive_Input")
    negative_input = layers.Input(input_shape, name="Negative_Input")

    # Encode each image exactly once with the shared encoder. The original
    # code computed these encodings, discarded them, and then applied the
    # encoder to every input a second time.
    encoded_a = encoder(anchor_input)
    encoded_p = encoder(positive_input)
    encoded_n = encoder(negative_input)

    # ‖f(A) - f(P)‖² and ‖f(A) - f(N)‖²
    distances = DistanceLayer()(encoded_a, encoded_p, encoded_n)

    siamese_network = Model(
        inputs  = [anchor_input, positive_input, negative_input],
        outputs = distances,
        name = "Siamese_Network"
    )
    return siamese_network

# Build the Siamese network with the default (128, 128, 3) input shape and
# print its layer summary.
siamese_network = get_siamese_network()
siamese_network.summary()


Model: "Siamese_Network"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 Anchor_Input (InputLayer)   [(None, 128, 128, 3)]        0         []                            
                                                                                                  
 Positive_Input (InputLayer  [(None, 128, 128, 3)]        0         []                            
 )                                                                                                
                                                                                                  
 Negative_Input (InputLayer  [(None, 128, 128, 3)]        0         []                            
 )                                                                                                
                                                                                    

In [6]:
# Stand-alone encoder used for inference; get_face_embedding() reads this
# module-level name.
encoder = get_encoder((128, 128, 3))

# Replace the freshly built weights with the ones stored in the .h5 file.
# NOTE(review): hard-coded absolute path — the notebook only runs on a machine
# with this exact directory layout; consider a configurable base directory.
encoder.load_weights("C:/project/deployment/encoder_celeb_custom.h5")

In [7]:
# MediaPipe face-detection solution and a detector created with
# min_detection_confidence=0.5.
# NOTE(review): recognize_faces() creates its own FaceDetection instance, so
# this module-level detector is not used by any cell visible here — confirm
# whether it is still needed.
mp_face_detection = mp.solutions.face_detection
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)


In [8]:
# Open the capture device at index 0.
# NOTE(review): recognize_faces() opens its own cv2.VideoCapture(0) and no
# visible cell releases this handle — confirm whether this cell is still needed.
cap = cv2.VideoCapture(0)

In [9]:
def get_face_embedding(face_image):
    """Run one BGR face crop through the module-level ``encoder``.

    Args:
        face_image: image array in OpenCV BGR order, or ``None``.

    Returns:
        ``None`` when ``face_image`` is ``None`` or has zero height or width;
        otherwise the result of ``encoder.predict`` on the single-image batch.
    """
    # Guard clauses: nothing to encode for a missing or empty crop.
    if face_image is None:
        return None
    if face_image.shape[0] == 0 or face_image.shape[1] == 0:
        return None

    # Bring the crop to the 128x128 RGB layout the encoder was built for.
    resized = cv2.resize(face_image, (128, 128))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    # Add the batch axis, then apply the Keras preprocessing function.
    batch = rgb.reshape(1, 128, 128, 3)
    model_input = preprocess_input(batch)
    return encoder.predict(model_input)

In [11]:
# Root folder holding one sub-directory of cropped face images per person.
# NOTE(review): hard-coded absolute path — consider a configurable base directory.
dataset_dir = "C:/project/deployment/latest_cropped/"

# Reference embeddings and their labels, kept index-aligned.
known_embeddings = []
labels = []

# Each sub-directory name is used as the label for the images inside it.
for celebrity in os.listdir(dataset_dir):
    celebrity_dir = os.path.join(dataset_dir, celebrity)
    if not os.path.isdir(celebrity_dir):
        continue

    for image_file in os.listdir(celebrity_dir):
        if not image_file.endswith(".jpg"):
            continue

        image_path = os.path.join(celebrity_dir, image_file)
        image = cv2.imread(image_path)

        # get_face_embedding() returns None when the file could not be decoded
        # (cv2.imread gives None) or the image is empty. Appending that None
        # would corrupt the embeddings array and crash the distance
        # computation during recognition, so such files are skipped.
        face_embedding = get_face_embedding(image)
        if face_embedding is None:
            print(f"Skipping unreadable image: {image_path}")
            continue

        known_embeddings.append(face_embedding)
        labels.append(celebrity)

# Convert the face embeddings and labels to NumPy arrays
known_embeddings = np.array(known_embeddings)
labels = np.array(labels)



Real-time face recognition with per-frame execution time

In [12]:
import time

def _padded_face_rect(box, frame_shape):
    """Turn a MediaPipe relative bounding box into a padded pixel rectangle.

    Returns a 2x2 integer array ``[[x0, y0], [x1, y1]]``; values may fall
    outside the frame.
    """
    # NOTE(review): the left padding uses box.height (not box.width). It is kept
    # unchanged because the crop geometry presumably has to match the crops the
    # reference images were produced with — confirm.
    rect = np.array(
        [
            [box.xmin - box.height * 0.2, box.ymin - box.height * 0.5],
            [box.xmin + box.width * 1.2, box.ymin + box.height * 1.1],
        ]
    )
    rect[:, 1] *= frame_shape[0]  # y: relative -> pixels
    rect[:, 0] *= frame_shape[1]  # x: relative -> pixels
    return rect.astype(int)


def _closest_known_label(face_embedding, threshold):
    """Return the label of the nearest known embedding closer than ``threshold``, else None."""
    best_label = None
    best_distance = float("inf")
    for label, known_embedding in zip(labels, known_embeddings):
        distance = np.linalg.norm(face_embedding - known_embedding)
        # Keep the closest entry, not merely the last one under the threshold.
        if distance < threshold and distance < best_distance:
            best_label = label
            best_distance = distance
    return best_label


def _draw_face_annotation(frame, rect, label):
    """Draw the bounding box and its caption (green = known, red = "Unknown") on ``frame``."""
    if label is not None:
        color = (0, 255, 0)  # Green for known faces
        caption = str(label)
    else:
        color = (0, 0, 255)  # Red for Unknown faces
        caption = "Unknown"

    x0, y0 = int(rect[0, 0]), int(rect[0, 1])
    x1, y1 = int(rect[1, 0]), int(rect[1, 1])

    # Clamp the caption origin so it stays visible when the padded box starts
    # above or left of the frame.
    cv2.putText(
        frame,
        caption,
        (max(x0, 0), max(y0 - 10, 15)),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.5,
        color,
        2,
    )
    cv2.rectangle(frame, (x0, y0), (x1, y1), color, 2)


def recognize_faces(camera_index=0, match_threshold=0.5, min_detection_confidence=0.5):
    """Recognise faces from a webcam stream until 'q' is pressed or the stream ends.

    Each frame is run through MediaPipe face detection; every detected face is
    cropped, embedded with ``get_face_embedding`` and compared against the
    module-level ``known_embeddings`` / ``labels``. The annotated frame is
    shown in a window, the processing time of every frame is printed, and the
    accumulated processing time is printed at the end.

    Args:
        camera_index: device index passed to ``cv2.VideoCapture``.
        match_threshold: a face is labelled only if its nearest known
            embedding is strictly closer than this distance.
        min_detection_confidence: forwarded to MediaPipe ``FaceDetection``.
    """
    cap = cv2.VideoCapture(camera_index)
    total_execution_time = 0.0

    try:
        # Create the detector once; building it for every frame (as before)
        # re-initialises the detection graph on each iteration.
        with mp.solutions.face_detection.FaceDetection(
            min_detection_confidence=min_detection_confidence
        ) as face_detection:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                start_time = time.perf_counter()

                results = face_detection.process(
                    cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                )

                for detection in results.detections or []:
                    rect = _padded_face_rect(
                        detection.location_data.relative_bounding_box,
                        frame.shape,
                    )

                    # Clamp all four bounds at 0 so a negative coordinate cannot
                    # wrap around in slicing; an empty crop is then rejected by
                    # get_face_embedding().
                    top, bottom = max(rect[0, 1], 0), max(rect[1, 1], 0)
                    left, right = max(rect[0, 0], 0), max(rect[1, 0], 0)
                    face = frame[top:bottom, left:right]

                    face_embedding = get_face_embedding(face)
                    if face_embedding is None:
                        continue

                    label = _closest_known_label(face_embedding, match_threshold)
                    _draw_face_annotation(frame, rect, label)

                frame_execution_time = time.perf_counter() - start_time
                total_execution_time += frame_execution_time

                # Print the execution time for the current frame
                print(f"Frame Execution Time: {frame_execution_time:.4f} seconds")

                # Display the frame with recognized faces
                cv2.imshow("Face Recognition", frame)

                # Press 'q' to exit the loop
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
    finally:
        # Always free the camera and close the window, even on an exception.
        cap.release()
        cv2.destroyAllWindows()

    # Print the total execution time after processing all frames
    print(f"Total Execution Time: {total_execution_time:.4f} seconds")

# Start real-time face recognition. This call blocks until 'q' is pressed in
# the display window or the camera stops delivering frames.
recognize_faces()


Frame Execution Time: 0.0966 seconds
Frame Execution Time: 0.0867 seconds
Frame Execution Time: 0.0918 seconds
Frame Execution Time: 0.0894 seconds
Frame Execution Time: 0.0873 seconds
Frame Execution Time: 0.0876 seconds
Frame Execution Time: 0.0869 seconds
Frame Execution Time: 0.0854 seconds
Frame Execution Time: 0.0865 seconds
Frame Execution Time: 0.0903 seconds
Frame Execution Time: 0.1038 seconds
Frame Execution Time: 0.0913 seconds
Frame Execution Time: 0.0877 seconds
Frame Execution Time: 0.0875 seconds
Frame Execution Time: 0.0884 seconds
Frame Execution Time: 0.0913 seconds
Frame Execution Time: 0.0872 seconds
Frame Execution Time: 0.0842 seconds
Frame Execution Time: 0.0898 seconds
Frame Execution Time: 0.0880 seconds
Frame Execution Time: 0.0855 seconds
Frame Execution Time: 0.0873 seconds
Frame Execution Time: 0.0860 seconds
Frame Execution Time: 0.0859 seconds
Frame Execution Time: 0.0857 seconds
Frame Execution Time: 0.1029 seconds
Frame Execution Time: 0.0859 seconds
F