In [2]:
from deepface.commons import image_utils
from deepface.modules import modeling, detection, preprocessing
from deepface.models.FacialRecognition import FacialRecognition
from deepface.models.Detector import DetectedFace, FacialAreaRegion
from deepface.detectors import DetectorWrapper
from deepface.detectors import MtCnn
from deepface.basemodels import ArcFace
from deepface.commons import image_utils

import numpy as np
import time
from typing import Any, Dict, Optional, Union, List, Tuple

import numpy as np
import cv2
from PIL import Image

face_recog_model = 'ArcFace'
face_detect_model = 'mtcnn'

In [3]:
face_detector_client = MtCnn.MtCnnClient()

In [4]:
face_recognition_client = ArcFace.ArcFaceClient()

In [5]:
# face detection 

def align_face(
    img: np.ndarray,
    left_eye: Union[list, tuple],
    right_eye: Union[list, tuple],
) -> Tuple[np.ndarray, float]:
    """
    Align a given image horizantally with respect to their left and right eye locations
    Args:
        img (np.ndarray): pre-loaded image with detected face
        left_eye (list or tuple): coordinates of left eye with respect to the person itself
        right_eye(list or tuple): coordinates of right eye with respect to the person itself
    Returns:
        img (np.ndarray): aligned facial image
    """
    # if eye could not be detected for the given image, return image itself
    if left_eye is None or right_eye is None:
        return img, 0

    # sometimes unexpectedly detected images come with nil dimensions
    if img.shape[0] == 0 or img.shape[1] == 0:
        return img, 0

    angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
    img = np.array(Image.fromarray(img).rotate(angle))
    return img, angle

def rotate_facial_area(
    facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
) -> Tuple[int, int, int, int]:
    """
    Rotate the facial area around its center.
    Inspried from the work of @UmutDeniz26 - github.com/serengil/retinaface/pull/80

    Args:
        facial_area (tuple of int): Representing the (x1, y1, x2, y2) of the facial area.
            x2 is equal to x1 + w1, and y2 is equal to y1 + h1
        angle (float): Angle of rotation in degrees. Its sign determines the direction of rotation.
                       Note that angles > 360 degrees are normalized to the range [0, 360).
        size (tuple of int): Tuple representing the size of the image (width, height).

    Returns:
        rotated_coordinates (tuple of int): Representing the new coordinates
            (x1, y1, x2, y2) or (x1, y1, x1+w1, y1+h1) of the rotated facial area.
    """

    # Normalize the witdh of the angle so we don't have to
    # worry about rotations greater than 360 degrees.
    # We workaround the quirky behavior of the modulo operator
    # for negative angle values.
    direction = 1 if angle >= 0 else -1
    angle = abs(angle) % 360
    if angle == 0:
        return facial_area

    # Angle in radians
    angle = angle * np.pi / 180

    height, weight = size

    # Translate the facial area to the center of the image
    x = (facial_area[0] + facial_area[2]) / 2 - weight / 2
    y = (facial_area[1] + facial_area[3]) / 2 - height / 2

    # Rotate the facial area
    x_new = x * np.cos(angle) + y * direction * np.sin(angle)
    y_new = -x * direction * np.sin(angle) + y * np.cos(angle)

    # Translate the facial area back to the original position
    x_new = x_new + weight / 2
    y_new = y_new + height / 2

    # Calculate projected coordinates after alignment
    x1 = x_new - (facial_area[2] - facial_area[0]) / 2
    y1 = y_new - (facial_area[3] - facial_area[1]) / 2
    x2 = x_new + (facial_area[2] - facial_area[0]) / 2
    y2 = y_new + (facial_area[3] - facial_area[1]) / 2

    # validate projected coordinates are in image's boundaries
    x1 = max(int(x1), 0)
    y1 = max(int(y1), 0)
    x2 = min(int(x2), weight)
    y2 = min(int(y2), height)

    return (x1, y1, x2, y2)

def detect_faces(img: np.ndarray, align: bool = True, expand_percentage: int = 0) -> List[DetectedFace]:
    """
    Detect face(s) from a given image
    Args:
        img (np.ndarray): pre-loaded image

        align (bool): enable or disable alignment after detection

        expand_percentage (int): expand detected facial area with a percentage (default is 0).

    Returns:
        results (List[DetectedFace]): A list of DetectedFace objects
            where each object contains:

        - img (np.ndarray): The detected face as a NumPy array.

        - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
            left_eye and right eye. left eye and right eye are eyes on the left and right
            with respect to the person instead of observer.

        - confidence (float): The confidence score associated with the detected face.
    """
    # validate expand percentage score
    if expand_percentage < 0:
        logger.warn(
            f"Expand percentage cannot be negative but you set it to {expand_percentage}."
            "Overwritten it to 0."
        )
        expand_percentage = 0

    # find facial areas of given image
    facial_areas = face_detector_client.detect_faces(img)

    results = []
    for facial_area in facial_areas:
        x = facial_area.x
        y = facial_area.y
        w = facial_area.w
        h = facial_area.h
        left_eye = facial_area.left_eye
        right_eye = facial_area.right_eye
        confidence = facial_area.confidence

        if expand_percentage > 0:
            # Expand the facial region height and width by the provided percentage
            # ensuring that the expanded region stays within img.shape limits
            expanded_w = w + int(w * expand_percentage / 100)
            expanded_h = h + int(h * expand_percentage / 100)

            x = max(0, x - int((expanded_w - w) / 2))
            y = max(0, y - int((expanded_h - h) / 2))
            w = min(img.shape[1] - x, expanded_w)
            h = min(img.shape[0] - y, expanded_h)

        # extract detected face unaligned
        detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]

        # align original image, then find projection of detected face area after alignment
        if align is True:  # and left_eye is not None and right_eye is not None:
            aligned_img, angle = align_face(
                img=img, left_eye=left_eye, right_eye=right_eye
            )
            rotated_x1, rotated_y1, rotated_x2, rotated_y2 = rotate_facial_area(
                facial_area=(x, y, x + w, y + h), angle=angle, size=(img.shape[0], img.shape[1])
            )
            detected_face = aligned_img[
                int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
            ]

        result = DetectedFace(
            img=detected_face,
            facial_area=FacialAreaRegion(
                x=x, y=y, h=h, w=w, confidence=confidence, left_eye=left_eye, right_eye=right_eye
            ),
            confidence=confidence,
        )
        results.append(result)
    return results

def extract_faces(
    img_path: Union[str, np.ndarray],
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    grayscale: bool = False,
) -> List[Dict[str, Any]]:
    
    resp_objs = []

    # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
    img, img_name = image_utils.load_image(img_path)

    if img is None:
        raise ValueError(f"Exception while loading {img_name}")

    base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0], confidence=0)

    face_objs = detect_faces(
        img=img,
        align=align,
        expand_percentage=expand_percentage,
    )
    

    # in case of no face found
    if len(face_objs) == 0 and enforce_detection is True:
        if img_name is not None:
            raise ValueError(
                f"Face could not be detected in {img_name}."
                "Please confirm that the picture is a face photo "
                "or consider to set enforce_detection param to False."
            )
        else:
            raise ValueError(
                "Face could not be detected. Please confirm that the picture is a face photo "
                "or consider to set enforce_detection param to False."
            )

    if len(face_objs) == 0 and enforce_detection is False:
        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]

    for face_obj in face_objs:
        current_img = face_obj.img
        current_region = face_obj.facial_area

        if current_img.shape[0] == 0 or current_img.shape[1] == 0:
            continue

        if grayscale is True:
            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)

        current_img = current_img / 255  # normalize input in [0, 1]

        resp_objs.append(
            {
                "face": current_img[:, :, ::-1],
                "facial_area": {
                    "x": int(current_region.x),
                    "y": int(current_region.y),
                    "w": int(current_region.w),
                    "h": int(current_region.h),
                    "left_eye": current_region.left_eye,
                    "right_eye": current_region.right_eye,
                },
                "confidence": round(current_region.confidence, 2),
            }
        )

    if len(resp_objs) == 0 and enforce_detection == True:
        raise ValueError(
            f"Exception while extracting faces from {img_name}."
            "Consider to set enforce_detection arg to False."
        )

    return resp_objs


In [8]:
# face feature extraction

def extract_faces_and_embeddings(
    img_path: Union[str, np.ndarray],
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    normalization: str = "base",
) -> Tuple[List[List[float]], List[dict]]:
    """
    Extract facial areas and find corresponding embeddings for given image
    Returns:
        embeddings (List[float])
        facial areas (List[dict])
    """
    embeddings = []
    facial_areas = []

    # detect faces
    img_objs = extract_faces(
        img_path=img_path,
        grayscale=False,
        enforce_detection=enforce_detection,
        align=align,
        expand_percentage=expand_percentage,
    )
    
    
    # find embeddings for each face
    for img_obj in img_objs:        
        img = img_obj["face"]

        # rgb to bgr
        img = img[:, :, ::-1]


        # we have run pre-process in verification. so, this can be skipped if it is coming from verify.
        target_size = face_recognition_client.input_shape
        # resize to expected shape of ml model
        img = preprocessing.resize_image(
            img=img,
            # thanks to DeepId (!)
            target_size=(target_size[1], target_size[0]),
        )

        # custom normalization
        img = preprocessing.normalize_input(img=img, normalization=normalization)

        embedding = face_recognition_client.forward(img)
        embeddings.append(embedding)
        facial_areas.append(img_obj["facial_area"])

    return embeddings, facial_areas

In [20]:
def find_cosine_distance(
    source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> np.float64:
    """
    Find cosine distance between two given vectors
    Args:
        source_representation (np.ndarray or list): 1st vector
        test_representation (np.ndarray or list): 2nd vector
    Returns
        distance (np.float64): calculated cosine distance
    """
    if isinstance(source_representation, list):
        source_representation = np.array(source_representation)

    if isinstance(test_representation, list):
        test_representation = np.array(test_representation)

    a = np.matmul(np.transpose(source_representation), test_representation)
    b = np.sum(np.multiply(source_representation, source_representation))
    c = np.sum(np.multiply(test_representation, test_representation))
    return 1 - (a / (np.sqrt(b) * np.sqrt(c)))

In [35]:
tic = time.time()
threshold = 0.6 # threshold from Arcface consine distance model recommendation
test_set = [
    ["testset/zxl_1.jpg", "testset/zxl_2.jpg", True],
    ["testset/zxl_1.jpg", "testset/zt_1.jpg", False],
    ["testset/jyz_1.jpg", "testset/jyz_2.jpg", True],
    ["testset/zxl_1.jpg", "testset/jyz_1.jpg", False],
    ["testset/wj_1.jpg", "testset/wj_2.jpg", True],
    ["testset/wj_1.jpg", "testset/wj_3.jpg", True],
    ["testset/lsm_1.jpg", "testset/lsm_2.jpg", True],
    ["testset/lsm_1.jpg", "testset/zxl_2.jpg", False],
    ["testset/zxl_1.jpg", "testset/wj_1.jpg", False],
]

successful_tests = 0
unsuccessful_tests = 0
for pair in test_set:
    img1_embeddings, img1_facial_areas = extract_faces_and_embeddings(pair[0])
    img2_embeddings, img2_facial_areas = extract_faces_and_embeddings(pair[1])
    ture_label = pair[2]
    distance = find_cosine_distance(img1_embeddings[0], img2_embeddings[0])
    label = (distance <= threshold)
    if label == ture_label:
        test_result_label = "✅"
        successful_tests += 1
    else:
        test_result_label = "❌"
        unsuccessful_tests += 1

    if label:
        classified_label = "same person"
    else:
        classified_label = "different persons"

    img1_alias = pair[0].split("/", maxsplit=1)[-1]
    img2_alias = pair[1].split("/", maxsplit=1)[-1]

    print(f"{test_result_label} Pair {img1_alias}-{img2_alias} is {classified_label}, Distance: {distance}",)
toc = time.time()

coverage_score = (100 * successful_tests) / (successful_tests + unsuccessful_tests)
print(f'coverage_score {coverage_score}')

✅ Pair zxl_1.jpg-zxl_2.jpg is same person, Distance: 0.3385937740158439
✅ Pair zxl_1.jpg-zt_1.jpg is different persons, Distance: 0.6192383744209936
✅ Pair jyz_1.jpg-jyz_2.jpg is same person, Distance: 0.28493035294999614
✅ Pair zxl_1.jpg-jyz_1.jpg is different persons, Distance: 0.7327641113834089
✅ Pair wj_1.jpg-wj_2.jpg is same person, Distance: 0.40727009046274076
✅ Pair wj_1.jpg-wj_3.jpg is same person, Distance: 0.24609952886567377
✅ Pair lsm_1.jpg-lsm_2.jpg is same person, Distance: 0.5068068690820959
✅ Pair lsm_1.jpg-zxl_2.jpg is different persons, Distance: 1.0429172228115027
✅ Pair zxl_1.jpg-wj_1.jpg is different persons, Distance: 0.7808340290531035
coverage_score 100.0


In [50]:
import os
test_path = 'testset/'
files = os.listdir(test_path)
embeds = []
for file in files:
    img_path = f'{test_path}{file}'
    img_embeddings, img_facial_areas = extract_faces_and_embeddings(img_path)
    embeds.append(img_embeddings[0])

In [77]:
num_correct = 0
num_error = 0
for i in range(len(embeds)):
    for j in range(len(embeds)):
        if i == j:
            continue
        distance = find_cosine_distance(embeds[i], embeds[j])
        label = (distance <= threshold)
        true_label = (files[i].split('_')[0] == files[j].split('_')[0])
        if label == true_label:
            test_result_label = "✅"
            num_correct += 1
        else:
            test_result_label = "❌"
            num_error += 1

        if label:
            classified_label = "same person"
        else:
            classified_label = "different persons"

        if true_label:
            print(f"{test_result_label} Pair {files[i]}-{files[j]} is {classified_label}, Distance: {distance:.2f}")

✅ Pair zxl_1.jpg-zxl_2.jpg is same person, Distance: 0.34
✅ Pair zxl_2.jpg-zxl_1.jpg is same person, Distance: 0.34
✅ Pair zt_1.jpg-zt_2.jpg is same person, Distance: 0.43
✅ Pair zt_2.jpg-zt_1.jpg is same person, Distance: 0.43
✅ Pair wj_1.jpg-wj_2.jpg is same person, Distance: 0.41
✅ Pair wj_1.jpg-wj_3.jpg is same person, Distance: 0.25
✅ Pair wj_2.jpg-wj_1.jpg is same person, Distance: 0.41
✅ Pair wj_2.jpg-wj_3.jpg is same person, Distance: 0.35
✅ Pair wj_3.jpg-wj_1.jpg is same person, Distance: 0.25
✅ Pair wj_3.jpg-wj_2.jpg is same person, Distance: 0.35
✅ Pair lsm_1.jpg-lsm_2.jpg is same person, Distance: 0.51
✅ Pair lsm_2.jpg-lsm_1.jpg is same person, Distance: 0.51
✅ Pair jyz_1.jpg-jyz_2.jpg is same person, Distance: 0.28
✅ Pair jyz_2.jpg-jyz_1.jpg is same person, Distance: 0.28
✅ Pair ax_2.jpg-ax_1.jpg is same person, Distance: 0.33
✅ Pair ax_1.jpg-ax_2.jpg is same person, Distance: 0.33
✅ Pair cjx_3.jpg-cjx_1.jpg is same person, Distance: 0.35
✅ Pair cjx_3.jpg-cjx_2.jpg is same

In [70]:
distance = find_cosine_distance(embeds[0], embeds[1])
label = (distance <= threshold)
true_label = (files[0].split('_')[0] == files[1].split('_')[0])

In [71]:
distance, label, true_label

(0.3385937740158439, True, True)

In [76]:
num_error

514