In [5]:
import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from PIL import Image, ImageDraw
from sklearn.cluster import DBSCAN

In [12]:
# Detect faces in a single image and outline each one in red.
mtcnn = MTCNN()

# Load an image containing faces
img = Image.open('data/images/img_1.jpg')

# `detect` returns the bounding boxes plus a second value that is not needed here.
boxes, _ = mtcnn.detect(img)

# `boxes` is None when nothing was detected, so only draw when there is something to draw.
if boxes is not None:
    # The drawing context does not depend on the box, so build it once
    # instead of once per detected face.
    draw = ImageDraw.Draw(img)
    for box in boxes:
        # Each box is converted to a plain list of coordinates for PIL.
        draw.rectangle(box.tolist(), outline='red', width=3)

# Open the (possibly annotated) image in the default image viewer.
img.show()

In [26]:
# Compute a face embedding for one image.
# MTCNN detects and crops the face; InceptionResnetV1 turns the crop into a feature vector.
mtcnn = MTCNN()
resnet = InceptionResnetV1(pretrained='casia-webface').eval()  # eval(): inference mode

# Open the image using PIL
img = Image.open('data/images/img_1.jpg')

# Calling the detector directly returns the cropped face tensor, or None if no face was found.
aligned = mtcnn(img)

if aligned is not None:
    # Shape of the single cropped face (channels, height, width).
    print(aligned.shape)

    # The embedding model expects a batch, so add a leading batch dimension.
    aligned = aligned.unsqueeze(0)

    # Inference only: disable autograd up front rather than building a graph
    # and throwing it away afterwards with `.detach()`.
    with torch.no_grad():
        embeddings = resnet(aligned)

    # `embeddings` holds one feature vector for the detected face.
    print(embeddings, embeddings.shape)
else:
    # If no face is detected in the image, print a message
    print("No face detected in the image.")

torch.Size([3, 160, 160])
tensor([[ 1.9026e-02, -6.0987e-02,  7.1703e-03, -4.8106e-02,  3.0901e-02,
         -5.6444e-02,  7.2428e-02,  8.5632e-02,  1.3193e-02, -7.1483e-02,
          4.8667e-02, -7.1583e-02, -6.2817e-03,  5.6666e-02, -5.6190e-02,
          8.5444e-02,  7.4032e-02, -9.9748e-02,  2.7052e-02,  4.9049e-02,
         -2.8160e-02, -1.7794e-02,  1.4784e-03,  1.2196e-02, -3.4382e-02,
         -1.2122e-02,  3.4515e-02,  6.4010e-03,  9.1089e-02, -6.9073e-02,
          1.4593e-02,  2.5911e-02,  2.3787e-02,  2.4444e-03,  1.0286e-02,
         -1.4961e-02, -3.1552e-04,  2.0226e-02, -5.9818e-02,  2.9698e-02,
          4.3853e-02, -8.2309e-03, -1.0467e-02, -5.4482e-02, -9.5878e-03,
         -4.7281e-02,  2.0789e-02,  8.9954e-02, -8.3861e-02,  1.6708e-02,
         -3.6441e-02, -1.1991e-02, -5.7313e-02,  2.9761e-02,  7.8992e-03,
         -3.6963e-02,  1.1660e-02, -5.4706e-02,  4.6702e-02,  3.7148e-03,
          6.3400e-04, -9.1871e-02,  2.5330e-03, -6.9191e-03,  3.6635e-02,
         -5.

In [15]:
# Face verification: decide whether two images show a similar face by comparing
# the Euclidean distance between their embeddings against a threshold.

# MTCNN (Multi-task Cascaded Convolutional Networks) detects and crops faces.
mtcnn = MTCNN()

# Inception ResNet pre-trained on VGGFace2, in inference mode, produces the embeddings.
resnet = InceptionResnetV1(pretrained='vggface2').eval()

# Distances below this value are reported as "similar".
# NOTE(review): 1.0 is the value the notebook used; tune it for your data.
SIMILARITY_THRESHOLD = 1.0

# The two face images to compare.
img1 = Image.open('data/images/img_2.jpg')
img2 = Image.open('data/images/img_4.jpg')

# Calling `mtcnn(img)` performs detection and cropping in one pass and yields
# None when no face is found, so a separate `mtcnn.detect` call beforehand would
# only repeat the detection work.
aligned1 = mtcnn(img1)
aligned2 = mtcnn(img2)

# Proceed only if a face was found in both images.
if aligned1 is not None and aligned2 is not None:
    # Inference only: skip autograd bookkeeping. `.unsqueeze(0)` adds the batch
    # dimension the model expects for a single face.
    with torch.no_grad():
        embeddings1 = resnet(aligned1.unsqueeze(0))
        embeddings2 = resnet(aligned2.unsqueeze(0))

    # Euclidean distance between the two embeddings; smaller means more similar.
    distance = torch.dist(embeddings1, embeddings2)
    print(f"Distance between embeddings: {distance:.4f}")

    if distance < SIMILARITY_THRESHOLD:
        print("The two images are similar.")
    else:
        print("The two images are different.")
else:
    # If no face is detected in one or both images, print an appropriate message.
    print("Face(s) not detected in one or both images.")


Distance between embeddings: 0.8801
The two images are similar.


In [12]:
# Cluster images by face identity: embed one face per image, then group the
# embeddings with DBSCAN.

# MTCNN (Multi-task Cascaded Convolutional Networks) detects and crops faces.
mtcnn = MTCNN()

# Inception ResNet pre-trained on VGGFace2, in inference mode, produces the embeddings.
resnet = InceptionResnetV1(pretrained='vggface2').eval()

# Images to process.
# NOTE(review): MTCNN is constructed with default arguments and the result is
# given a batch dimension via `unsqueeze(0)`, so this cell assumes one face
# tensor per image — confirm if multi-face images must be supported.
img_paths = ['data/images/img_1.jpg', 'data/images/img_2.jpg', 'data/images/img_3.jpg', 'data/images/img_4.jpg',
             'data/images/img_5.jpg']

# Per-image embedding rows, and the paths they came from. Keeping the two lists
# parallel means `clustering.labels_[i]` always refers to `clustered_paths[i]`,
# even when an image is skipped because no face was found.
embedding_rows = []
clustered_paths = []

for img_path in img_paths:
    # Open the image using PIL.
    img = Image.open(img_path)

    # One call detects and crops the face; it yields None when nothing is found,
    # so no separate `mtcnn.detect` pass is needed.
    aligned = mtcnn(img)
    if aligned is None:
        print(f"No face detected in {img_path}; skipping.")
        continue

    # Inference only: skip autograd bookkeeping while computing the embedding.
    with torch.no_grad():
        face_embeddings = resnet(aligned.unsqueeze(0))

    embedding_rows.append(face_embeddings.numpy())
    clustered_paths.append(img_path)

# `np.vstack` fails with an unhelpful message on an empty list; say what went wrong.
if not embedding_rows:
    raise ValueError("No faces were detected in any of the images; nothing to cluster.")

# Combine all individual face embeddings into a single NumPy array (one row per face).
embeddings = np.vstack(embedding_rows)

# Perform clustering on the embeddings using DBSCAN (Density-Based Spatial Clustering of Applications with Noise).
# 'eps' is the maximum distance between two samples to be considered as in the same neighborhood.
# 'min_samples' is the minimum number of samples in a neighborhood required to form a cluster.
clustering = DBSCAN(eps=0.5, min_samples=2).fit(embeddings)


In [13]:
# Cluster label assigned to each row of `embeddings`, in the same order.
# Per scikit-learn's DBSCAN, a label of -1 marks a sample treated as noise (no cluster).
clustering.labels_

array([ 0, -1,  0, -1, -1])