In [22]:
from facenet_pytorch import MTCNN, InceptionResnetV1,fixed_image_standardization, training, extract_face
import torch
from torch.utils.data import DataLoader,SequentialSampler
from torchvision import datasets,transforms
import numpy as np
import math
import pandas as pd
import os
from PIL import Image, ImageDraw,ImageFont

import matplotlib
matplotlib.use('Qt5Agg') # change backend enviroment so plot is working 
import matplotlib.pyplot as plt

In [23]:
def distance(embeddings1, embeddings2, distance_metric=0):
    """Compute the row-wise distance between two sets of embeddings.

    Args:
        embeddings1: Array-like of shape (n, d), or a single vector of shape
            (d,), which is treated as one row.
        embeddings2: Array-like that broadcasts against ``embeddings1``
            (same shape, a single row, or a single vector).
        distance_metric: ``0`` for the squared Euclidean distance (sum of
            squared differences, no square root), ``1`` for the angular
            distance ``arccos(cosine_similarity) / pi``.

    Returns:
        numpy.ndarray holding one distance per row.

    Raises:
        ValueError: If ``distance_metric`` is neither 0 nor 1.
    """
    # Promote plain vectors to a single row so the axis=1 reductions below
    # work for both one embedding and a batch of embeddings.
    embeddings1 = np.atleast_2d(embeddings1)
    embeddings2 = np.atleast_2d(embeddings2)

    if distance_metric == 0:
        # Squared Euclidean distance
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)
    elif distance_metric == 1:
        # Distance based on cosine similarity
        dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1)
        norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1)
        # Rounding can push the ratio marginally outside [-1, 1], which would
        # make arccos return NaN for (near-)identical vectors.
        similarity = np.clip(dot / norm, -1.0, 1.0)
        dist = np.arccos(similarity) / math.pi
    else:
        # Raising a bare string is itself a TypeError in Python 3.
        raise ValueError('Undefined distance metric %r' % (distance_metric,))

    return dist

In [None]:
# Use MTCNN to detect the faces in one image. This cell produces:
# -- the aligned face crops (img_cropped)
# -- the bounding box of every detected face (boxes)
# -- the confidence that each bounding box really contains a face (probs)
# -- the facial landmarks of every detected face (points)

workers = 0 if os.name == 'nt' else 4

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

# Face detector / aligner; the crops it returns are what gets embedded later.
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device,
    keep_all=True, # return every detected face instead of a single one; the result is indexed per face later on
)

img = Image.open('C:/Users/initi/OneDrive/Documents/GitHub/facenet-pytorch/data/test_images/test/sombody.jpg')  # Replace with the path to your image file
img_cropped = mtcnn(img, save_path="C:/Users/initi/OneDrive/Documents/GitHub/facenet-pytorch/data/test_images/test/sombody_cropped.jpg")

# boxes dim  : number_of_people x 4 coordinates (x_min, y_min, x_max, y_max)
# points dim : number_of_people x 5 landmarks x 2 coordinates (x, y)
boxes, probs, points = mtcnn.detect(img, landmarks=True)
if boxes is None:
    # detect() reports "no face" as None; fail here with a clear message
    # instead of an AttributeError on boxes.shape.
    raise ValueError('No face was detected in the input image')
num_of_people = boxes.shape[0]
print(str(num_of_people)+' of people has been identified' )

# Copy of the original image, used later for drawing boxes and name labels
img_draw = img.copy()
draw = ImageDraw.Draw(img_draw)

# %%
# Delete mtcnn to reduce GPU memory usage
del mtcnn
torch.cuda.empty_cache()

Running on device: cuda:0
6 of people has been identified


In [None]:
# Locations of the LFW pairs file and of the LFW image folder
pairs_path = r'C:\Users\initi\OneDrive\Documents\GitHub\facenet-pytorch\data\lfw\lfwpairs.txt'
data_dir = r'C:\Users\initi\OneDrive\Documents\GitHub\facenet-pytorch\data\lfw\lfw'

# Preprocessing applied to every image: cast to float32, convert to a tensor,
# then apply facenet-pytorch's fixed image standardization.
trans = transforms.Compose(
    [np.float32, transforms.ToTensor(), fixed_image_standardization]
)

# The cropped faces live next to the raw data in "<data_dir>_cropped".
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)
# Reverse lookup: class index -> class (person) name
dataset.idx_to_class = dict(
    (idx, name) for name, idx in dataset.class_to_idx.items()
)

# Embedding network, switched to eval mode and moved to the selected device
resnet = InceptionResnetV1(pretrained='vggface2')
resnet = resnet.eval().to(device)
# Keep the classification head off so the network outputs embeddings. When it
# is on, the output size depends on the number of classes used for training.
resnet.classify = False

# Largest distance that is still accepted as a match
threshold = 1.16
# Embeddings and class indices of all training data; both files can be
# produced with FACE_RECOGNATION.IPYNB
embeddings = np.load("embeddings.npy")
classes = np.load("classes.npy")

# Font used for the name labels. "arial.ttf" may be replaced with the path to
# any valid font file on this system.
try:
    font = ImageFont.truetype("arial.ttf", size=20)
except IOError:
    # Requested font is not available: use PIL's built-in default font
    font = ImageFont.load_default()
    

In [27]:
# For every detected face: draw its bounding box, compute the embedding of the
# aligned crop, compare it with the saved training embeddings and write the
# name of the closest match next to the box.

img_probs  = [] # store the embedding vectors for each detected people
match_people_list= [] # predicted name (or "Unknown") for each detected face
for i, (box, point) in enumerate(zip(boxes, points)):
    draw.rectangle(box.tolist(), width=5) # draw the rectangle of the entire face
    for p in point:
        draw.rectangle((p - 10).tolist() + (p + 10).tolist(), width=0) # draw the rectangle of eyes, nose and mouth corners
    # Save the face crop once per face. It used to sit inside the landmark
    # loop and rewrote the same file for every landmark.
    extract_face(img, box, save_path='detected_face_{}.png'.format(i))

    # Add batch dimension: [channels, height, width] -> [1, channels, height, width]
    img_crop_single = img_cropped[i,...].unsqueeze(0)
    img_crop_single = img_crop_single.to(device)
    # Inference only: no autograd graph is needed
    with torch.no_grad():
        img_single_probs = resnet(img_crop_single)
    img_single_probs_cpu = img_single_probs.cpu().detach().numpy()
    img_probs.append(img_single_probs_cpu)

    # Nearest-neighbour search over the training embeddings
    match_class = 0
    dist_min = float('inf')
    for j, train_embed in enumerate(embeddings):
        dist = distance(train_embed, img_single_probs_cpu)
        if dist < dist_min:
            dist_min = dist
            match_class = classes[j]

    # Accept the closest identity only when it is close enough
    if dist_min <= threshold:
        match_people =  dataset.idx_to_class[match_class]
    else:
        match_people =  "Unknown"
    match_people_list.append(match_people)

    text_bbox = draw.textbbox((box[0], box[1]), match_people, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]
    # Position the text above the rectangle (or below if there's not enough space)
    h = box[3]-box[1]
    text_x = box[0]
    text_y = box[1] - text_height - 5  # Place text 5 pixels above the rectangle
    if text_y < 0:  # Text would start above the top edge of the image: place it below the rectangle
        text_y = box[1] + h + 5
    print("detected people is "+ match_people + ", min dist is "+ str(dist_min))
    # Add a filled rectangle as a background for text (optional, for better visibility)
    draw.rectangle([text_x, text_y, text_x + text_width, text_y + text_height], fill="black")
    # Add text
    draw.text((text_x, text_y), match_people, fill="white", font=font)

# Write the annotated image once, after all faces have been labelled
img_draw.save('annotated_faces.png')

detected people is Tom_Hanks, min dist is [0.3645305]
detected people is Jack_Nicholson, min dist is [0.6623132]
detected people is Harrison_Ford, min dist is [0.30935055]
detected people is Rob_Lowe, min dist is [0.7386179]
detected people is Tom_Cruise, min dist is [0.2907584]
detected people is Edward_Norton, min dist is [0.38294858]
