In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1,fixed_image_standardization, training, extract_face
import torch
from torch.utils.data import DataLoader,SequentialSampler
from torchvision import datasets,transforms
import numpy as np
import math
import pandas as pd
import os
from PIL import Image, ImageDraw

import matplotlib
matplotlib.use('Qt5Agg') # change backend enviroment so plot is working 
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Data-loading worker processes: none on Windows ('nt'), four everywhere else.
workers = 4 if os.name != 'nt' else 0

# Prefer the first CUDA device and fall back to the CPU when CUDA is unavailable.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Running on device: {device}')

# Face detector.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    # keep_all=True returns every detected face instead of a single one, so the
    # cropped output gains a per-face dimension
    # (presumably num_faces x channels x height x width for one image - TODO confirm).
    keep_all=True,
    device=device,
)


# Embedding network with pretrained 'vggface2' weights, switched to eval mode.
resnet = InceptionResnetV1(pretrained='vggface2')
resnet = resnet.eval().to(device)
# With classify=True the final fully connected layer would be active and the
# output size would depend on the number of classes used for training (adding
# people to the training set changes it). Keep it off to get embeddings.
resnet.classify = False


# Load the test image and let MTCNN crop the detected face(s); the crop is also
# written to `save_path`.
img = Image.open('C:/Users/initi/OneDrive/Documents/GitHub/facenet-pytorch/data/test_images/test/sombody.jpg')  # Replace with the path to your image file
img_cropped = mtcnn(img, save_path="C:/Users/initi/OneDrive/Documents/GitHub/facenet-pytorch/data/test_images/test/sombody_cropped.jpg")

# boxes  : number_of_people x 4      -> (x_min, y_min, x_max, y_max) of each face
# points : number_of_people x 5 x 2  -> (x, y) of the 5 facial landmarks of each face
boxes, probs, points = mtcnn.detect(img, landmarks=True)
if boxes is None:
    # detect() signals "no face found" with None; normalise to empty arrays so
    # the code below handles that case instead of failing on `None.shape`.
    boxes = np.empty((0, 4))
    points = np.empty((0, 5, 2))
num_of_people = boxes.shape[0]
print(str(num_of_people)+' of people has been identified' )

# Draw on a copy so the original image stays clean for extract_face().
img_draw = img.copy()
draw = ImageDraw.Draw(img_draw)
for i, (box, point) in enumerate(zip(boxes, points)):
    draw.rectangle(box.tolist(), width=5) # draw the rectangle of the entire face
    for p in point:
        draw.rectangle((p - 10).tolist() + (p + 10).tolist(), width=0) # draw the rectangle of eyes, nose and mouth corners
    # One crop per face: this used to sit inside the landmark loop and was
    # re-written once per landmark with identical content.
    extract_face(img, box, save_path='detected_face_{}.png'.format(i))
# Save the annotated image once, after every face has been drawn.
img_draw.save('annotated_faces.png')

# Compute one embedding per detected face. Each entry of img_probs is a NumPy
# array with a leading batch dimension of 1.
img_probs = []
# Inference only: no_grad() stops autograd from recording a graph that would
# be thrown away immediately.
with torch.no_grad():
    for i in range(num_of_people):
        # Add batch dimension: [channels, height, width] -> [1, channels, height, width]
        img_crop_single = img_cropped[i, ...].unsqueeze(0).to(device)
        img_single_probs = resnet(img_crop_single)
        # Move to host memory before converting to NumPy.
        img_probs.append(img_single_probs.cpu().numpy())

# plt.ion()
# plt.plot(img_probs[0])
# plt.show()

Running on device: cuda:0


  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(state_dict_path)
  state_dict = torch.load(cached_file)


4 of people has been identified


In [3]:
# Sanity check: inspect the shape of the first detected face's embedding array.
img_probs[0].shape

(1, 512)

In [4]:
# Root folder of the LFW images. A later cell reads the dataset from
# data_dir + '_cropped'. NOTE(review): machine-specific absolute path.
data_dir = r'C:\Users\initi\OneDrive\Documents\GitHub\facenet-pytorch\data\lfw\lfw'
# Path to the LFW pairs file. NOTE(review): not referenced by any other cell visible here.
pairs_path = r'C:\Users\initi\OneDrive\Documents\GitHub\facenet-pytorch\data\lfw\lfwpairs.txt'

In [5]:
# %%
# Delete mtcnn to reduce GPU memory usage.
# NOTE(review): running this cell a second time raises NameError because the
# name `mtcnn` no longer exists after the first run.
del mtcnn
torch.cuda.empty_cache()

In [6]:
batch_size = 16
epochs = 15  # NOTE(review): not used by any cell visible here

# Per-image preprocessing: cast to float32, convert to a tensor, then apply
# facenet's fixed image standardization.
trans = transforms.Compose([np.float32, transforms.ToTensor(), fixed_image_standardization])

# One sub-folder per person; ImageFolder assigns an integer index to each folder name.
dataset = datasets.ImageFolder(f'{data_dir}_cropped', transform=trans)
# Reverse lookup: class index -> class (person) name.
dataset.idx_to_class = {idx: name for name, idx in dataset.class_to_idx.items()}

# %%
# Create the dataset and data loader from the MTCNN-cropped image output

# Sequential sampling keeps the batches in dataset order.
embed_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=workers,
    sampler=SequentialSampler(dataset),
)

In [8]:
# %%
# Load the pretrained ResNet model
resnet = InceptionResnetV1(pretrained='vggface2', classify=False)
resnet = resnet.to(device)
resnet.eval()

# Walk the loader once and collect, per image, its embedding and class index.
classes = []
embeddings = []
with torch.no_grad():
    for batch_images, batch_labels in embed_loader:
        batch_embeddings = resnet(batch_images.to(device)).cpu().numpy()
        embeddings.extend(batch_embeddings)
        classes.extend(batch_labels.numpy())

# `embeddings` and `classes` are Python lists; np.save converts them to arrays
# when writing the .npy files.
np.save("embeddings.npy", embeddings)
np.save("classes.npy", classes)

In [8]:
def distance(embeddings1, embeddings2, distance_metric=0):
    """Row-wise distance between two sets of embeddings.

    Args:
        embeddings1: array-like of shape (n, d) or (d,). A 1-D input is
            treated as a single row.
        embeddings2: array-like of shape (n, d) or (d,); must broadcast
            against ``embeddings1``.
        distance_metric: 0 for the squared Euclidean distance (sum of squared
            differences, no square root); 1 for the angular distance
            ``arccos(cosine_similarity) / pi``.

    Returns:
        1-D array holding one distance per (broadcast) row.

    Raises:
        ValueError: if ``distance_metric`` is neither 0 nor 1.
    """
    # Promote 1-D vectors to a single row so both metrics can reduce over axis 1.
    embeddings1 = np.atleast_2d(embeddings1)
    embeddings2 = np.atleast_2d(embeddings2)

    if distance_metric == 0:
        # Squared Euclidean distance
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), axis=1)
    elif distance_metric == 1:
        # Distance based on cosine similarity
        dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1)
        norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1)
        # Rounding can push the ratio slightly outside [-1, 1], where arccos
        # returns NaN; clip it back into the valid domain.
        similarity = np.clip(dot / norm, -1.0, 1.0)
        dist = np.arccos(similarity) / math.pi
    else:
        # Raising a plain string is itself a TypeError in Python 3.
        raise ValueError('Undefined distance metric %s' % (distance_metric,))

    return dist

In [None]:
# Largest nearest-neighbour distance still accepted as a match in the matching cell.
threshold =  1.16 # this is the reference number from lfw_evaluate.py line 201.

In [14]:
# Nearest-neighbour identification: for every detected face find the closest
# gallery embedding and accept its class when the distance is within `threshold`.
# Stack the gallery once so each face needs a single broadcast distance() call
# instead of one Python-level call per gallery image.
gallery = np.asarray(embeddings)
match_people = []
for j in range(num_of_people):
    match_class = 0
    dist_min = float('inf')
    img_single_probs = img_probs[j]
    if len(gallery) > 0:
        dists = distance(gallery, img_single_probs)
        # A NaN distance can never be the minimum (same outcome as a strict '<' scan).
        dists = np.where(np.isnan(dists), np.inf, dists)
        # argmin returns the first minimum, i.e. the earliest gallery entry on ties.
        best = int(np.argmin(dists))
        if dists[best] < dist_min:
            # Kept as a length-1 array so the log line below keeps its format.
            dist_min = dists[best:best + 1]
            match_class = classes[best]
    if dist_min <= threshold:
        match_people.append(dataset.idx_to_class[match_class])
    else:
        match_people.append("Unknown")
    print("min dist is "+ str(dist_min))


min dist is [0.3664642]
min dist is [0.30911055]
min dist is [0.5099769]
min dist is [0.72873294]


In [15]:
# Show the matched identity (or "Unknown") for each detected face, in detection order.
print(match_people)

['LeBron_James', 'Michael_Jordan', 'Unknown', 'Unknown']
