In [1]:
import dlib
import numpy as np
import os, random
import numpy as np
from scipy.spatial.distance import euclidean, cosine, cityblock
from sklearn.preprocessing import normalize
from itertools import combinations
from PIL import Image, ImageDraw
import cv2
from IPython.display import Image, display

In [49]:
# Load the pre-trained face recognition model from dlib
detector = dlib.get_frontal_face_detector()
sp = dlib.shape_predictor("models\shape_predictor_68_face_landmarks.dat")
facerec = dlib.face_recognition_model_v1("models\dlib_face_recognition_resnet_model_v1.dat")
face_cascade = cv2.CascadeClassifier('models\haarcascade_frontalface_default.xml')

In [39]:
def create_verification_dataset(root_dir):
    # Initialize lists for positive and negative pairs
    positive_pairs = []
    negative_pairs = []

    # Iterate through each folder in the root directory
    for person_folder in os.listdir(root_dir):
        person_folder_path = os.path.join(root_dir, person_folder)
        
        # Collect all image files in the person's folder
        images = [img for img in os.listdir(person_folder_path) if img.endswith(".jpg")]
        tmp = []
        # Generate positive pairs (same person)
        tmp.extend(combinations(images, 2))
        positive_pairs = [(f'{person_folder}/{x[0]}',f'{person_folder}/{x[1]}',1) for x in tmp]
        # Generate negative pairs (different persons)
        other_folders = [folder for folder in os.listdir(root_dir) if folder != person_folder]
        for other_folder in other_folders:
            other_folder_path = os.path.join(root_dir, other_folder)
            other_images = [img for img in os.listdir(other_folder_path) if img.endswith(".jpg")]
            negative_pairs.extend([(f'{person_folder}/{img1}', f'{other_folder}/{img2}',0) for img1 in images for img2 in other_images])

    # Shuffle the pairs
    random.shuffle(positive_pairs)
    random.shuffle(negative_pairs)

    # Take an equal number of positive and negative pairs for balance
    min_pairs = min(len(positive_pairs), len(negative_pairs))
    verification_dataset = positive_pairs[:min_pairs] + negative_pairs[:min_pairs]
    random.shuffle(verification_dataset)
    return verification_dataset


In [91]:
data = create_verification_dataset('./images/train/')
total_data = len(data)
print(total_data)

702


In [84]:

def get_embeddings(image_url):
  image = dlib.load_rgb_image(image_url)
  dis_img = cv2.imread(image_url)

  faces = detector(image)
  if len(faces) < 1:
    # print("No face detected")
    return
  elif len(faces)> 1:
    print("More than one face")
    return
  else:
    face = faces[0]
    left, top, right, bottom = (face.left(), face.top(), face.right(), face.bottom())
    cv2.rectangle(dis_img, (left, top), (right, bottom), (255, 0, 0), 2)
    # display(Image(data=cv2.imencode('.png', dis_img)[1].tobytes(),))
    shape = sp(image, face)
    face_descriptor = facerec.compute_face_descriptor(image, shape)
    face_embedding = np.array(face_descriptor)
    return face_embedding

In [92]:
correct = 0
per = 0
non_img = 0
i = 0
for x in data:
    i+=1
    print("Cur :",i)
    e1 = get_embeddings(f'./images/train/{x[0]}')
    e2 = get_embeddings(f'./images/train/{x[1]}')
    if e1 is None or e2 is None:
        non_img += 1
        continue
    similarity = 1 -cosine(e1, e2)
    threshold = 0.9
    if (similarity > threshold) == x[2]:
        correct+=1
    per = correct / total_data * 100
print("Total:",total_data)
print("Face not detected in :",non_img)
print("Correct data:",correct)

Cur : 1
Cur : 2
Cur : 3
Cur : 4
Cur : 5
Cur : 6
Cur : 7
Cur : 8
Cur : 9
Cur : 10
Cur : 11
Cur : 12
Cur : 13
Cur : 14
Cur : 15
Cur : 16
Cur : 17
Cur : 18
Cur : 19
Cur : 20
Cur : 21
Cur : 22
Cur : 23
Cur : 24
Cur : 25
Cur : 26
Cur : 27
Cur : 28
Cur : 29
Cur : 30
Cur : 31
Cur : 32
Cur : 33
Cur : 34
Cur : 35
Cur : 36
Cur : 37
Cur : 38
Cur : 39
More than one face
Cur : 40
More than one face
Cur : 41
Cur : 42
Cur : 43
Cur : 44
Cur : 45
Cur : 46
Cur : 47
Cur : 48
Cur : 49
Cur : 50
Cur : 51
Cur : 52
Cur : 53
Cur : 54
Cur : 55
Cur : 56
Cur : 57
Cur : 58
Cur : 59
Cur : 60
Cur : 61
Cur : 62
Cur : 63
Cur : 64
Cur : 65
Cur : 66
Cur : 67
Cur : 68
Cur : 69
Cur : 70
Cur : 71
Cur : 72
Cur : 73
Cur : 74
Cur : 75
Cur : 76
Cur : 77
Cur : 78
Cur : 79
Cur : 80
Cur : 81
Cur : 82
Cur : 83
Cur : 84
Cur : 85
Cur : 86
Cur : 87
Cur : 88
Cur : 89
Cur : 90
Cur : 91
Cur : 92
Cur : 93
Cur : 94
Cur : 95
Cur : 96
Cur : 97
Cur : 98
Cur : 99
Cur : 100
Cur : 101
Cur : 102
Cur : 103
Cur : 104
Cur : 105
Cur : 106
Cur : 107


In [66]:
embedding1 = get_embeddings("images/vijay/1.jpg")
embedding2 = get_embeddings("images/mohanlal/1.jpg")
print(embedding1,embedding2)

[[-0.09388432 -0.00399763  0.07545884 -0.03911177 -0.03010902 -0.03206744
   0.01060309 -0.01886168  0.05775773 -0.06459548  0.16172824 -0.05138695
  -0.17615721 -0.13724533  0.02507709  0.07617425 -0.03987565 -0.12933987
  -0.01599298 -0.06766327  0.02195174  0.00550691 -0.00197432  0.03070059
  -0.11293151 -0.31198113 -0.07078664 -0.06595528 -0.02410119 -0.0888819
  -0.03610783  0.08223411 -0.12610801 -0.0327865   0.00233943  0.12063585
   0.03580037 -0.00342928  0.10501767  0.03217995 -0.11335482 -0.03189758
   0.01991971  0.24327719  0.09294015  0.05287999 -0.00518219  0.02687515
   0.07377499 -0.17384567  0.04612494  0.11216631  0.05465456 -0.02206278
   0.10901006 -0.03165293  0.03052447  0.00100064 -0.15381505  0.02462585
  -0.01364539 -0.02470358 -0.02006581 -0.05483526  0.12916046  0.06966843
  -0.04493634 -0.08978443  0.09755947 -0.15662601 -0.02763078  0.05030108
  -0.07685959 -0.08817089 -0.19113349  0.03792436  0.29706919  0.10380089
  -0.1700718   0.03250409 -0.03063755 -

In [67]:
similarity = 1 -euclidean(embedding1[0], embedding2[0])
print(similarity)

0.5468399704620205


In [56]:
# Set a threshold for similarity (you may need to fine-tune this based on your data)
threshold = 0.6

# Compare similarity with the threshold to determine if faces are a match
if similarity > threshold:
    print("Faces are a match!")
else:
    print("Faces are not a match.")

Faces are not a match.
