# Imports

In [117]:
import sys
sys.path.append("../")

In [332]:
import os
import pickle
from PIL import Image
import numpy as np
from typing import List
from tqdm import tqdm

from insightface.app import FaceAnalysis
from sklearn.neighbors import NearestNeighbors

# Insightface Model

In [125]:
app = FaceAnalysis(name="antelope")
app.prepare(ctx_id=0, det_size=(640, 640))

input mean and std: 127.5 127.5
find model: /Users/varshita/.insightface/models/antelope/glintr100.onnx recognition
find model: /Users/varshita/.insightface/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)


# Dataset

`The Yale Face Database (size 6.4MB) contains 165 grayscale images in GIF format of 15 individuals. There are 11 images per subject, one per different facial expression or configuration: center-light, w/glasses, happy, left-light, w/no glasses, normal, right-light, sad, sleepy, surprised, and wink.`

In [17]:
# Fixing the file extensions
YALE_DIR = "../data/yalefaces"
files = os.listdir(YALE_DIR)[1:]
for i, img in enumerate(files):
    # print("original name: ", img)
    new_ext_name = "_".join(img.split(".")) + ".gif"
    # print("new name: ",  new_ext_name)
    os.rename(os.path.join(YALE_DIR, img), os.path.join(YALE_DIR, new_ext_name))

# Helper functions

In [239]:
def create_probe_eval_set(files: List):
    # pick random index between 0 and len(files)-1
    random_idx = np.random.randint(0,len(files))
    probe_img_fpaths = [files[random_idx]]
    eval_img_fpaths = [files[idx] for idx in range(len(files)) if idx != random_idx]
    
    return probe_img_fpaths, eval_img_fpaths

In [258]:
def generate_embs(img_fpaths: List[str]):
    embs_set = list()
    embs_label = list()

    for img_fpath in img_fpaths:  
                    
        # read grayscale img
        img = Image.open(os.path.join(YALE_DIR, img_fpath)) 
        img_arr = np.asarray(img)  
        
        # convert grayscale to rgb
        im = Image.fromarray((img_arr * 255).astype(np.uint8))
        rgb_arr = np.asarray(im.convert('RGB'))       
       
        # generate Insightface embedding
        res = app.get(rgb_arr)          
        # append emb to the eval set
        embs_set.append(res)          
        # append label to eval_label set
        embs_label.append(img_fpath.split("_")[0])          

    return embs_set, embs_label
    
    

In [325]:
def filter_empty_embs(img_set: List, img_labels: List[str]):
    # filtering where insightface could not generate an embedding
    good_idx = [i for i,x in enumerate(img_set) if x]
    
    if len(good_idx) == len(img_set):
        clean_embs = [e[0].embedding for e in img_set]
        clean_labels = img_labels
        
    else:
        # filtering eval set and labels based on good idx
        clean_labels = np.array(img_labels)[good_idx]
        clean_set = np.array(img_set, dtype=object)[good_idx]
        
        # generating embs for good idx
        clean_embs = [e[0].embedding for e in clean_set]
    
    return clean_embs, clean_labels

# Generate probe and eval set embeddings

In [323]:
# sorting files
files = os.listdir(YALE_DIR)
files.sort()
eval_set = list()
eval_labels = list()
probe_set = list()
probe_labels = list()
IMAGES_PER_IDENTITY = 11
for i in tqdm(range(1, len(files), IMAGES_PER_IDENTITY), unit_divisor=True): # ignore the README.txt file at files[0]
    # print(i)
    probe, eval = create_probe_eval_set(files[i:i+IMAGES_PER_IDENTITY])
    
    # store eval embs and labels
    eval_set_t, eval_labels_t = generate_embs(eval)
    eval_set.extend(eval_set_t)
    eval_labels.extend(eval_labels_t)
    
    # store probe embs and labels
    probe_set_t, probe_labels_t = generate_embs(probe)
    probe_set.extend(probe_set_t)
    probe_labels.extend(probe_labels_t)
    

100%|██████████| 15/15 [01:04<00:00,  4.30s/it]


In [324]:
assert len(eval_set) == len(eval_labels)
assert len(probe_set) == len(probe_labels)

In [326]:
evaluation_embs, evaluation_labels = filter_empty_embs(eval_set, eval_labels)
probe_embs, probe_labels = filter_empty_embs(probe_set, probe_labels)

In [327]:
assert len(evaluation_embs) == len(evaluation_labels)
assert len(probe_embs) == len(probe_labels)

# Train K NearestNeighbours

In [329]:
# Train KNN classifier
nn = NearestNeighbors(n_neighbors=3, metric="cosine")
nn.fit(X=evaluation_embs)

NearestNeighbors(metric='cosine', n_neighbors=3)

In [None]:
# Optional - saving and loading model
# save the model to disk
filename = 'faceID_model.pkl'
with open(filename, 'wb') as file:
    pickle.dump(nn, file)
    
# some time later...
 
# load the model from disk
# with open(filename, 'rb') as file:
#     pickle_model = pickle.load(file)

# Inference

In [None]:
dists, inds = nn.kneighbors(X=probe_embs, n_neighbors=2, return_distance=True)

# Evaluating metrics - p_at_k

In [330]:
# p@k
p_at_k = np.zeros(len(probe_embs))
for i in range(len(probe_embs)):
    true_label = probe_labels[i]
    pred_neighbr_idx = inds[i]
    
    pred_labels = [evaluation_labels[id] for id in pred_neighbr_idx]
    pred_is_labels = [1 if label == true_label else 0 for label in pred_labels]
    
    p_at_k[i] = np.mean(pred_is_labels)
    
p_at_k.mean()

0.9

# Running Face ID for unknown faces

In [331]:
def print_ID_results(img_fpath: str, evaluation_labels: np.ndarray, verbose: bool = False):
    img = Image.open(img_fpath)
    img_emb = app.get(np.asarray(img))[0].embedding
    
    # get pred from KNN
    dists, inds = nn.kneighbors(X=img_emb.reshape(1,-1), n_neighbors=3, return_distance=True)
    
    # get labels of the neighbours
    pred_labels = [evaluation_labels[i] for i in inds[0]]
    
    # check if any dist is greater than 0.5, and if so, print the results
    no_of_matching_faces = np.sum([1 if d <=0.6 else 0 for d in dists[0]])
    if no_of_matching_faces > 0:
        print("Matching face(s) found in database! ")
        verbose = True
    else: 
        print("No matching face(s) not found in database!")
        
    # print labels and corresponding distances
    if verbose:
        for label, dist in zip(pred_labels, dists[0]):
            print(f"Nearest neighbours found in the database have labels {label} and is at a distance of {dist}")
    

In [319]:
print_ID_results("../data/baby4.jpg", evaluation_labels, verbose=True)

No matching face(s) not found in database!
Nearest neighbours found in the database have labels subject02 and is at a distance of 0.7562326192855835
Nearest neighbours found in the database have labels subject02 and is at a distance of 0.9153403043746948
Nearest neighbours found in the database have labels subject01 and is at a distance of 0.9396535158157349
