In [None]:
# install cmake from https://cmake.org/download/ adding it to the path
# install desktop development tools for c++ from https://visualstudio.microsoft.com/visual-cpp-build-tools/  

!pip install face_recognition
!pip install opencv-python==4.6.0.66
!pip install cmake
!pip install Image
!pip install dlib 

In [None]:
import dlib
from PIL import Image # (PIL = Python Image Library)
import face_recognition
import numpy as np
import sklearn
import cv2 # For resizing images
import numpy as np

In [None]:
import os
import zipfile

Now we point to the folder that contains the zip files<BR>
The zip file name is used as the "label"<BR>
The following function first tries to detect a face in each picture; a picture is skipped unless exactly one face is found in it<BR>
Then it crops it to the face itself<BR>
Finally it converts the image into numbers (embedding)

In [None]:
def load_images_from_zip(directory):
    """Collect cropped faces and their embeddings from the zip files in ``directory``.

    Every entry of ``directory`` whose name ends with ``.zip`` is opened, and each
    ``.jpg`` / ``.jpeg`` member (entries containing ``__MACOSX`` are ignored) is
    processed as follows: faces are detected, the image is skipped unless exactly
    one face is found, the image is cropped to that face and resized to 128x128,
    and finally an embedding is computed from the crop.

    Args:
        directory: Path of the folder that contains the zip files. The name of
            each zip file is used as the identity label of the images inside it.

    Returns:
        A tuple of four parallel lists
        ``(faces, face_identities, face_filenames, face_embeddings)``:
        the cropped face arrays, the zip file name each face came from, the
        member name inside that zip file, and the embedding of the face.
        Images that were skipped do not appear in any of the lists.
    """
    faces = []            # cropped and resized face images (numpy arrays)
    face_identities = []  # zip file name each face came from (the label)
    face_filenames = []   # name of the image inside its zip file
    face_embeddings = []  # numerical representation of a face in a vector space

    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the results identical from run to run.
    for zip_filename in sorted(os.listdir(directory)):
        if not zip_filename.endswith(".zip"):
            continue
        zip_path = os.path.join(directory, zip_filename)
        print ("Processing zip file %s" % zip_path)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            for zip_info in zip_ref.infolist():
                member_name = zip_info.filename
                # Skip the resource-fork copies that macOS adds to archives.
                if "__MACOSX" in member_name:
                    continue
                if not member_name.lower().endswith((".jpg", ".jpeg")):
                    continue

                print (f"  Processing file {member_name}")
                # The image is fully decoded into an array here, so the member
                # handle can be released before the slow detection step.
                with zip_ref.open(zip_info) as image_file:
                    face = face_recognition.load_image_file(image_file)

                face_locations = face_recognition.face_locations(face)
                if len(face_locations) != 1:
                    # Zero faces and several faces are both rejected.
                    print (f"Skipping, because {len(face_locations)} faces were found (exactly one is required)")
                    continue

                print ("Cropping according to face location")
                top, right, bottom, left = face_locations[0]
                face = np.array(face[top:bottom, left:right, :])
                face = cv2.resize(face, dsize=(128,128), interpolation=cv2.INTER_CUBIC)
                print (face.shape)
                display(Image.fromarray(face))

                # NOTE(review): face_encodings() is called without
                # known_face_locations, so it looks for the face again inside
                # the crop; confirm whether passing the location would be
                # preferable before changing it (it would alter the embeddings).
                try:
                    embedding = face_recognition.face_encodings(face)[0]
                except Exception:
                    # e.g. IndexError when no face is found in the crop.
                    # Unlike a bare "except:", this lets Ctrl-C / kernel
                    # interrupts through.
                    print ("Failed getting embedding, skipping")
                    continue
                print ("Success getting embedding")

                faces.append(face)
                print (face.shape)
                face_identities.append(zip_filename)
                face_filenames.append(member_name)
                face_embeddings.append(embedding)

    return faces, face_identities, face_filenames, face_embeddings

Now we call the function, using either all the pictures of each person or just one picture per person (to see the similarities between different people).

In [None]:
# Dataset switch: 1 -> "pics" (all pictures of every person),
# any other value -> "unique_pics" (just one picture of each person).
allpics = 0

pics_directory = "pics" if allpics == 1 else "unique_pics"
faces, identities, filenames, embeddings = load_images_from_zip(pics_directory)

In [None]:
# Report how many faces made it through detection, cropping and embedding.
print("Number of pics loaded: ", len(faces))

The following will be used to find nearest neighbors of each face.
We choose n_neighbors=2 because the first neighbor is always going to be the image itself

In [None]:
from sklearn.neighbors import NearestNeighbors

# Brute-force search compares each data point to all other points in the dataset to find nearest neighbours.
# While it can be efficient for small datasets, it can become computationally expensive for large datasets
# due to the number of comparisons required.
num_neighbors = 2
nbrs = NearestNeighbors(
    metric='cosine',
    algorithm='brute',
    n_neighbors=num_neighbors,
)
nbrs.fit(embeddings)  # fit() returns the estimator itself, so nbrs is the fitted index
nbrs

The following returns two arrays. The first is an n×k array whose [i,j] entry is the (cosine) distance from the i-th example to its j-th neighbor.<BR>
The second has the same n×k shape, and its [i,j] entry is the index of the j-th neighbor of i.  <BR>
For j=0 we expect the value "i" at entry [i,0], because i is closest to itself.

In [None]:
# Query the fitted index with the same embeddings it was built from;
# the result is a (distances, indices) pair.
neighbors = nbrs.kneighbors(X=embeddings, return_distance=True)
neighbors


The values are distances: with the cosine metric, each value is 1 − (cosine similarity). <BR>Therefore, the lower the value, the more similar the two faces are.

In [None]:
# Column 0 of both result arrays is dropped: it is expected to be the query
# face itself, so what remains are the actual neighbours.
distances = neighbors[0][:, 1:]
indices = neighbors[1][:, 1:]
n = len(indices)

if num_neighbors != 2:
    # More than one real neighbour requested: list the best match and,
    # when available, the runner-up.
    for i in range(n):
        best_match = indices[i, 0]
        print (f"Query face:  {identities[i]}  (file: {filenames[i]}) \n best match: {identities[best_match]}  (file: {filenames[best_match]})")
        if num_neighbors > 2:
            secondbest_match = indices[i, 1]
            print (f"2nd best match: {identities[secondbest_match]}  (file: {filenames[secondbest_match]})\n")
else:
    # Exactly one real neighbour per face: report it and count how often it
    # carries the same identity label as the query.
    n_correct = 0
    for i in range(n):
        best_match = indices[i, 0]
        n_correct += 1 if identities[best_match] == identities[i] else 0
        print (f"Query face:  {identities[i]}  (file: {filenames[i]}) \n best match: {identities[best_match]}  (file: {filenames[best_match]})\n")

    # Accuracy is only meaningful when each person has several pictures.
    if allpics == 1:
        acc = n_correct / n
        print ("Accuracy of face_recognition model: {0:.2%}".format(acc))
