In [15]:
# !pip install deepface
# !pip install faiss-cpu
# Restart the Jupyter kernel if a library is still not found after installing.



In [20]:
import os
import random
from deepface import DeepFace     # import lib
import faiss
import numpy as np

In [None]:
def find_jpg_paths(dataset_dir):
    """Yield ``(path, file_name)`` for every ``.jpg`` file under ``dataset_dir``.

    The tree is walked recursively with ``os.walk``. Paths are built with
    ``os.path.join`` so that files in sub-directories resolve correctly:
    ``os.walk`` reports nested directories without a trailing separator, so
    plain concatenation of root and file name would produce a broken path.
    Only names that end in ``.jpg`` are yielded (``photo.jpg.bak`` is skipped).
    """
    for root, _dirs, files in os.walk(dataset_dir):
        for file_name in files:
            if file_name.endswith(".jpg"):
                yield os.path.join(root, file_name), file_name


# Build [file_name, embedding] pairs from the seed image dataset.
representations = []
for exact_file, file in find_jpg_paths('deepface/tests/dataset/'):
    objs = DeepFace.represent(
        img_path=exact_file,
        model_name="Facenet",
        detector_backend="mtcnn"
    )
    # represent() returns a list; every element's embedding is stored, so one
    # image can contribute more than one entry.
    for obj in objs:
        representations.append([file, obj["embedding"]])

In [10]:
# Sanity check: number of embeddings extracted from the seed images
# (71 in the recorded run).
len(representations)

71

In [12]:
# Check the embedding length (128 in the recorded run); the synthetic vectors
# generated later in this notebook use the same length.
len(representations[0][1])

128

In [96]:
# Pad `representations` with synthetic embeddings until it holds one million
# entries. Only the vectors are synthetic: no image files are created, and the
# "synthetic_<i>.jpg" keys are placeholder names.
TOTAL_VECTORS = 1_000_000
EMBEDDING_DIM = 128   # same length as the real embeddings checked earlier
SYNTHETIC_SEED = 42   # fixed seed so a fresh run regenerates the same vectors

# A dedicated generator keeps the global `random` state untouched.
synthetic_rng = random.Random(SYNTHETIC_SEED)
for i in range(len(representations), TOTAL_VECTORS):
    key = f"synthetic_{i}.jpg"
    vector = [synthetic_rng.gauss(-0.5, 0.5) for _ in range(EMBEDDING_DIM)]
    representations.append([key, vector])

In [14]:
len(representations)       # total entries, real + synthetic (1000000 in the recorded run)

1000000

In [18]:
# Keep only the vectors, in the same order as `representations`, so that
# position i in `embeddings` corresponds to representations[i].
embeddings = [vector for _name, vector in representations]


## Initialize faiss index

In [17]:
# Index that compares vectors by Euclidean (L2) distance.
index = faiss.IndexFlatL2(128)      # 128 = embedding length checked earlier in this notebook

In [21]:
# The FAISS index is fed a NumPy array, so the list of vectors is converted
# first (dtype "f" is single-precision float).
# NOTE(review): re-running this cell without re-creating `index` presumably
# adds the same vectors a second time — confirm before re-executing.
index.add(np.array(embeddings, dtype = "f"))

## compare target image

In [97]:
target_path = "target/target.jpg"    # an unseen target image to look up among the indexed embeddings

In [76]:
# Embed the target with the same model and detector that were used for the
# dataset embeddings.
target_faces = DeepFace.represent(
    img_path=target_path,
    model_name="Facenet",
    detector_backend="mtcnn",
)
# Only the first returned entry is used for the lookup.
target_embedding = target_faces[0]["embedding"]



In [77]:
# Convert to a NumPy array with the same dtype ("f") used when filling the index.
target_embedding = np.array(target_embedding, dtype="f")

In [78]:
# Still a 1-D vector at this point: (128,) in the recorded run.
target_embedding.shape

(128,)

In [79]:
# The search call is given a 2-D array with one row per query, so turn the
# single vector into a one-row matrix. reshape(1, -1) is used rather than
# expand_dims so that re-running this cell leaves the shape at (1, 128)
# instead of stacking another leading axis on every execution.
target_embedding = target_embedding.reshape(1, -1)

In [80]:
# After adding the leading axis: (1, 128) in the recorded run.
target_embedding.shape

(1, 128)

## search

In [91]:
# Look up the k = 3 nearest neighbours of the target embedding and time the search.
import time
k = 3 
# tic/toc bracket only the search call; the difference is printed in a later cell.
tic = time.time()
# `neighbors` holds indices into the indexed vectors (one row for the single
# query, k entries in the recorded run); `distances` is returned alongside it.
distances, neighbors = index.search(target_embedding, k)
toc = time.time()

In [92]:
# Elapsed time of the search call, as the difference of two time.time() readings.
print(toc - tic, 'seconds')

0.03563284873962402 seconds


In [98]:
# Indices of the k nearest neighbours; each one is used as a position in
# `representations` to find the matching image name.
neighbors

array([[41, 45, 68]])

In [99]:
# Map each neighbour index back to its image name. The indices are read from
# the search result rather than typed in by hand, so this cell stays correct
# when the target image, the dataset or k changes.
# Negative indices are skipped because Python would silently wrap them to the
# end of the list (assumed to be FAISS's "no result" marker — TODO confirm).
tuple(representations[i][0] for i in neighbors[0] if i >= 0)

('img5.jpg', 'img6.jpg', 'img10.jpg')

In [95]:
# Open the three images listed above; they should match the target image.