In [None]:
from deepface import DeepFace

# Face-recognition model identifiers passed as `model_name` to DeepFace.
# Later cells pick entries by position (e.g. models[2]), so keep the order.
models = [
    "VGG-Face",
    "Facenet",     # Google's
    "Facenet512",  # Google's
    "OpenFace",
    "DeepFace",    # Meta's
    "DeepID",
    "ArcFace",
    "Dlib",
    "SFace",
]

Directory  /root /.deepface created
Directory  /root /.deepface/weights created


In [131]:
import os
import matplotlib.pyplot as plt

def generate_embeddings(input_dir: str = None, dir_tag: str = None, model_name: str = None, file_name: str = None) -> dict:
    """
        Generate face embeddings for every .jpg image of a directory, or for one specific file.
        Args:
            input_dir: str = None - Directory to scan (non-recursively) for .jpg files; ignored when file_name is given
            dir_tag: str = None - Label stored with every result (e.g. positive or negative)
            model_name: str = None - Name of the DeepFace model used to generate the embeddings
            file_name: str = None - Path of a single image to embed instead of scanning input_dir
        Returns:
            Dict keyed by the image path as collected (os.path.join(input_dir, name), or file_name as given).
            Every value is a dict with:
                'tag': the dir_tag argument
                'embeddings': whatever DeepFace.represent returned for that image
            Images rejected by face detection are reported and skipped.
            The dict is empty when neither input_dir nor file_name is given.
    """
    detector_backend: str = 'retinaface'
    files_to_embed: list[str] = []
    embeddings: dict[str, dict] = {}

    if file_name:
        files_to_embed.append(file_name)
    elif input_dir:
        for entry in os.listdir(input_dir):
            # splitext only reports a real suffix, so an extension-less file that
            # happens to be called "jpg" is not mistaken for an image; the
            # comparison is case-insensitive so ".JPG" files are picked up too.
            if os.path.splitext(entry)[1].lower() == '.jpg':
                files_to_embed.append(os.path.join(input_dir, entry))

    for path in files_to_embed:
        abs_path: str = os.path.abspath(path)

        # With its default enforce_detection=True DeepFace raises ValueError when
        # it cannot find a face, so an emptiness check on the result alone never
        # gets the chance to skip such an image.
        try:
            face_objs = DeepFace.extract_faces(
                img_path=abs_path,
                detector_backend=detector_backend
            )
        except ValueError as err:
            print(f"Skipping {path}: {err}")
            continue

        if not face_objs:
            continue

        embeddings[path] = {
            'tag': dir_tag,
            'embeddings': DeepFace.represent(
                img_path=abs_path,
                model_name=model_name,
                detector_backend=detector_backend
            )
        }

    return embeddings

In [108]:
# anchor_embedding_obj = DeepFace.represent(
#     img_path = "input_image.jpg", 
#     model_name = models[2]
# )

# Embed both labelled folders with the same model so their vectors are comparable.
# generate_embeddings returns a dict keyed by image path, not a list.
negatives: dict[str, dict] = generate_embeddings('negative', 'negative', models[2]) # Returns embedding of 512 dims
# positives: dict[str, dict] = generate_embeddings('p', 'positive', models[0]) # Returns embedding of 2622 dims
positives: dict[str, dict] = generate_embeddings('p', 'positive', models[2])

In [98]:
# Peek at the fields DeepFace returns per detected face. The first entry is used
# instead of a hard-coded file name: generate_embeddings keys directory results by
# os.path.join(input_dir, file), so a bare file name is not a valid key.
next(iter(positives.values()))['embeddings'][0].keys()

dict_keys(['embedding', 'facial_area'])

In [89]:
def extract_embeddings(embedding_list: dict[str, dict]) -> list[list]:
    """
        Collect the embedding vector of the first face of every entry.
        Args:
            embedding_list: dict[str, dict] - Mapping whose values hold an 'embeddings' list of
                per-face dicts, each carrying an 'embedding' key
        Returns:
            One embedding per entry, in the iteration order of the mapping
    """
    return [
        entry['embeddings'][0]['embedding']
        for entry in embedding_list.values()
    ]

In [75]:
import os
from typing import Final

# Connection settings used to build the Elasticsearch client URLs.
ELASTIC_PROTOCOL: Final[str] = 'http://'
ELASTIC_PORT: Final[str] = '9200'
# A list of host names (not a single string): one URL is built per entry.
ELASTIC_HOSTS: Final[list[str]] = ['elasticsearch']
# Index that is reset, created and queried by the cells below.
INDEX_NAME: Final[str] = 'tests_vectors_test_index'
# VECTORS_FILE_PATH: Final[str] = os.path.join('tutorial_workspace', 'outputs', 'vectors.json')

In [77]:
from elasticsearch import Elasticsearch

# One client, given one "<protocol><host>:<port>" URL per configured host.
es = Elasticsearch(
    [ELASTIC_PROTOCOL + host + ':' + ELASTIC_PORT for host in ELASTIC_HOSTS]
)

Resetting the index

In [182]:
# Drop the index if an earlier run left one behind, so the cells below start clean.
is_exists: bool = es.indices.exists(index=INDEX_NAME)
print(is_exists)

if is_exists:
    # Pass the index by keyword like every other client call in this notebook;
    # newer elasticsearch-py releases only accept keyword arguments here.
    es.indices.delete(index=INDEX_NAME)

True


Create the index with a mapping for testing

In [183]:
# Explicit mapping for the test index.
mapping = {
    'properties': {
        # 'dims' must equal the length of the indexed vectors (512 for the
        # Facenet512 embeddings produced in this notebook).
        'face_embeddings': {
            'type': 'dense_vector',
            'dims': 512
        },
        'name': {
            'type': 'keyword'
        },
        # Documents are written with a 'tag' field; declare it explicitly so it
        # is stored as an exact-match keyword instead of being mapped dynamically.
        'tag': {
            'type': 'keyword'
        }
    }
}

# NOTE(review): ignore=400 also hides mapping errors, not only "index already
# exists" - check the returned body for 'acknowledged': True.
es.indices.create(index=INDEX_NAME, mappings=mapping, ignore=400)

{'acknowledged': True,
 'shards_acknowledged': True,
 'index': 'tests_vectors_test_index'}

Index the embeddings of the positives and negatives

In [188]:
def index_embeddings(embedding_list: dict[str, dict]) -> None: 
    """
        Indexing just one class (positive, negative)
        Args:
            embedding_list: dict[str, dict] - Mapping whose values hold a 'tag' and an 'embeddings'
                list of per-face dicts, each carrying an 'embedding' key
        Returns:
            None - one document per face is written to INDEX_NAME through the module-level `es` client
    """
    # The keys are not needed for indexing, only the per-image results.
    for custom_res in embedding_list.values():
        for face_res in custom_res['embeddings']:
            print(f"Indexing new {custom_res['tag']} document")
            print(f"length of embeddings: {len(face_res['embedding'])}")
            es.index(index=INDEX_NAME, document={
                'face_embeddings': face_res['embedding'],
                'tag': custom_res['tag']
            })
            
# Write both labelled sets into the same index; every document carries its set's tag.
index_embeddings(positives)
index_embeddings(negatives)

Indexing new positive document
length of embeddings: 512
Indexing new positive document
length of embeddings: 512
Indexing new positive document
length of embeddings: 512
Indexing new positive document
length of embeddings: 512
Indexing new positive document
length of embeddings: 512
Indexing new negative document
length of embeddings: 512
Indexing new negative document
length of embeddings: 512
Indexing new negative document
length of embeddings: 512
Indexing new negative document
length of embeddings: 512
Indexing new negative document
length of embeddings: 512
Indexing new negative document
length of embeddings: 512
Indexing new negative document
length of embeddings: 512


Let's search for our anchor in the index

In [187]:
import numpy as np

# Embed the anchor image; the result dict is keyed by the path that was passed in.
anchor_path = '4a527694-9cb1-11ed-8529-b808cf4c5169.jpg'
anchor = generate_embeddings(file_name=anchor_path, dir_tag='Anchor', model_name=models[2])
vector_to_search: list = anchor[anchor_path]['embeddings'][0]['embedding']

# Keep a plain-text copy of the anchor vector on disk.
np.savetxt('anchor_embeddings.txt', vector_to_search, fmt='%.16f')

# Score every document as 1 / (L1 distance to the anchor + 0.1): closer vectors
# rank higher and an identical vector gets the maximum score of 10.
similarity_script = {
    "source": "1 / (l1norm(params.queryVector, 'face_embeddings') + 0.1)",
    "params": {"queryVector": vector_to_search},
}
res = es.search(
    index=INDEX_NAME,
    query={
        "script_score": {
            "query": {"match_all": {}},
            "script": similarity_script,
        }
    },
)
res['hits']['hits']

[{'_index': 'tests_vectors_test_index',
  '_type': '_doc',
  '_id': 'oZGUfYYBIe4H3HY1WEDb',
  '_score': 10.0,
  '_source': {'tag': 'Test',
   'face_embeddings': [0.1460995078086853,
    1.4441208839416504,
    -0.7445499897003174,
    -0.3466446101665497,
    0.7807940244674683,
    0.0892653539776802,
    0.0938315019011497,
    2.3403263092041016,
    1.6094220876693726,
    -1.0290778875350952,
    0.6735588312149048,
    -0.97528076171875,
    -0.0115146152675152,
    -0.1813190877437592,
    -1.1522160768508911,
    1.0728859901428223,
    -0.281550794839859,
    1.091519832611084,
    -0.2357115596532822,
    -0.2210192829370499,
    -0.3538000881671906,
    -0.5404279828071594,
    0.7653772830963135,
    -1.6739424467086792,
    0.437554270029068,
    -0.4481526613235474,
    0.2610797882080078,
    -0.9723384380340576,
    -0.2195749431848526,
    -0.3762271404266357,
    -1.9802687168121338,
    0.1240489035844803,
    -0.6143405437469482,
    0.4803548455238342,
    0.885330