In [1]:
import os
import sys
import json
from pathlib import Path
sys.path.append(os.path.abspath('../src'))

# ----------- local imports ----------- 
from utils import show_images
from constants import DATA_DIR

In [2]:
import matplotlib.pyplot as plt
from deepface import DeepFace
from tqdm.auto import tqdm
from PIL import Image
import pandas as pd
import numpy as np

2025-03-21 10:42:36.898859: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-21 10:42:37.188584: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-21 10:42:37.264446: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-21 10:42:38.102581: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load Pickle Representation File

In [4]:
import pickle

# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open datastore files that were generated locally / come from a trusted source.
datastore_path = DATA_DIR / "face_identification/train/ds_model_facenet512_detector_opencv_aligned_normalization_base_expand_0.pkl"
with datastore_path.open("rb") as datastore_file:
    representations = pickle.load(datastore_file)


def _mean_embedding(embeddings):
    """Return the element-wise mean of a group of embedding vectors."""
    return np.mean(np.stack(embeddings), axis=0)


# Convert the representations to a DataFrame
df_rep = pd.DataFrame(representations)
# The person label is the name of the folder that contains the image file
df_rep['person'] = df_rep['identity'].map(lambda identity: Path(identity).parent.name)
# Collapse every person's embeddings into a single averaged vector
df_avg_embedding = df_rep.groupby("person", as_index=False).agg({"embedding": _mean_embedding})

df_avg_embedding

Unnamed: 0,person,embedding
0,person_0,"[0.3298376335629395, -0.7333409530775887, -1.0..."
1,person_1,"[0.4783467225009395, 1.0117955845930884, -1.16..."
2,person_10,"[0.4395070165395737, -0.2822106957435608, 0.18..."
3,person_11,"[0.6532818440061349, 0.564850529942375, -0.769..."
4,person_12,"[-0.2833601363003254, 0.6550906949987014, 0.09..."
...,...,...
57,person_60,"[0.636040463577956, 0.5885688029229641, -0.439..."
58,person_61,"[0.03856503907987412, 0.8731057628830696, 0.20..."
59,person_7,"[0.39031277467705766, 0.3133518789153622, -0.0..."
60,person_8,"[0.01244738662526721, -0.8572551366828737, 0.1..."


In [3]:
# Locations of the face-identification data, all relative to DATA_DIR.
# Training images; searched recursively for *.jpg files further down.
train_img_path = DATA_DIR/"face_identification/train"
# Test images (not referenced by the cells visible in this notebook section).
test_img_path = DATA_DIR/"face_identification/test"
# Output folder for the down-scaled copy of the training set.
train_small = DATA_DIR/"face_identification/train_small"

def make_small_dataset(size=(160, 160)):
    """Write a down-scaled copy of every training image into ``train_small``.

    Every ``*.jpg`` found recursively under ``train_img_path`` is resized to
    ``size`` with Lanczos resampling and saved as
    ``train_small/<name of the image's parent folder>/<image file name>``.

    Args:
        size: Target ``(width, height)`` in pixels handed to ``Image.resize``.
            Defaults to ``(160, 160)``.

    Returns:
        None. The function only writes files to disk.
    """
    train_small.mkdir(exist_ok=True, parents=True)

    for img_path in tqdm(train_img_path.rglob("*.jpg")):
        # Mirror the per-person folder layout of the source directory.
        person_path = train_small / img_path.parent.name
        person_path.mkdir(exist_ok=True, parents=True)

        new_img_path = person_path / img_path.name

        # The context manager closes the source file immediately instead of
        # leaving one open handle per image until garbage collection.
        with Image.open(img_path) as img:
            img.resize(size, Image.LANCZOS).save(new_img_path)

In [5]:
from deepface.modules import modeling, detection, preprocessing

# Build the "Facenet512" facial-recognition model through DeepFace's modeling module.
# The returned wrapper exposes the underlying network as `model.model`, which is
# what the embedding loop further down calls directly on image batches.
model = modeling.build_model(task="facial_recognition", model_name="Facenet512")

I0000 00:00:1741812715.175794    2340 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-03-12 22:51:55.194458: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [23]:
def load_batch(batch_size=16, img_dir=None):
    """Yield batches of preprocessed images together with their file paths.

    All ``*.jpg`` files below ``img_dir`` are collected recursively, sorted,
    and then loaded one by one: each image is converted to RGB, resized to
    160x160 with ``preprocessing.resize_image`` and passed through
    ``preprocessing.normalize_input`` with ``normalization="base"``.

    Args:
        batch_size: Number of images per yielded batch. The final batch may
            be smaller when the image count is not a multiple of it.
        img_dir: Directory to search for images. Defaults to
            ``train_img_path`` when omitted.

    Yields:
        A ``(paths, images)`` tuple, where ``paths`` is the list of image
        paths in the batch and ``images`` is a NumPy array stacking the
        preprocessed images in the same order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    root = train_img_path if img_dir is None else Path(img_dir)
    images_path = sorted(root.rglob("*.jpg"))
    total_len = len(images_path)

    paths = []
    images = []

    for i, img_path in enumerate(tqdm(images_path)):
        # ============= preprocess =============
        # Close the file handle right after decoding, and force 3-channel RGB
        # so grayscale / RGBA / CMYK files cannot break the batch stacking.
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert("RGB"))
        img = preprocessing.resize_image(
            img=img,
            target_size=(160, 160),
        )
        img = preprocessing.normalize_input(img=img, normalization="base")

        paths.append(img_path)
        images.append(img)

        # Emit a batch when it is full or when the last image has been read.
        if (i + 1) % batch_size == 0 or (i + 1) == total_len:
            # Index 0 on axis 1 drops the extra axis each preprocessed image
            # carries (presumably a leading batch axis added by resize_image
            # -- TODO confirm against the installed deepface version).
            yield (paths, np.array(images)[:, 0, :, :])
            # Rebind to fresh lists so the batch just yielded is not mutated.
            images = []
            paths = []

In [24]:
batch_size = 32

# Accumulate results in plain lists and build the DataFrame once at the end;
# concatenating onto a growing DataFrame inside the loop copies all previous
# rows on every iteration (quadratic) and starts from an empty frame.
all_paths = []
all_embeddings = []

# Pass batch_size explicitly: a bare load_batch() call would ignore the value
# set above and fall back to the function's own default.
for paths, images in load_batch(batch_size=batch_size):
    # Run the underlying network directly on the preprocessed batch.
    embeddings = model.model(images).numpy()

    all_paths.extend(paths)
    all_embeddings.extend(embeddings)  # one embedding vector per image

total_embeddings = pd.DataFrame({'path': all_paths, 'embeddings': all_embeddings})

  0%|          | 0/6828 [00:00<?, ?it/s]

In [25]:
# Label every row with the name of the folder that contains its image file
person_labels = total_embeddings['path'].map(lambda img_path: Path(img_path).parent.name)
total_embeddings['person'] = person_labels
total_embeddings

Unnamed: 0,path,embeddings,person
0,/home/aliaagheis/projects/Fawary-Competition-s...,"[1.006344, -1.4313573, -0.17667598, 0.19756395...",person_0
1,/home/aliaagheis/projects/Fawary-Competition-s...,"[0.4157936, -1.3333133, -0.51268137, 0.3930167...",person_0
2,/home/aliaagheis/projects/Fawary-Competition-s...,"[-0.3850316, -2.5415323, -1.176042, -0.8309456...",person_0
3,/home/aliaagheis/projects/Fawary-Competition-s...,"[1.1891283, -1.013693, -0.68593585, 0.8123388,...",person_0
4,/home/aliaagheis/projects/Fawary-Competition-s...,"[0.78686726, -1.4626352, -0.86144245, -0.09703...",person_0
...,...,...,...
6823,/home/aliaagheis/projects/Fawary-Competition-s...,"[1.3901302, -0.33843932, -0.7714985, 0.0276765...",person_99
6824,/home/aliaagheis/projects/Fawary-Competition-s...,"[-0.45898774, 0.4042239, -0.70163524, 0.538473...",person_99
6825,/home/aliaagheis/projects/Fawary-Competition-s...,"[0.45999855, 0.7937782, 1.2101912, 0.326266, 2...",person_99
6826,/home/aliaagheis/projects/Fawary-Competition-s...,"[-0.13476726, 1.1266866, 0.6428942, 0.83492845...",person_99


In [40]:
# Smallest and largest number of images available for a single person
person_counts = total_embeddings['person'].value_counts()
(person_counts.min(), person_counts.max())

(5, 283)

In [4]:
# # ds_model_facenet512_detector_opencv_aligned_normalization_base_expand_0.pkl
# dfs = DeepFace.find(
#   img_path = "pp.jpg", 
#   db_path = DATA_DIR / "face_identification/train", 
#   model_name = 'Facenet512',
#   detector_backend='opencv',  # Try 'mtcnn', 'retinaface', etc.
#   enforce_detection=False,
#   refresh_database=False
# )