In [19]:
import os
import cv2
import numpy as np
import pandas as pd

from insightface.app import FaceAnalysis

In [56]:
# Build the InsightFace analysis pipeline on the CPU using the 'buffalo_m' model pack.
# Alternative pack names previously toggled in this cell:
#   'buffalo_l', 'buffalo_s', 'buffalo_sc', 'antelopev2'
faceapp = FaceAnalysis(
    name='buffalo_m',
    root='insightface_model',
    providers=['CPUExecutionProvider'],
)

# Detector input size and detection score threshold used by every faceapp.get() call.
faceapp.prepare(
    ctx_id=0,
    det_size=(640, 640),
    det_thresh=0.5,
)

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: insightface_model\models\buffalo_m\1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: insightface_model\models\buffalo_m\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: insightface_model\models\buffalo_m\det_2.5g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: insightface_model\models\buffalo_m\genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: insightface_model\models\buffalo_m\w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det-size: (640, 640)


### Extract facial features and labels

In [57]:
# regular expression
import re

In [58]:
# fn to remove any hyphen or special char between the first & last name
def clean_name(string):
    """Normalise a raw folder-name fragment into a display name.

    Every run of characters other than ASCII letters (underscores, hyphens,
    digits, whitespace, ...) is replaced by a single space, surrounding
    whitespace is removed, and the result is title-cased.

    Parameters
    ----------
    string : str
        Raw text such as ``'chris_evans'``.

    Returns
    -------
    str
        Cleaned text such as ``'Chris Evans'``.
    """
    # '+' collapses consecutive separators so 'a__b' does not become 'A  B';
    # strip() drops the space a leading/trailing separator would leave behind.
    string = re.sub(r'[^A-Za-z]+', ' ', string).strip()
    return string.title()

# clean_name('chris_evans')

In [59]:
# Walk custom_images/<role>-<name>/ and collect one face embedding per readable image.
# Each entry appended to person_info is [name, role, embedding].
person_info = []
listdir = os.listdir(path='custom_images')

# Folder names are expected to look like "<role>-<name>", e.g. "student-Aditi_Sunil".
for folder_name in listdir:
    folder_path = os.path.join('custom_images', folder_name)

    # Skip stray files and entries that do not follow the naming scheme instead of
    # crashing on the unpacking below or on os.listdir() of a non-directory.
    if not os.path.isdir(folder_path) or '-' not in folder_name:
        print(f"Skipping (not a '<role>-<name>' folder): {folder_name}")
        continue

    print(f"Processing folder: {folder_name}")

    # Split on the first hyphen only, so a hyphen inside the name part does not
    # raise "too many values to unpack"; clean_name() then turns it into a space.
    role, name = folder_name.split('-', 1)
    name = clean_name(name)
    role = clean_name(role)

    # every entry inside this person's folder
    img_files = os.listdir(path=folder_path)
    for file in img_files:
        path = os.path.join(folder_path, file)

        # step-1 : read the image
        # cv2.imread returns None (it does not raise) when the file cannot be decoded.
        img_arr = cv2.imread(path)
        if img_arr is None:
            print(f"  Skipping unreadable image: {path}")
            continue

        # step-2 : get the info (a list holding at most one detected face)
        result = faceapp.get(img_arr, max_num=1)
        if len(result) == 0:
            # no face detected in this image
            continue

        # step-3 : extract facial embedding
        res = result[0]
        embedding = res['embedding']

        # step-4 : save all info, name, role, embedding in a list
        person_info.append([name, role, embedding])


Processing folder: student-abhay
Processing folder: student-aditi
Processing folder: student-Aditi_Sunil
Processing folder: student-aisvarrya
Processing folder: student-alastair
Processing folder: student-alekhya
Processing folder: student-Amresh_Chaurasiya
Processing folder: student-anki
Processing folder: student-ankita
Processing folder: student-ankur
Processing folder: student-arun
Processing folder: student-asaqin
Processing folder: student-ashi
Processing folder: student-ashu
Processing folder: student-asit
Processing folder: student-astha
Processing folder: student-ayusha
Processing folder: student-binks
Processing folder: student-Chandni_Ahuja
Processing folder: student-Chinoy
Processing folder: student-deepak
Processing folder: student-deepika
Processing folder: student-deepthi
Processing folder: student-dhananjay
Processing folder: student-Diya_Khare
Processing folder: student-fattwo
Processing folder: student-Gursimar_Singh
Processing folder: student-hari
Processing folder: 

In [60]:
# Step 1: Read the pre-computed LFW embeddings for the selected model pack from disk.
model_name = 'buffalo_m'
lfw_path = f"lfw_embeddings_{model_name}.pkl"

if not os.path.exists(lfw_path):
    # Nothing on disk: fall back to an empty frame that still carries the expected columns.
    print("❌ LFW embeddings not found.")
    lfw_df = pd.DataFrame(columns=['Name', 'Role', 'Facial_Features'])
else:
    lfw_df = pd.read_pickle(lfw_path)
    print("✅ Loaded LFW embeddings:", lfw_df.shape)

lfw_df

✅ Loaded LFW embeddings: (1767, 3)


Unnamed: 0,Name,Role,Facial_Features
0,Aaron Peirsol,Student,"[-0.45143485, -1.6528509, 1.4327767, -0.422060..."
1,Aaron Peirsol,Student,"[-0.023353167, -1.446713, -0.01197511, 0.55963..."
2,Aaron Peirsol,Student,"[0.1924659, 0.27568585, 0.50092113, -0.3748789..."
3,Abdoulaye Wade,Student,"[-0.8208419, 0.2701584, -1.5315266, -0.8176923..."
4,Abdoulaye Wade,Student,"[0.23625141, -0.14451125, -1.2972999, -0.10191..."
...,...,...,...
1762,Zinedine Zidane,Student,"[0.8057942, 1.024703, 1.8526406, 0.23545212, -..."
1763,Zinedine Zidane,Student,"[0.19742373, 1.265694, 0.32113343, 0.16623753,..."
1764,Zoran Djindjic,Student,"[-0.6187218, -1.115498, 1.2926705, -0.69644, 0..."
1765,Zoran Djindjic,Student,"[-0.56719923, -0.68499243, 0.9995722, -1.31357..."


In [61]:
# Step 2: Turn the [name, role, embedding] records gathered from custom_images
# into a DataFrame with the same three columns as the LFW frame.
custom_columns = ['Name', 'Role', 'Facial_Features']
custom_df = pd.DataFrame(data=person_info, columns=custom_columns)
custom_df

Unnamed: 0,Name,Role,Facial_Features
0,Abhay,Student,"[-0.5529244, 0.14004476, 0.34776747, -0.144605..."
1,Abhay,Student,"[-0.3415976, 0.5834756, 0.22209199, -0.7189925..."
2,Abhay,Student,"[-0.7486452, 0.3462922, 0.2664011, -1.4635835,..."
3,Abhay,Student,"[-0.6645088, 0.40617895, 0.18942404, -0.433024..."
4,Abhay,Student,"[-0.45482063, 0.3974201, 0.77906615, -1.194844..."
...,...,...,...
628,Zefer,Student,"[0.753831, -1.2290117, 0.78595555, 0.270209, -..."
629,Zefer,Student,"[1.3946941, -1.8593612, 0.15844575, 0.72348434..."
630,Zefer,Student,"[1.6239473, -1.7631176, 0.6296853, 1.4575704, ..."
631,Zefer,Student,"[0.47189575, -1.3044252, 0.30811393, 1.1644347..."


In [62]:
# Step 3: Stack the LFW rows on top of the custom rows and renumber the index from 0.
frames_to_merge = [lfw_df, custom_df]
combined_df = pd.concat(frames_to_merge, ignore_index=True)
print("🧠 Combined embeddings shape:", combined_df.shape)

combined_df

🧠 Combined embeddings shape: (2400, 3)


Unnamed: 0,Name,Role,Facial_Features
0,Aaron Peirsol,Student,"[-0.45143485, -1.6528509, 1.4327767, -0.422060..."
1,Aaron Peirsol,Student,"[-0.023353167, -1.446713, -0.01197511, 0.55963..."
2,Aaron Peirsol,Student,"[0.1924659, 0.27568585, 0.50092113, -0.3748789..."
3,Abdoulaye Wade,Student,"[-0.8208419, 0.2701584, -1.5315266, -0.8176923..."
4,Abdoulaye Wade,Student,"[0.23625141, -0.14451125, -1.2972999, -0.10191..."
...,...,...,...
2395,Zefer,Student,"[0.753831, -1.2290117, 0.78595555, 0.270209, -..."
2396,Zefer,Student,"[1.3946941, -1.8593612, 0.15844575, 0.72348434..."
2397,Zefer,Student,"[1.6239473, -1.7631176, 0.6296853, 1.4575704, ..."
2398,Zefer,Student,"[0.47189575, -1.3044252, 0.30811393, 1.1644347..."


In [63]:
# Step 4: Save the new combined reference embeddings
# The path is built once so the file that is written and the path that is
# reported can never drift apart.
combined_path = f"combined_embeddings_{model_name}.pkl"
combined_df.to_pickle(combined_path)
print(f"💾 Saved combined embeddings to: {combined_path}")

💾 Saved combined embeddings to: combined_embeddings_buffalo_m.pkl


In [64]:
# Step 5: Load the new combined reference embeddings

# Path of the combined embeddings pickle for the current model_name
embedding_path = f"combined_embeddings_{model_name}.pkl"

try:
    # NOTE(security): unpickling can execute arbitrary code; only load pickle
    # files that were produced locally by a trusted run of this notebook.
    dataframe = pd.read_pickle(embedding_path)
    print(f"✅ Loaded combined embeddings: {dataframe.shape}")

    # Pull the per-row embeddings out of the frame and pack them into one NumPy array
    X_list = dataframe['Facial_Features'].tolist()
    X = np.asarray(X_list)

except FileNotFoundError:
    print(f"❌ File not found: {embedding_path}")
    # Keep the expected columns on the fallback frame so later lookups such as
    # dataframe['Name'] do not raise KeyError on an otherwise empty result.
    dataframe = pd.DataFrame(columns=['Name', 'Role', 'Facial_Features'])
    X_list = []
    X = np.array([])

dataframe

✅ Loaded combined embeddings: (2400, 3)


Unnamed: 0,Name,Role,Facial_Features
0,Aaron Peirsol,Student,"[-0.45143485, -1.6528509, 1.4327767, -0.422060..."
1,Aaron Peirsol,Student,"[-0.023353167, -1.446713, -0.01197511, 0.55963..."
2,Aaron Peirsol,Student,"[0.1924659, 0.27568585, 0.50092113, -0.3748789..."
3,Abdoulaye Wade,Student,"[-0.8208419, 0.2701584, -1.5315266, -0.8176923..."
4,Abdoulaye Wade,Student,"[0.23625141, -0.14451125, -1.2972999, -0.10191..."
...,...,...,...
2395,Zefer,Student,"[0.753831, -1.2290117, 0.78595555, 0.270209, -..."
2396,Zefer,Student,"[1.3946941, -1.8593612, 0.15844575, 0.72348434..."
2397,Zefer,Student,"[1.6239473, -1.7631176, 0.6296853, 1.4575704, ..."
2398,Zefer,Student,"[0.47189575, -1.3044252, 0.30811393, 1.1644347..."
