In [None]:
!pip install deepface

In [1]:
from deepface import DeepFace
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_distances
from sklearn.metrics import accuracy_score




In [None]:
# Folder that is scanned for per-subject image sub-directories.
DATASET_PATH = "dataset/merged_lfw_cplfw"

# Root folder of the on-disk embedding cache (<EMB_PATH>/<model>/<detector>/...).
EMB_PATH = "dataset/embeddings"

# DeepFace recognition models to evaluate.
MODELS = [
    "Facenet",
    "VGG-Face",
    "ArcFace",
    "GhostFaceNet",
    "OpenFace",
]

# DeepFace face-detector backends to evaluate.
DETECTORS = [
    "retinaface",
    "centerface",
    "yunet",
    "yolov8",
]

def get_all_images(dataset_path):
    """Collect every image stored in the per-subject folders of a dataset.

    Args:
        dataset_path: Directory whose immediate sub-directories are named
            after subjects and contain that subject's image files.

    Returns:
        A list of ``(subject, image_path)`` tuples, ordered by subject name
        and then by file name. Only files ending in ``.jpg``, ``.jpeg`` or
        ``.png`` (case-insensitive) are included; entries of ``dataset_path``
        that are not directories are ignored.

    Raises:
        OSError: If ``dataset_path`` cannot be listed.
    """
    image_exts = ('.jpg', '.jpeg', '.png')
    images = []
    # os.listdir returns entries in arbitrary, platform-dependent order; sort
    # at both levels so the result is reproducible from run to run.
    for subj in sorted(os.listdir(dataset_path)):
        subj_path = os.path.join(dataset_path, subj)
        if not os.path.isdir(subj_path):
            continue
        for img in sorted(os.listdir(subj_path)):
            if img.lower().endswith(image_exts):
                images.append((subj, os.path.join(subj_path, img)))
    return images

def extract_embeddings(images, model, detector, emb_path):
    """Compute and cache one embedding per image for a model/detector pair.

    Each embedding is stored at
    ``<emb_path>/<model>/<detector>/<subject>/<image file name>.npy``.
    Images whose embedding file already exists are skipped, so the function
    can be re-run to resume an interrupted extraction.

    Args:
        images: Iterable of ``(subject, image_path)`` tuples.
        model: Value passed to ``DeepFace.represent`` as ``model_name``.
        detector: Value passed to ``DeepFace.represent`` as
            ``detector_backend``. Detection is enforced unless this is
            ``"skip"``.
        emb_path: Root directory of the embedding cache.

    Returns:
        None. Failures for individual images are printed and do not stop the
        loop (best-effort extraction).
    """
    save_dir = os.path.join(emb_path, model, detector)
    os.makedirs(save_dir, exist_ok=True)
    for subj, img_path in images:
        subj_dir = os.path.join(save_dir, subj)
        os.makedirs(subj_dir, exist_ok=True)
        emb_file = os.path.join(subj_dir, os.path.basename(img_path) + ".npy")
        if os.path.exists(emb_file):  # Already cached: skip
            continue
        # Write to a temporary file and rename it into place, so an
        # interrupted run can never leave a truncated file at the final path
        # that the existence check above would later mistake for a valid cache
        # entry.
        tmp_file = emb_file + ".tmp"
        try:
            # Only the first face returned by DeepFace is kept.
            emb = DeepFace.represent(img_path=img_path, model_name=model, detector_backend=detector, enforce_detection=(detector != "skip"), align=True)[0]["embedding"]
            # Saving through a file handle stops np.save from appending a
            # second ".npy" suffix to the temporary name.
            with open(tmp_file, "wb") as fh:
                np.save(fh, emb)
            os.replace(tmp_file, emb_file)
        except Exception as e:
            print(f"Error: {img_path} | {e}")
        finally:
            # After a successful rename the temp file no longer exists; this
            # only removes leftovers from a failed or interrupted write.
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

In [None]:
# List the dataset images once, then extract embeddings for every
# (model, detector) combination. extract_embeddings skips images whose
# embedding file already exists, so this cell can be re-run to resume.
images = get_all_images(DATASET_PATH)
for model in MODELS:
    for detector in DETECTORS:
        print(f"Extrayendo embeddings para modelo: {model} | detector: {detector}")
        extract_embeddings(images, model, detector, EMB_PATH)
print("¡Embeddings extraídos y guardados!")

In [None]:
# Number of subjects per benchmark block (passed to split_subjects).
SPLIT_SIZE = 10

def load_embeddings(model, detector, emb_root=None, lfw_required=10):
    """Load the cached embeddings of one model/detector pair, grouped by subject.

    Reads ``<emb_root>/<model>/<detector>/<subject>/*.npy``. Files whose name
    starts with ``lfw_`` are collected as references and files starting with
    ``cplfw_`` as probes; every other file is ignored without being loaded.

    Args:
        model: Model sub-directory name.
        detector: Detector sub-directory name.
        emb_root: Root directory of the embedding cache. Defaults to the
            module-level ``EMB_PATH``.
        lfw_required: Exact number of ``lfw_`` embeddings a subject must have
            to be kept (default 10).

    Returns:
        Dict mapping subject name to
        ``{"lfw": [(file_name, embedding), ...], "cplfw": [...]}`` with both
        lists ordered by file name. Only subjects with exactly
        ``lfw_required`` ``lfw_`` embeddings and at least one ``cplfw_``
        embedding are included.

    Raises:
        OSError: If the model/detector directory cannot be listed.
    """
    if emb_root is None:
        emb_root = EMB_PATH
    emb_dir = os.path.join(emb_root, model, detector)
    subjects = {}
    # Sorted listings keep the result independent of filesystem ordering.
    for subj in sorted(os.listdir(emb_dir)):
        subj_path = os.path.join(emb_dir, subj)
        if not os.path.isdir(subj_path):
            continue
        lfw_embs = []
        cplfw_embs = []
        for emb_name in sorted(os.listdir(subj_path)):
            # Skip anything that is not a saved array (temp files, OS
            # metadata, ...) instead of letting np.load fail on it.
            if not emb_name.endswith(".npy"):
                continue
            if emb_name.startswith("lfw_"):
                bucket = lfw_embs
            elif emb_name.startswith("cplfw_"):
                bucket = cplfw_embs
            else:
                continue  # Unknown origin: do not load at all.
            bucket.append((emb_name, np.load(os.path.join(subj_path, emb_name))))
        if len(lfw_embs) == lfw_required and len(cplfw_embs) > 0:
            subjects[subj] = {"lfw": lfw_embs, "cplfw": cplfw_embs}
    return subjects

def split_subjects(subjects, split_size):
    """Partition the subject names into consecutive blocks.

    Args:
        subjects: Mapping keyed by subject name.
        split_size: Maximum number of subjects per block.

    Returns:
        List of lists of subject names, taken in sorted order; the final
        block holds the remainder and may be shorter than ``split_size``.
    """
    ordered = sorted(subjects.keys())
    blocks = []
    for start in range(0, len(ordered), split_size):
        blocks.append(ordered[start:start + split_size])
    return blocks

def benchmark_block(subjects, block_subjects):
    """Run closed-set nearest-neighbour identification inside one block.

    Every ``cplfw`` embedding of the block's subjects is used as a probe and
    compared, by cosine distance, against all ``lfw`` embeddings of the same
    block. A probe is assigned to the subject owning the closest reference.

    Args:
        subjects: Mapping ``subject -> {"lfw": [(name, emb), ...],
            "cplfw": [(name, emb), ...]}``.
        block_subjects: Names of the subjects that form this block.

    Returns:
        Tuple ``(accuracy, n_probes)``. When the block contains no probes the
        accuracy is ``nan`` and ``n_probes`` is 0.
    """
    # Gallery: all reference embeddings of the block, flattened in block
    # order, with the owning subject recorded for each row.
    ref_embs = []
    ref_owner = []
    for subj in block_subjects:
        for _, emb in subjects[subj]["lfw"]:
            ref_embs.append(emb)
            ref_owner.append(subj)

    # Probes: every cplfw embedding, labelled with its true subject.
    probe_embs = []
    true_labels = []
    for subj in block_subjects:
        for _, emb in subjects[subj]["cplfw"]:
            probe_embs.append(emb)
            true_labels.append(subj)

    if not probe_embs:
        # Nothing to evaluate; report it explicitly rather than scoring
        # empty label lists.
        return float("nan"), 0

    # One batched distance computation (probes x references) replaces a
    # separate call per probe/subject pair.
    dists = cosine_distances(np.asarray(probe_embs), np.asarray(ref_embs))
    # argmin returns the first minimum, so ties resolve to the earliest
    # subject in block order, as a strict "<" scan over subjects would.
    nearest = np.argmin(dists, axis=1)
    pred_labels = [ref_owner[idx] for idx in nearest]

    acc = accuracy_score(true_labels, pred_labels)
    return acc, len(true_labels)

In [None]:
# For every (model, detector) combination: load its cached embeddings, split
# the subjects into blocks of SPLIT_SIZE and print the accuracy and probe
# count of each block.
for model in MODELS:
    for detector in DETECTORS:
        print(f"Benchmarking modelo: {model} | detector: {detector}")
        subjects = load_embeddings(model, detector)
        splits = split_subjects(subjects, SPLIT_SIZE)
        for i, block_subjects in enumerate(splits):
            # Each block is evaluated independently of the others.
            acc, n = benchmark_block(subjects, block_subjects)
            print(f"Split {i+1}: Acc: {acc:.3f} (n={n})")