In [1]:
import os
import re
import time
import numpy as np
from deepface import DeepFace
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd




In [3]:
# Configuration
# Flat directory scanned by get_persons_and_images for "<person>_<n>.jpg" files.
DATASET_PATH = "dataset/cplfw/images"
# Number of persons per evaluation block (consumed by split_persons).
SPLIT_SIZE = 10
# DeepFace model names, forwarded as `model_name` to DeepFace.verify.
MODELS = ["Facenet", "VGG-Face", "ArcFace", "GhostFaceNet", "OpenFace"]

def get_persons_and_images(dataset_path):
    """Map each person to their image files, ordered by image number.

    Scans ``dataset_path`` (non-recursively) for files named
    ``<person>_<number>.jpg``; any other file is ignored.

    Args:
        dataset_path: Directory that contains the image files.

    Returns:
        dict: person name -> list of file names (not full paths) sorted by
        ascending image number. Keys are inserted in the order persons first
        appear in the *sorted* directory listing, so the result (and any
        split derived from it) is the same on every run.
    """
    pattern = re.compile(r"(.+?)_(\d+)\.jpg$")
    numbered = {}
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(dataset_path)):
        match = pattern.match(file_name)
        if match:
            # Keep the parsed number next to the name so sorting needs no
            # second regex pass.
            numbered.setdefault(match.group(1), []).append(
                (int(match.group(2)), file_name)
            )
    return {
        name: [file_name for _, file_name in sorted(entries)]
        for name, entries in numbered.items()
    }

def split_persons(persons, split_size):
    """Chunk the person names into consecutive blocks of at most ``split_size``.

    Args:
        persons: Mapping whose keys are the person names.
        split_size: Maximum number of persons per block.

    Returns:
        list of lists of person names, in the mapping's key order; the last
        block may be shorter than ``split_size``.
    """
    names = list(persons.keys())
    blocks = []
    for start in range(0, len(names), split_size):
        stop = start + split_size
        blocks.append(names[start:stop])
    return blocks

def benchmark_block(model_name, block_persons, persons, dataset_path):
    """Evaluate one block of persons with a DeepFace model (1:N identification).

    For every person in the block, the first image whose file name contains
    ``"_1.jpg"`` is the reference and all their other images are probes.
    Each probe is compared against every reference with ``DeepFace.verify``
    (detection skipped) and assigned to the reference with the smallest
    reported distance.

    Args:
        model_name: DeepFace model name forwarded to ``DeepFace.verify``.
        block_persons: Names of the persons that make up this block.
        persons: Mapping person -> list of image file names.
        dataset_path: Directory that contains the image files.

    Returns:
        dict with keys ``model``, ``accuracy``, ``precision``, ``recall``,
        ``f1`` (macro-averaged), ``avg_time`` (mean seconds per probe) and
        ``n_tests`` (number of probes). If the block has no probes, the
        metric values and ``avg_time`` are NaN and ``n_tests`` is 0.
    """
    # Label recorded when every comparison of a probe failed. A plain string
    # keeps the label arrays homogeneous (the original appended None).
    no_match = "__no_match__"

    # References: one image per person; persons without one get no reference.
    references = {}
    for person in block_persons:
        ref_imgs = [img for img in persons[person] if "_1.jpg" in img]
        if ref_imgs:
            references[person] = os.path.join(dataset_path, ref_imgs[0])

    # Probes: every non-reference image of every person in the block.
    # Persons lacking a reference still contribute probes (counted as misses).
    probes = [
        (os.path.join(dataset_path, img), person)
        for person in block_persons
        for img in persons[person]
        if "_1.jpg" not in img
    ]

    if not probes:
        # Nothing to score: report NaN instead of feeding empty arrays to the
        # metric functions.
        nan = float("nan")
        return {
            "model": model_name,
            "accuracy": nan,
            "precision": nan,
            "recall": nan,
            "f1": nan,
            "avg_time": nan,
            "n_tests": 0
        }

    true_labels = [person for _, person in probes]
    pred_labels = []
    times = []

    # Inference
    for probe_path, _ in probes:
        # perf_counter is monotonic, so elapsed times cannot go negative.
        start = time.perf_counter()
        best_match = None
        min_dist = float("inf")
        for ref_person, ref_path in references.items():
            try:
                result = DeepFace.verify(img1_path=probe_path, img2_path=ref_path, model_name=model_name, enforce_detection=False, detector_backend="skip")
                dist = result["distance"]
                if dist < min_dist:
                    min_dist = dist
                    best_match = ref_person
            except Exception as e:
                # Best effort: report the failing pair and try the next reference.
                print(f"Error procesando {probe_path} vs {ref_path}: {e}")
        pred_labels.append(no_match if best_match is None else best_match)
        times.append(time.perf_counter() - start)

    # Metrics. `labels` reproduces the default label set (union of true and
    # predicted labels) minus the sentinel, so unmatched probes count as
    # misses without adding an artificial class to the macro average.
    eval_labels = sorted(set(true_labels) | (set(pred_labels) - {no_match}))
    acc = accuracy_score(true_labels, pred_labels)
    prec = precision_score(true_labels, pred_labels, labels=eval_labels, average="macro", zero_division=0)
    recall = recall_score(true_labels, pred_labels, labels=eval_labels, average="macro", zero_division=0)
    f1 = f1_score(true_labels, pred_labels, labels=eval_labels, average="macro", zero_division=0)
    avg_time = np.mean(times)
    return {
        "model": model_name,
        "accuracy": acc,
        "precision": prec,
        "recall": recall,
        "f1": f1,
        "avg_time": avg_time,
        "n_tests": len(probes)
    }

def run_benchmark():
    """Run every model in MODELS over every block of persons and save the metrics.

    Persons are read from DATASET_PATH and split into blocks of SPLIT_SIZE.
    One metrics row per (model, split) is printed and finally written to
    ``benchmark_results.csv`` in the working directory.

    The CSV is also written when the run is cut short (for example by a
    KeyboardInterrupt), so the splits already evaluated are not lost; the
    interrupting exception is still propagated.
    """
    persons = get_persons_and_images(DATASET_PATH)
    blocks = split_persons(persons, SPLIT_SIZE)
    results = []
    finished = False

    try:
        for model in MODELS:
            print(f"Evaluando modelo: {model}")
            for i, block_persons in enumerate(blocks):
                print(f" - Split {i+1}/{len(blocks)}")
                metrics = benchmark_block(model, block_persons, persons, DATASET_PATH)
                metrics["split"] = i+1
                results.append(metrics)
                print(metrics)
        finished = True
    finally:
        # A complete run always writes the file (even if empty, as before);
        # an aborted run writes it only when there is something to keep.
        if finished or results:
            pd.DataFrame(results).to_csv("benchmark_results.csv", index=False)

In [4]:
# Run the CPLFW benchmark for every model in MODELS (long-running).
run_benchmark()

Evaluando modelo: VGG-Face
 - Split 1/393


KeyboardInterrupt: 

In [11]:
import os

# Change this path to wherever your LFW person folders live.
# Forward slashes resolve on both Windows and POSIX; the previous
# backslash-separated literal only worked on Windows.
DATASET_PATH = "dataset/lfw/lfw-deepfunneled/lfw-deepfunneled"

def contar_sujetos_con_mas_de_10_fotos_por_carpeta(dataset_path, min_fotos=10):
    """Count the subject folders that hold at least ``min_fotos`` images.

    Every sub-directory of ``dataset_path`` is one subject; files ending in
    .jpg, .jpeg or .png (case-insensitive) count as images.

    Args:
        dataset_path: Directory with one sub-folder per subject.
        min_fotos: Minimum number of images (inclusive) for a subject to count.

    Returns:
        tuple ``(subjects_with_enough_images, total_subjects)``.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    total = 0
    enough = 0
    for entry in os.listdir(dataset_path):
        folder = os.path.join(dataset_path, entry)
        if not os.path.isdir(folder):
            continue
        total += 1
        n_images = sum(
            1 for f in os.listdir(folder) if f.lower().endswith(image_suffixes)
        )
        if n_images >= min_fotos:
            enough += 1
    return enough, total

if __name__ == "__main__":
    sujetos_10omas, total = contar_sujetos_con_mas_de_10_fotos_por_carpeta(DATASET_PATH, min_fotos=10)
    # A directory without subject folders gives total == 0; report 0% rather
    # than raising ZeroDivisionError.
    porcentaje = 100*sujetos_10omas/total if total else 0.0
    print(f"Sujetos con 10 o más fotos: {sujetos_10omas} / {total} ({porcentaje:.2f}%)")

Sujetos con 10 o más fotos: 158 / 5749 (2.75%)


In [10]:
import os
import re

# Change this path to wherever your CPLFW images live
# (a flat folder of "<subject>_<n>.jpg" files).
DATASET_PATH = "dataset/cplfw/images"

def contar_sujetos_con_mas_de_3_fotos(dataset_path, min_fotos=3):
    """Count the subjects that have at least ``min_fotos`` images in a flat folder.

    Files must be named ``<subject>_<number>.jpg``; anything else is ignored.

    Args:
        dataset_path: Directory containing the image files.
        min_fotos: Minimum number of images (inclusive) for a subject to count.

    Returns:
        tuple ``(subjects_with_enough_images, total_subjects)``.
    """
    pattern = re.compile(r"(.+?)_\d+\.jpg$")
    images_per_subject = {}
    for file_name in os.listdir(dataset_path):
        found = pattern.match(file_name)
        if found is None:
            continue
        subject = found.group(1)
        images_per_subject[subject] = images_per_subject.get(subject, 0) + 1
    enough = len([n for n in images_per_subject.values() if n >= min_fotos])
    return enough, len(images_per_subject)

if __name__ == "__main__":
    sujetos_3omas, total = contar_sujetos_con_mas_de_3_fotos(DATASET_PATH, min_fotos=3)
    # A folder without matching images gives total == 0; report 0% rather
    # than raising ZeroDivisionError.
    porcentaje = 100*sujetos_3omas/total if total else 0.0
    print(f"Sujetos con 3 o más fotos: {sujetos_3omas} / {total} ({porcentaje:.2f}%)")

Sujetos con 3 o más fotos: 3811 / 3929 (97.00%)


In [12]:
import os
import shutil
import re

# LFW root: one sub-folder per subject (read by obtener_imagenes_lfw).
LFW_PATH = "dataset/lfw/lfw-deepfunneled/lfw-deepfunneled"
# Flat folder of CPLFW images named "<subject>_<n>.jpg" (read by obtener_imagenes_cplfw).
CPLFW_PATH = "dataset/cplfw/images"
# Destination of the merged dataset; created on demand by copiar_imagenes_a_nueva_carpeta.
OUTPUT_PATH = "dataset/merged_lfw_cplfw"

def obtener_imagenes_lfw(lfw_path, min_fotos=10, max_fotos=10):
    """Select the LFW subjects with enough images and keep their first ones.

    Each sub-directory of ``lfw_path`` is a subject; files ending in .jpg,
    .jpeg or .png (case-insensitive) count as images.

    Args:
        lfw_path: Directory with one sub-folder per subject.
        min_fotos: Minimum number of images (inclusive) a subject must have.
        max_fotos: Maximum number of images kept per subject, taken from the
            start of the alphabetically sorted list. Defaults to 10 (the
            previously hard-coded cap); ``None`` keeps all of them.

    Returns:
        dict: subject -> list of image file names (not full paths). Subjects
        are inserted in alphabetical order so the result is deterministic.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    sujetos_validos = {}
    # os.listdir order is arbitrary; sort so the key order is reproducible.
    for sujeto in sorted(os.listdir(lfw_path)):
        path_sujeto = os.path.join(lfw_path, sujeto)
        if not os.path.isdir(path_sujeto):
            continue
        fotos = sorted(f for f in os.listdir(path_sujeto) if f.lower().endswith(image_suffixes))
        if len(fotos) >= min_fotos:
            sujetos_validos[sujeto] = fotos[:max_fotos]
    return sujetos_validos

def obtener_imagenes_cplfw(cplfw_path, sujetos_validos):
    """Collect, per accepted subject, the CPLFW images numbered 2 or higher.

    Files in ``cplfw_path`` must be named ``<subject>_<number>.jpg``. Only
    subjects contained in ``sujetos_validos`` are considered, and image
    number 1 is skipped.

    Args:
        cplfw_path: Flat directory with the CPLFW images.
        sujetos_validos: Container of accepted subject names (membership test).

    Returns:
        dict: subject -> list of matching file names, in directory-listing
        order. Subjects without any such image are absent.
    """
    pattern = re.compile(r"(.+?)_(\d+)\.jpg$")
    per_subject = {}
    for file_name in os.listdir(cplfw_path):
        found = pattern.match(file_name)
        if found is None:
            continue
        subject = found.group(1)
        number = int(found.group(2))
        if subject in sujetos_validos and number >= 2:
            if subject not in per_subject:
                per_subject[subject] = []
            per_subject[subject].append(file_name)
    # Keep only subjects with at least one image numbered >= 2.
    return {subject: files for subject, files in per_subject.items() if files}

def merge_lfw_cplfw(lfw_path, cplfw_path, min_lfw=10):
    """Pair up the subjects usable from both LFW and CPLFW.

    Args:
        lfw_path: LFW root, one sub-folder per subject.
        cplfw_path: Flat directory with the CPLFW images.
        min_lfw: Minimum number of LFW images a subject must have.

    Returns:
        tuple ``(sujetos_lfw, sujetos_cplfw)``: two dicts with identical keys
        (the common subjects, in alphabetical order) mapping to the selected
        LFW and CPLFW file names respectively.
    """
    sujetos_lfw = obtener_imagenes_lfw(lfw_path, min_lfw)
    sujetos_cplfw = obtener_imagenes_cplfw(cplfw_path, sujetos_lfw)
    # Iteration order of a set of strings changes between interpreter runs
    # (hash randomization). Sort so the key order, and any seeded sampling
    # done on it later, is reproducible.
    sujetos_comunes = sorted(set(sujetos_lfw.keys()) & set(sujetos_cplfw.keys()))
    sujetos_lfw = {k: sujetos_lfw[k] for k in sujetos_comunes}
    sujetos_cplfw = {k: sujetos_cplfw[k] for k in sujetos_comunes}
    return sujetos_lfw, sujetos_cplfw

def copiar_imagenes_a_nueva_carpeta(lfw, cplfw, lfw_path, cplfw_path, output_path):
    """Copy the selected images into ``output_path``, one folder per subject.

    LFW images are copied as ``lfw_<name>`` and CPLFW images as
    ``cplfw_<name>`` inside ``<output_path>/<subject>/``. Existing folders
    are reused and existing files overwritten.

    Args:
        lfw: dict subject -> LFW file names (relative to the subject folder).
        cplfw: dict subject -> CPLFW file names; subjects missing here simply
            get no CPLFW images.
        lfw_path: LFW root, one sub-folder per subject.
        cplfw_path: Flat directory with the CPLFW images.
        output_path: Destination root; created if necessary.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)
    for sujeto, fotos_lfw in lfw.items():
        out_sujeto_path = os.path.join(output_path, sujeto)
        os.makedirs(out_sujeto_path, exist_ok=True)
        # Base images (LFW)
        for img in fotos_lfw:
            src = os.path.join(lfw_path, sujeto, img)
            dst = os.path.join(out_sujeto_path, "lfw_" + img)
            shutil.copy2(src, dst)
        # Test images (CPLFW)
        for img in cplfw.get(sujeto, ()):
            src = os.path.join(cplfw_path, img)
            dst = os.path.join(out_sujeto_path, "cplfw_" + img)
            shutil.copy2(src, dst)

if __name__ == "__main__":
    # Select the subjects usable from both datasets, then copy their images
    # into OUTPUT_PATH (one folder per subject).
    lfw, cplfw = merge_lfw_cplfw(LFW_PATH, CPLFW_PATH)
    print(f"Copiando sujetos en común: {len(lfw)}")
    copiar_imagenes_a_nueva_carpeta(lfw, cplfw, LFW_PATH, CPLFW_PATH, OUTPUT_PATH)
    print(f"¡Listo! Las imágenes se copiaron en {OUTPUT_PATH}")

Copiando sujetos en común: 135
¡Listo! Las imágenes se copiaron en dataset/merged_lfw_cplfw


In [3]:
import os
import shutil
import re
import random

# LFW root: one sub-folder per subject (read by obtener_imagenes_lfw).
LFW_PATH = "dataset/lfw/lfw-deepfunneled/lfw-deepfunneled"
# Flat folder of CPLFW images named "<subject>_<n>.jpg" (read by obtener_imagenes_cplfw).
CPLFW_PATH = "dataset/cplfw/images"
# Destination of the sampled, merged dataset.
OUTPUT_PATH = "dataset/merged_lfw_cplfw_50"
# Number of subjects to sample from the common set.
N_SUJETOS = 50
# Seed passed to random.seed before sampling.
RANDOM_SEED = 42

def obtener_imagenes_lfw(lfw_path, min_fotos=10, max_fotos=10):
    """Select the LFW subjects with enough images and keep their first ones.

    Each sub-directory of ``lfw_path`` is a subject; files ending in .jpg,
    .jpeg or .png (case-insensitive) count as images.

    Args:
        lfw_path: Directory with one sub-folder per subject.
        min_fotos: Minimum number of images (inclusive) a subject must have.
        max_fotos: Maximum number of images kept per subject, taken from the
            start of the alphabetically sorted list. Defaults to 10 (the
            previously hard-coded cap); ``None`` keeps all of them.

    Returns:
        dict: subject -> list of image file names (not full paths). Subjects
        are inserted in alphabetical order so the result is deterministic.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    sujetos_validos = {}
    # os.listdir order is arbitrary; sort so the key order is reproducible.
    for sujeto in sorted(os.listdir(lfw_path)):
        path_sujeto = os.path.join(lfw_path, sujeto)
        if not os.path.isdir(path_sujeto):
            continue
        fotos = sorted(f for f in os.listdir(path_sujeto) if f.lower().endswith(image_suffixes))
        if len(fotos) >= min_fotos:
            sujetos_validos[sujeto] = fotos[:max_fotos]
    return sujetos_validos

def obtener_imagenes_cplfw(cplfw_path, sujetos_validos):
    """Collect, per accepted subject, the CPLFW images numbered 2 or higher.

    Files in ``cplfw_path`` must be named ``<subject>_<number>.jpg``. Only
    subjects contained in ``sujetos_validos`` are considered, and image
    number 1 is skipped.

    Args:
        cplfw_path: Flat directory with the CPLFW images.
        sujetos_validos: Container of accepted subject names (membership test).

    Returns:
        dict: subject -> list of matching file names, in directory-listing
        order. Subjects without any such image are absent.
    """
    pattern = re.compile(r"(.+?)_(\d+)\.jpg$")
    per_subject = {}
    for file_name in os.listdir(cplfw_path):
        found = pattern.match(file_name)
        if found is None:
            continue
        subject = found.group(1)
        number = int(found.group(2))
        if subject in sujetos_validos and number >= 2:
            if subject not in per_subject:
                per_subject[subject] = []
            per_subject[subject].append(file_name)
    return {subject: files for subject, files in per_subject.items() if files}

def merge_lfw_cplfw(lfw_path, cplfw_path, min_lfw=10):
    """Pair up the subjects usable from both LFW and CPLFW.

    Args:
        lfw_path: LFW root, one sub-folder per subject.
        cplfw_path: Flat directory with the CPLFW images.
        min_lfw: Minimum number of LFW images a subject must have.

    Returns:
        tuple ``(sujetos_lfw, sujetos_cplfw)``: two dicts with identical keys
        (the common subjects, in alphabetical order) mapping to the selected
        LFW and CPLFW file names respectively.
    """
    sujetos_lfw = obtener_imagenes_lfw(lfw_path, min_lfw)
    sujetos_cplfw = obtener_imagenes_cplfw(cplfw_path, sujetos_lfw)
    # Iteration order of a set of strings changes between interpreter runs
    # (hash randomization). Sort so the key order, and any seeded sampling
    # done on it later, is reproducible.
    sujetos_comunes = sorted(set(sujetos_lfw.keys()) & set(sujetos_cplfw.keys()))
    sujetos_lfw = {k: sujetos_lfw[k] for k in sujetos_comunes}
    sujetos_cplfw = {k: sujetos_cplfw[k] for k in sujetos_comunes}
    return sujetos_lfw, sujetos_cplfw

def copiar_imagenes_a_nueva_carpeta(lfw, cplfw, lfw_path, cplfw_path, output_path):
    """Copy the selected images into ``output_path``, one folder per subject.

    LFW images are copied as ``lfw_<name>`` and CPLFW images as
    ``cplfw_<name>`` inside ``<output_path>/<subject>/``. Existing folders
    are reused and existing files overwritten.

    Args:
        lfw: dict subject -> LFW file names (relative to the subject folder).
        cplfw: dict subject -> CPLFW file names; subjects missing here simply
            get no CPLFW images.
        lfw_path: LFW root, one sub-folder per subject.
        cplfw_path: Flat directory with the CPLFW images.
        output_path: Destination root; created if necessary.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)
    for sujeto, fotos_lfw in lfw.items():
        out_sujeto_path = os.path.join(output_path, sujeto)
        os.makedirs(out_sujeto_path, exist_ok=True)
        for img in fotos_lfw:
            src = os.path.join(lfw_path, sujeto, img)
            dst = os.path.join(out_sujeto_path, "lfw_" + img)
            shutil.copy2(src, dst)
        for img in cplfw.get(sujeto, ()):
            src = os.path.join(cplfw_path, img)
            dst = os.path.join(out_sujeto_path, "cplfw_" + img)
            shutil.copy2(src, dst)

lfw, cplfw = merge_lfw_cplfw(LFW_PATH, CPLFW_PATH)
# Sort the candidates: random.sample picks by position, so a fixed seed only
# gives the same subjects if the list order is the same on every run.
sujetos_comunes = sorted(lfw.keys())
print(f"Sujetos en común antes del muestreo: {len(sujetos_comunes)}")
# Reproducible random selection
random.seed(RANDOM_SEED)
if len(sujetos_comunes) < N_SUJETOS:
    print(f"¡Advertencia! Solo hay {len(sujetos_comunes)} sujetos. Usando todos.")
    sujetos_seleccionados = sujetos_comunes
else:
    sujetos_seleccionados = random.sample(sujetos_comunes, N_SUJETOS)
# Restrict both dictionaries to the selected subjects
lfw = {k: lfw[k] for k in sujetos_seleccionados}
cplfw = {k: cplfw[k] for k in sujetos_seleccionados}
print(f"Copiando sujetos seleccionados: {len(lfw)}")
copiar_imagenes_a_nueva_carpeta(lfw, cplfw, LFW_PATH, CPLFW_PATH, OUTPUT_PATH)
# Report the real number of subjects copied (it can be below N_SUJETOS).
print(f"¡Listo! Las imágenes de {len(lfw)} sujetos seleccionados se copiaron en {OUTPUT_PATH}")

Sujetos en común antes del muestreo: 135
Copiando sujetos seleccionados: 50
¡Listo! Las imágenes de 50 sujetos seleccionados se copiaron en dataset/merged_lfw_cplfw_50


In [None]:
import os
import time
import numpy as np
from deepface import DeepFace
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Configuration
# Dataset root with one folder per subject holding "lfw_*" and "cplfw_*" files
# (read by get_subjects_and_images).
DATASET_PATH = "dataset/merged_lfw_cplfw"
# Subjects per evaluation block (consumed by split_subjects).
SPLIT_SIZE = 10
# DeepFace recognition models, forwarded as `model_name` to DeepFace.verify.
MODELS = ["VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace", "ArcFace", "Dlib", "SFace"]
# Face detectors, forwarded as `detector_backend` to DeepFace.verify.
DETECTORS = ["retinaface", "centerface", "yunet", "yolov8"]

def get_subjects_and_images(dataset_path):
    """Index the merged dataset by subject.

    Each sub-directory of ``dataset_path`` is a subject. Files whose name
    starts with ``lfw_`` are the reference images and files starting with
    ``cplfw_`` are the probe images. A subject is kept only if it has exactly
    10 reference images and at least one probe image.

    Args:
        dataset_path: Root directory with one sub-folder per subject.

    Returns:
        dict: subject -> ``{'lfw': [paths], 'cplfw': [paths]}`` with sorted
        path lists. Subjects are inserted in alphabetical order so that the
        splits built from this dict are the same on every run.
    """
    subjects = {}
    # os.listdir order is arbitrary; sort for reproducible splits.
    for s in sorted(os.listdir(dataset_path)):
        subj_path = os.path.join(dataset_path, s)
        if not os.path.isdir(subj_path):
            continue
        # List the folder once; sorting names first keeps the joined paths sorted.
        file_names = sorted(os.listdir(subj_path))
        lfw_imgs = [os.path.join(subj_path, f) for f in file_names if f.startswith("lfw_")]
        cplfw_imgs = [os.path.join(subj_path, f) for f in file_names if f.startswith("cplfw_")]
        if len(lfw_imgs) == 10 and len(cplfw_imgs) > 0:
            subjects[s] = {'lfw': lfw_imgs, 'cplfw': cplfw_imgs}
    return subjects

def split_subjects(subjects, split_size):
    """Chunk the subject names into consecutive blocks of at most ``split_size``.

    Args:
        subjects: Mapping whose keys are the subject names.
        split_size: Maximum number of subjects per block.

    Returns:
        list of lists of subject names, in the mapping's key order; the last
        block may be shorter than ``split_size``.
    """
    names = list(subjects.keys())
    blocks = []
    for start in range(0, len(names), split_size):
        stop = start + split_size
        blocks.append(names[start:stop])
    return blocks

def benchmark_block(model_name, detector, block_subjects, subjects):
    """Evaluate one block of subjects with a DeepFace model/detector pair.

    The ``'lfw'`` images of each subject form the gallery and the ``'cplfw'``
    images are the probes. Each probe is compared with ``DeepFace.verify``
    against every gallery image of every subject in the block; the predicted
    subject is the owner of the gallery image with the smallest distance.

    Args:
        model_name: DeepFace model name forwarded to ``DeepFace.verify``.
        detector: Detector backend forwarded to ``DeepFace.verify``; face
            detection is enforced unless it is ``"skip"``.
        block_subjects: Names of the subjects that make up this block.
        subjects: Mapping subject -> ``{'lfw': [paths], 'cplfw': [paths]}``.

    Returns:
        dict with keys ``model``, ``detector``, ``accuracy``, ``precision``,
        ``recall``, ``f1`` (macro-averaged), ``avg_time`` (mean seconds per
        probe) and ``n_tests`` (number of probes). If the block has no
        probes, the metric values and ``avg_time`` are NaN and ``n_tests``
        is 0.
    """
    # Label recorded when every comparison of a probe failed. A plain string
    # keeps the label arrays homogeneous (the original appended None).
    no_match = "__no_match__"

    # Gallery: subject -> list of reference image paths (lfw).
    references = {subj: subjects[subj]['lfw'] for subj in block_subjects}

    # Probes: every cplfw image of the block, paired with its true subject.
    probes = [
        (probe_img, subj)
        for subj in block_subjects
        for probe_img in subjects[subj]['cplfw']
    ]

    if not probes:
        # Nothing to score: report NaN instead of feeding empty arrays to the
        # metric functions.
        nan = float("nan")
        return {
            "model": model_name,
            "detector": detector,
            "accuracy": nan,
            "precision": nan,
            "recall": nan,
            "f1": nan,
            "avg_time": nan,
            "n_tests": 0
        }

    true_labels = [subj for _, subj in probes]
    pred_labels = []
    times = []

    # Inference
    for probe_path, _ in probes:
        # perf_counter is monotonic, so elapsed times cannot go negative.
        start = time.perf_counter()
        best_match = None
        min_dist = float("inf")
        for ref_subj, ref_imgs in references.items():
            # Compare against every gallery image and keep the smallest distance.
            for ref_path in ref_imgs:
                try:
                    result = DeepFace.verify(
                        img1_path=probe_path,
                        img2_path=ref_path,
                        model_name=model_name,
                        detector_backend=detector,
                        enforce_detection=(detector != "skip")
                    )
                    dist = result["distance"]
                    if dist < min_dist:
                        min_dist = dist
                        best_match = ref_subj
                except Exception as e:
                    # Best effort: report the failing pair and keep going.
                    print(f"Error: {probe_path} vs {ref_path} | {e}")
        pred_labels.append(no_match if best_match is None else best_match)
        times.append(time.perf_counter() - start)

    # Metrics. `labels` reproduces the default label set (union of true and
    # predicted labels) minus the sentinel, so unmatched probes count as
    # misses without adding an artificial class to the macro average.
    eval_labels = sorted(set(true_labels) | (set(pred_labels) - {no_match}))
    acc = accuracy_score(true_labels, pred_labels)
    prec = precision_score(true_labels, pred_labels, labels=eval_labels, average="macro", zero_division=0)
    recall = recall_score(true_labels, pred_labels, labels=eval_labels, average="macro", zero_division=0)
    f1 = f1_score(true_labels, pred_labels, labels=eval_labels, average="macro", zero_division=0)
    avg_time = np.mean(times)
    return {
        "model": model_name,
        "detector": detector,
        "accuracy": acc,
        "precision": prec,
        "recall": recall,
        "f1": f1,
        "avg_time": avg_time,
        "n_tests": len(probes)
    }

def benchmark_init():
    """Benchmark every (model, detector) pair over every block of subjects.

    Subjects are read from DATASET_PATH and split into blocks of SPLIT_SIZE.
    One metrics row per (model, detector, split) is printed and finally
    written to ``benchmark_results_detectors.csv`` in the working directory.

    The CSV is also written when the run is cut short (for example by a
    KeyboardInterrupt), so the splits already evaluated are not lost; the
    interrupting exception is still propagated.
    """
    subjects = get_subjects_and_images(DATASET_PATH)
    splits = split_subjects(subjects, SPLIT_SIZE)
    results = []
    finished = False
    try:
        for model in MODELS:
            for detector in DETECTORS:
                print(f"Evaluando modelo: {model} | detector: {detector}")
                for i, block_subjects in enumerate(splits):
                    print(f" - Split {i+1}/{len(splits)} ({len(block_subjects)} sujetos)")
                    metrics = benchmark_block(model, detector, block_subjects, subjects)
                    metrics["split"] = i+1
                    results.append(metrics)
                    print(metrics)
        finished = True
    finally:
        # A complete run always writes the file (even if empty, as before);
        # an aborted run writes it only when there is something to keep.
        if finished or results:
            pd.DataFrame(results).to_csv("benchmark_results_detectors.csv", index=False)

In [None]:
# Run the model x detector benchmark over DATASET_PATH (long-running).
benchmark_init()