In [1]:
import os
import cv2
import numpy as np
from deepface import DeepFace
#from deepface.commons import functions

from time import time

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier




# Definición de funciones 

In [None]:
def GetSVMPredictions(X_train, X_test, y_train, y_test):
    
    print("+++++++++++++++++++++++++++++++++++++++++++\n")
    print("SVM Normalization...")
    scaler = MinMaxScaler()
    train_X = scaler.fit_transform(X_train)
    test_X = scaler.transform(X_test)

    print("SVM training...")
    t0 = time()
    parameters = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],}
    # Grid serach across parameter range
    clf = GridSearchCV(
        SVC(kernel='rbf', class_weight='balanced'), parameters, cv=5
    )
    clf = clf.fit(train_X, y_train)
    print("done in %0.3fs" % (time() - t0))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)


    print("Predicting")
    t0 = time()
    y_pred = clf.predict(test_X)
    print("done in %0.3fs" % (time() - t0))
    
    return y_pred, y_test


In [1]:
def calc_embs(X, batch_size=2): #No usada desde 2024 en el código
    norm_images = prewhiten(X)
    pd = []
    for start in range(0, len(norm_images), batch_size):
        pd.append(model.predict_on_batch(norm_images[start:start+batch_size]))     #https://github.com/serengil/deepface/issues/819           
    return l2_normalize(np.concatenate(pd))

def l2_normalize(x, axis=-1, epsilon=1e-10):
    output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
    return output

def prewhiten(x):
    if x.ndim == 4:
        axis = (1, 2, 3)
        size = x[0].size
    elif x.ndim == 3:
        axis = (0, 1, 2)
        size = x.size
    else:
        raise ValueError('Dimension should be 3 or 4')

    mean = np.mean(x, axis=axis, keepdims=True)
    std = np.std(x, axis=axis, keepdims=True)
    std_adj = np.maximum(std, 1.0/np.sqrt(size))
    y = (x - mean) / std_adj
    return y


def LoadDataset(folder, ext):
    # Contador de número de clases del conjunto
    nclasses = 0
    # Contador de muestras por clase
    nperclass = []
    # Etiqueta de cada clase (nombre de la subcarpeta)
    classlabels = []
    # Inicializa estructuras de datos y sus correpondientes etiquetas
    X = []
    Y = []

    preprocessing = 0

    # Asume que en la ruta indicada hay una subcarpeta por clase
    for class_name in os.listdir(folder):
        # Cada subcarpeta implica una clase más
        nclasses += 1
        # Inicialmente esta clase no tiene muestras
        nsamples = 1    

        # Compone la ruta
        class_folder = os.path.join(folder, class_name)
        for file_name in os.listdir(class_folder):
            # Asume imágenes en formato ext
            if file_name.endswith(ext):
                # Lee la imagen
                image = cv2.imread (os.path.join(class_folder, file_name))  

                # Obtiene embeddings
                img1 = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)

                # Hasta 2023 usaba
                # Get embeddings after preprocessing
                #if preprocessing == 1:
                #    img_embedding = calc_embs(np.array([img1]))
                #else:
                #    img_embedding = model.predict(img1[None,...])

                #X.append(img_embedding[0])
                
                #Desde 2024
                embedding_objs = DeepFace.represent(img_path = img1,model_name  = "Facenet", enforce_detection = False)   
                img_embedding = embedding_objs[0]["embedding"]             

                X.append(img_embedding)

                # Añade etiqueta numérica de la muestra
                Y.append(nclasses-1)

                #Incrementa el número de muestras
                nsamples += 1

        nperclass.append(nsamples)
        classlabels.append(class_name)

    #Convierte a numpy array X e Y
    X = np.array(X,dtype='float32')
    Y = np.array(Y,dtype='float64')

    # Muestra datos del conjunto leído
    # Depuración
    print("Features")
    print(X.shape)
    print(Y.shape)
    # Obtiene número de muestras y características
    n_samples , n_features = X.shape
    # Obtiene nombres de las clases
    class_names = np.array(classlabels)
    n_classes = class_names.shape[0]
    
    return X, Y, n_samples, n_features, n_classes, classlabels, nperclass, class_names



# Carga conjuntos de datos

Se proporciona la carpeta, a través de la variable folder, donde cada subcarpeta se corresponde con una clase.
Cada clase contiene muestras en forma de imágenes jpg, todas del mismo tamaño. Obtiene embeddings Facenet

In [None]:
# Modelos disponibles ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace', 'DeepID', 'Dlib']
model = DeepFace.build_model("Facenet")
dim = model.input_shape

#MODIFICAR INDICANDO RUTA EN TU EQUIPO TRAS DESCAGAR DATOS DEL CAMPUS. EVITAR TILDES
folder = "Races"

print('Loading dataset')
X, Y, nsamples, class_name, nperclass, classlabels, width, height = LoadDataset(folder,'.jpg')

#Convierte a numpy array X e Y
X = np.array(X,dtype='float32')
Y = np.array(Y,dtype='float64')

# Obtiene número de muestras y características
n_samples , n_features = X.shape
# Obtiene nombres de las clases
class_names = np.array(classlabels)
n_classes = class_names.shape[0]

print("Dataset info:")
print("# samples: %d" % n_samples)
print("# features: %d" % n_features)
print("# classes: %d" % n_classes)
print("classes %s" % classlabels)
print("samples per class %s" % str(nperclass)[1:-1] )




# Diseña conjunto experimental k-fold

Divide los datos k veces en conjunto de entrenamiento y test

In [None]:
# StratifiedKFold
# Define el número de subconjuntos a considerar
kfold = 5
skf = StratifiedKFold(n_splits=kfold, random_state=4, shuffle=True)
#Distribución de muestras por fold
fold = 1
for train_index, test_index in skf.split(X, Y):
    #print("TRAIN:", train_index, "TEST:", test_index)
    print("Fold %d" % fold)
    print("# samples in training set %d" % train_index.shape[0])
    print("# samples in test set %d" % test_index.shape[0])
    fold += 1

# Lanza experimento

In [None]:
# Embeddings
precs_facenet_svm, recs_facenet_svm = [], []
precs_facenet_knn, recs_facenet_knn = [], []


# Recorre folds
fold = 1
while fold <= kfold:
    accs, precs, recs = [], [], []
    for train_index, test_index in skf.split(X, Y):
        #print("TRAIN:", train_index, "TEST:", test_index)
        print("***\nFold %d" % fold)
        #División de muestras de entreno y test
        X_train, X_test = X[train_index], X[test_index]
        #Etiquetas de las muestras
        y_train, y_test = Y[train_index], Y[test_index]


        #Facenet+SVM
        y_pred, y_test = GetSVMPredictions(X_train, X_test,y_train, y_test)
        print("\nFacenet+SVM Metrics")
        precs_facenet_svm.append(precision_score(y_test, y_pred, average='weighted'))
        recs_facenet_svm.append(recall_score(y_test, y_pred, average='weighted'))
        print(classification_report(y_test, y_pred, target_names=class_names))
        print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))        
    

    fold += 1

print("Facenet+KNN Mean Precision:  %0.3f, Mean Recall:  %0.3f" % ( np.mean(precs_facenet_knn) , np.mean(recs_facenet_knn) ))
print("Facenet+SVM Mean Precision:  %0.3f, Mean Recall:  %0.3f" % ( np.mean(precs_facenet_svm) , np.mean(recs_facenet_svm) ))


NameError: name 'kfold' is not defined

In [3]:
import cv2
import numpy as np
import joblib
from deepface import DeepFace

# DNN
dnn_model = "deploy.prototxt.txt"
dnn_weights = "res10_300x300_ssd_iter_140000.caffemodel"
net = cv2.dnn.readNetFromCaffe(dnn_model, dnn_weights)

# Classifier + scaler
clf = joblib.load("svm_final_model.pkl")
scaler = joblib.load("scaler.pkl")
class_names = ["Asian", "Caucasian", "African"]

# Load crowns (with alpha)
crowns = {
    "Asian": cv2.imread("c1.png", cv2.IMREAD_UNCHANGED),
    "Caucasian": cv2.imread("c2.png", cv2.IMREAD_UNCHANGED),
    "African": cv2.imread("c3.png", cv2.IMREAD_UNCHANGED),
}

video_capture = cv2.VideoCapture(0)

def overlay_png(frame, png, x, y, w):
    """Superpone PNG con canal alfa sobre frame en coordenada x,y"""
    if png is None:
        return frame

    # resize keeping ratio
    scale = w / png.shape[1]
    new_w = int(png.shape[1] * scale)
    new_h = int(png.shape[0] * scale)
    png_resized = cv2.resize(png, (new_w, new_h))

    # Coordinates
    y1 = max(0, y - new_h)
    y2 = y1 + new_h
    x1 = x
    x2 = x1 + new_w

    if x1 < 0 or y1 < 0 or x2 > frame.shape[1] or y2 > frame.shape[0]:
        return frame  # avoid errors if goes out of frame

    # Split channels
    b, g, r, a = cv2.split(png_resized)
    overlay_color = cv2.merge((b, g, r))
    mask = a / 255.0

    # Apply alpha blending
    frame_region = frame[y1:y2, x1:x2]
    frame[y1:y2, x1:x2] = (overlay_color * mask[..., None] +
                           frame_region * (1 - mask[..., None])).astype(np.uint8)
    return frame


while True:
    ret, frame = video_capture.read()
    if not ret:
        break

    h, w = frame.shape[:2]

    # Face detection
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
                                 [104.0, 177.0, 123.0], False, False)
    net.setInput(blob)
    detections = net.forward()

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            x, y, x2, y2 = box.astype(int)

            cv2.rectangle(frame, (x, y), (x2, y2), (0, 255, 0), 2)

            face = frame[y:y2, x:x2]
            if face.size == 0:
                continue

            # Deepface embedding
            embedding_objs = DeepFace.represent(face, model_name="Facenet",
                                                enforce_detection=False)
            embedding = np.array(embedding_objs[0]["embedding"]).reshape(1, -1)
            embedding = scaler.transform(embedding)

            # Predict class
            idx = int(clf.predict(embedding)[0])
            class_name = class_names[idx]

            # Draw text
            cv2.putText(frame, class_name, (x, y2 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            # Select crown PNG
            crown_png = crowns[class_name]

            # Apply crown on top of head
            face_width = x2 - x
            frame = overlay_png(frame, crown_png, x, y, face_width)

    cv2.imshow("Frame", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

video_capture.release()
cv2.destroyAllWindows()
