In [1]:
import face_recognition
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from matplotlib import pyplot as plt
import pandas as pd
from datetime import date
import os

2023-11-05 19:41:50.262450: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y,x,1]))
    
    for edge, color in edges.items():
        p1, p2 = edge
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]
        
        if (c1 > confidence_threshold) & (c2 > confidence_threshold):      
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 4)

In [3]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y,x,1]))
    
    for kp in shaped:
        ky, kx, kp_conf = kp
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 6, (0,255,0), -1)

In [4]:
def loop_through_people(frame, keypoints_with_scores, edges, confidence_threshold):
    for person in keypoints_with_scores:
        draw_connections(frame, person, edges, confidence_threshold)
        draw_keypoints(frame, person, confidence_threshold)

In [5]:
# función para obtener una lista con los nombres de los archivos de las imagenes

def list_files_in_folder(folder_path):
    file_names = []
    for file in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, file)):
            file_names.append(file)
    return file_names

In [6]:
# Funcion para actualizar la asistencia en la base de datos
def update_attendance(row, asistencias):

    if row['fecha']== date.today() and row['id'] in asistencias:
        return asistencias[row['id']]
    else:
        return row['asistencia']

In [7]:
# Funcion para identificar una participacion

def mano_alzada(keypoints):
    # Definir keypoints de la muñeca y el hombro
    izq_muneca = keypoints[0][9]
    izq_hombro = keypoints[0][5]
    der_muneca = keypoints[0][10]
    der_hombro = keypoints[0][6]

    threshold = 7.0

    # Check if both wrists are above their respective shoulders
    izq_mano_alzada = izq_muneca[1] < izq_hombro[1] - threshold
    der_mano_alzada = der_muneca[1] < der_hombro[1] - threshold

    return izq_mano_alzada and der_mano_alzada

In [8]:
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

In [9]:
# Base de datos para guardar asistencia y participación
db = pd.DataFrame(columns=['fecha', 'id', 'asistencia', 'participacion'])

In [10]:
###########

model = hub.load('https://tfhub.dev/google/movenet/multipose/lightning/1')
movenet = model.signatures['serving_default']

In [11]:
# Subir una foto de prueba y aprende a como reconocerla
known_face_encodings = []
known_face_names = []

for i in list_files_in_folder('datasets'):
    face = face_recognition.load_image_file(f"datasets/{i}")
    face_face_encoding = face_recognition.face_encodings(face)[0]
    
    known_face_encodings.append(face_face_encoding)
    known_face_names.append(i[:-5])
    if not (db['fecha'] == date.today()).any():
        db.loc[len(db.index)] = [date.today(), i[:-5], '', 0] 


#######################################

video_capture = cv2.VideoCapture(0)

asistencias = {name: 0 for name in known_face_names}
frames_count = 0

process_this_frame = True
while True:
    # Se agarra un solo frame del video
    ret, frame = video_capture.read()

    # Solo se procesan algunos frames para ahorrar tiempo
    if process_this_frame:
        frames_count += 1
        # Hacer el frame 1/4 de su tamaño original
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # Convertir la imagen de BGR color (OpenCV) a RGB color (face_recognition)
        rgb_small_frame = small_frame
        
        # Encontrar todas las caras y codigos en el frame
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

        face_names = []
        for face_encoding in face_encodings:
            
            # Verifica si la cara hace match con alguna del dataset
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
            name = "Unknown"

            # En caso de que tenga dudas, pone la cara mas cerca a la de alguna del dataset
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
                asistencias[name] += 1

            face_names.append(name)

    process_this_frame = not process_this_frame
    ret, frame = video_capture.read()
            
    # Cambiar de tamaño de la imagen
    img = frame.copy()
    img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 384,640)
    input_img = tf.cast(img, dtype=tf.int32)
    
    # Detección
    results = movenet(input_img)
    keypoints_with_scores = results['output_0'].numpy()[:,:,:51].reshape((6,17,3))
    
    manoalzada = mano_alzada(keypoints_with_scores)

    # Render keypoints 
    loop_through_people(frame, keypoints_with_scores, EDGES, 0.1)


    # Mostrar resultados
    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Volver a poner las imagenes en tamaño orignal antes de reducirla a 1/4 de su tamaño
        top *= 4
        right *= 4
        bottom *= 4
        left *= 4

        # Poner la caja alrededor de las caras
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

        # Poner los labels de las caras en la caja
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

    cv2.imshow('Video', frame)
    # Mostrar las imagenes
    if manoalzada:
        cv2.putText(frame, "Raised Hand Detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release handle to the webcam
video_capture.release()
cv2.destroyAllWindows()


valores = np.fromiter(asistencias.values(), dtype=float)
# Dividimos todos los valores por el número de frames
divided_values = (np.round(valores / frames_count, 0)).astype(int)

# Rango para convertir a cualitativo
conditions = [
    (divided_values >= 0) & (divided_values <= .05),
    (divided_values > .05) & (divided_values <= .20),
    (divided_values > .20) & (divided_values <= .60),
    (divided_values > .60)
]
choices = ['Inasistencia', 'Bajo' ,'Medio', 'Alto']
rango_valores = np.select(conditions, choices, default='Unknown')
asistencias = dict(zip(asistencias.keys(), rango_valores))

In [12]:
asistencias

{'Elias': 'Inasistencia',
 'Sofi': 'Inasistencia',
 'Fer': 'Inasistencia',
 'Federico': 'Alto',
 'Mich': 'Inasistencia'}

In [17]:
# Actualizar asistencia
db['asistencia'] = db.apply(update_attendance(), axis=1)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
'''

###########

model = hub.load('https://tfhub.dev/google/movenet/multipose/lightning/1')
movenet = model.signatures['serving_default']
video_capture = cv2.VideoCapture(0)

##########

# Subir una foto de prueba y aprende a como reconocerla

fede_image = face_recognition.load_image_file("datasets/Federico.jpeg")
fer_image = face_recognition.load_image_file("datasets/Fer.jpeg")
sofi_image = face_recognition.load_image_file("datasets/Sofi.jpeg")
mich_image = face_recognition.load_image_file("datasets/Mich.jpeg")
elias_image = face_recognition.load_image_file("datasets/Elias.jpeg")

fede_face_encoding = face_recognition.face_encodings(fede_image)[0]
fer_face_encoding = face_recognition.face_encodings(fer_image)[0]
sofi_face_encoding = face_recognition.face_encodings(sofi_image)[0]
mich_face_encoding = face_recognition.face_encodings(mich_image)[0]
elias_face_encoding = face_recognition.face_encodings(elias_image)[0]

# Se hacen arrays con los codigos de las fotos y sus labels
known_face_encodings = [
    fede_face_encoding,
    fer_face_encoding,
    sofi_face_encoding,
    mich_face_encoding,
    elias_face_encoding
]

known_face_names = [
    "Fede",
    "Fer",
    'Sofi',
    'Mich',
    'Elias'
]

asistencias = {name: 0 for name in known_face_names}
frames_count = 0

process_this_frame = True
while True:
    # Se agarra un solo frame del video
    ret, frame = video_capture.read()

    # Solo se procesan algunos frames para ahorrar tiempo
    if process_this_frame:
        frames_count = frames_count + 1
        # Hacer el frame 1/4 de su tamaño original
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # Convertir la imagen de BGR color (OpenCV) a RGB color (face_recognition)
        rgb_small_frame = small_frame
        
        # Encontrar todas las caras y codigos en el frame
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

        face_names = []
        for face_encoding in face_encodings:
            # Verifica si la cara hace match con alguna del dataset
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
            name = "Unknown"

            # En caso de que tenga dudas, pone la cara mas cerca a la de alguna del dataset
            face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
                asistencias[name] = asistencias[name] + 1

            face_names.append(name)

    process_this_frame = not process_this_frame
    ret, frame = video_capture.read()
            
    # Cambiar de tamaño de la imagen
    img = frame.copy()
    img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 384,640)
    input_img = tf.cast(img, dtype=tf.int32)
    
    # Detección
    results = movenet(input_img)
    keypoints_with_scores = results['output_0'].numpy()[:,:,:51].reshape((6,17,3))
    
    # Render keypoints 
    loop_through_people(frame, keypoints_with_scores, EDGES, 0.1)


    # Mostrar resultados
    for (top, right, bottom, left), name in zip(face_locations, face_names):
        # Volver a poner las imagenes en tamaño orignal antes de reducirla a 1/4 de su tamaño
        top *= 4
        right *= 4
        bottom *= 4
        left *= 4

        # Poner la caja alrededor de las caras
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

        # Poner los labels de las caras en la caja
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

    # Mostrar las imagenes
    cv2.imshow('Video', frame)
    cv2.imshow('Movenet Multipose', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release handle to the webcam
video_capture.release()
cv2.destroyAllWindows()


valores = np.fromiter(asistencias.values(), dtype=float)
# Dividimos todos los valores por el número de frames
divided_values = (np.round(valores / frames_count, 0)).astype(int)

# Rango para convertir a cualitativo
conditions = [
    (divided_values >= 0) & (divided_values <= .05),
    (divided_values > .05) & (divided_values <= .20),
    (divided_values > .20) & (divided_values <= .60),
    (divided_values > .60)
]
choices = ['Inasistencia', 'Bajo' ,'Medio', 'Alto']
rango_valores = np.select(conditions, choices, default='Unknown')
asistencias = dict(zip(asistencias.keys(), rango_valores))

'''

{'Fede': 'Alto',
 'Fer': 'Inasistencia',
 'Sofi': 'Inasistencia',
 'Mich': 'Inasistencia',
 'Elias': 'Inasistencia'}

In [None]:
valores = np.fromiter(asistencias.values(), dtype=float)
# Dividimos todos los valores por el número de frames
divided_values = (np.round(valores / frames_count, 0)).astype(int)

# Rango para convertir a cualitativo
conditions = [
    (divided_values >= 0) & (divided_values <= .05),
    (divided_values > .05) & (divided_values <= .20),
    (divided_values > .20) & (divided_values <= .60),
    (divided_values > .60)
]
choices = ['Inasistencia', 'Bajo' ,'Medio', 'Alto']
rango_valores = np.select(conditions, choices, default='Unknown')
asistencias = dict(zip(asistencias.keys(), rango_valores))
