TAREA: Tras mostrar opciones para la detección y extracción de información de caras humanas con deepface, la tarea a entregar consiste en proponer dos escenarios de aplicación y desarrollar dos prototipos de temática libre que provoquen reacciones a partir de la información extraída del rostro. Uno de los prototipos deberá incluir el uso de algún modelo entrenado por ustedes para la extracción de información biométrica, similar al ejemplo del género planteado durante la práctica pero con diferente aplicación (emociones, raza, edad…). El otro es de temática completamente libre.

In [None]:
import cv2
import mediapipe as mp

# Live face-detection demo: grabs frames from the default webcam, runs
# MediaPipe face detection on each one, draws the bounding box and keypoints
# of every detected face, and shows the result until "q" is pressed.
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
cap = cv2.VideoCapture(0)  # Use the default webcam

# try/finally guarantees the webcam and the preview window are released even
# when the loop raises or the notebook kernel is interrupted; otherwise the
# camera would stay locked until the process exits.
try:
    # The context manager closes the detector when the block is left.
    with mp_face_detection.FaceDetection(
        model_selection=0, min_detection_confidence=0.5
    ) as face_detection:
        while True:
            # Grab one frame from the webcam; stop when none is delivered
            ret, frame = cap.read()
            if not ret:
                break

            # Frame size is the same for every face in this frame
            h, w, _ = frame.shape

            # MediaPipe expects RGB input, OpenCV delivers BGR
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detection.process(rgb_frame)

            # `detections` is falsy when no face was found
            for detection in results.detections or ():
                # Bounding box is given relative to the frame size
                bboxC = detection.location_data.relative_bounding_box
                x, y = int(bboxC.xmin * w), int(bboxC.ymin * h)
                width, height = int(bboxC.width * w), int(bboxC.height * h)

                # Green rectangle around the face
                cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 255, 0), 2)

                # Red filled dot on each keypoint (MediaPipe provides 6 for
                # face detection)
                for keypoint in detection.location_data.relative_keypoints:
                    keypoint_x = int(keypoint.x * w)
                    keypoint_y = int(keypoint.y * h)
                    cv2.circle(frame, (keypoint_x, keypoint_y), 5, (0, 0, 255), -1)

            # Show the annotated frame
            cv2.imshow("MediaPipe Face Detection", frame)

            # Press "q" to quit
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Release the resources
    cap.release()
    cv2.destroyAllWindows()

Inicie MediaPipe para usar el filtro:

In [10]:
# Set up the MediaPipe face-mesh tracker (video mode, up to three faces) and
# open the default webcam.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=3,
    min_detection_confidence=0.5,
)
cap = cv2.VideoCapture(0)

# Load the PNG sprites used by the filter. They are read unchanged so that a
# fourth (alpha) channel, which overlay_image reads, is kept if the file has
# one. NOTE: cv2.imread returns None instead of raising when a file is missing.
dog_ears = cv2.imread("assets/dog_ears.png", cv2.IMREAD_UNCHANGED)
dog_nose = cv2.imread("assets/dog_nose.png", cv2.IMREAD_UNCHANGED)
dog_tongue = cv2.imread("assets/dog_tongue.png", cv2.IMREAD_UNCHANGED)
dalmatian_ears = cv2.imread("assets/dalmatian_ears.png", cv2.IMREAD_UNCHANGED)
dalmatian_nose = cv2.imread("assets/dalmatian_nose.png", cv2.IMREAD_UNCHANGED)

# Currently selected ear/nose set; starts with the dog one
ear_type = "dog"

def overlay_image(background, overlay, x, y, w, h):
    """Alpha-blend a sprite onto a frame, clipping it at the frame borders.

    The sprite is scaled to ``w`` x ``h`` pixels and placed with its top-left
    corner at ``(x, y)`` in ``background``. Whatever part falls outside the
    frame is cropped away; the visible part keeps its scale and position.

    Args:
        background: Image array of shape (rows, cols, channels) that is
            modified in place; only its first three channels are written.
        overlay: Sprite array with three colour channels and, optionally, a
            fourth channel used as alpha (0-255). A sprite without a fourth
            channel is pasted fully opaque.
        x: Column of the sprite's left edge in the frame (may be negative).
        y: Row of the sprite's top edge in the frame (may be negative).
        w: Target sprite width in pixels.
        h: Target sprite height in pixels.

    Returns:
        The same ``background`` object, for call chaining.
    """
    if w <= 0 or h <= 0:
        return background

    bg_h, bg_w = background.shape[:2]

    # Visible rectangle of the sprite, in frame coordinates
    left, top = max(x, 0), max(y, 0)
    right, bottom = min(x + w, bg_w), min(y + h, bg_h)
    if right <= left or bottom <= top:
        # Sprite lies completely outside the frame
        return background

    # Scale first, crop second: cropping must happen in destination pixels,
    # otherwise the cut-off amount is wrong whenever the sprite's native size
    # differs from (w, h) and the visible part gets stretched or squeezed.
    if overlay.shape[1] != w or overlay.shape[0] != h:
        overlay = cv2.resize(overlay, (w, h))
    visible = overlay[top - y:bottom - y, left - x:right - x]

    # View into the frame, so assigning to it updates `background` in place
    target = background[top:bottom, left:right, :3]
    if visible.shape[2] >= 4:
        # Keep the alpha plane 3-D so it broadcasts over the colour channels
        alpha = visible[:, :, 3:4] / 255.0
        target[...] = alpha * visible[:, :, :3] + (1.0 - alpha) * target
    else:
        target[...] = visible[:, :, :3]
    return background

# Live "dog filter": tracks faces with the face mesh, pastes ears and nose of
# the selected type on every face, adds a tongue while the mouth is open, and
# lets the user switch type with the keys 1 / 2 or quit with q.

def _to_pixels(landmark, frame_w, frame_h):
    """Convert a normalised landmark to integer (x, y) pixel coordinates."""
    return int(landmark.x * frame_w), int(landmark.y * frame_h)


# (ears, nose) sprites for each selectable filter type
filter_sprites = {
    "dog": (dog_ears, dog_nose),
    "dalmatian": (dalmatian_ears, dalmatian_nose),
}

# On-screen help, drawn in the top-left corner of every frame
legenda = [
    "Premi 1: orecchie cane",
    "Premi 2: orecchie dalmata",
    "Premi q: esci"
]
y0, dy = 30, 30

# try/finally guarantees the webcam and the window are released even when the
# loop raises or the notebook kernel is interrupted.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        h, w, _ = frame.shape
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb_frame)

        # `multi_face_landmarks` is falsy when no face was found
        for face_landmarks in results.multi_face_landmarks or ():
            points = face_landmarks.landmark
            left_eye = _to_pixels(points[33], w, h)
            right_eye = _to_pixels(points[263], w, h)
            nose = _to_pixels(points[1], w, h)
            mouth_top = _to_pixels(points[13], w, h)
            mouth_bottom = _to_pixels(points[14], w, h)

            # All sprite sizes are derived from the horizontal eye distance
            eye_distance = abs(right_eye[0] - left_eye[0])
            ear_width = int(eye_distance * 2.5)
            nose_width = eye_distance // 2

            # Ears and nose of the type chosen with the keyboard; heights
            # follow from each sprite's own aspect ratio
            selected = filter_sprites.get(ear_type)
            if selected is not None:
                ears_img, nose_img = selected
                ear_height = int(ear_width * ears_img.shape[0] / ears_img.shape[1])
                frame = overlay_image(frame, ears_img, left_eye[0] - ear_width // 4, left_eye[1] - ear_height, ear_width, ear_height)
                nose_height = int(nose_width * nose_img.shape[0] / nose_img.shape[1])
                frame = overlay_image(frame, nose_img, nose[0] - nose_width // 2, nose[1] - nose_height // 2, nose_width, nose_height)

            # Mouth open: add the tongue. The threshold is a fraction of the
            # frame height, not of the face size.
            mouth_opening_height = abs(mouth_bottom[1] - mouth_top[1])
            mouth_open_threshold = h / 20
            if mouth_opening_height > mouth_open_threshold:
                tongue_width = nose_width * 2
                tongue_height = int(tongue_width * dog_tongue.shape[0] / dog_tongue.shape[1])
                # Drawn at twice its aspect-ratio height, hanging from the lower lip
                frame = overlay_image(frame, dog_tongue, nose[0] - tongue_width // 2, mouth_bottom[1], tongue_width, tongue_height * 2)

        # Draw the legend in the top-left corner
        for i, line in enumerate(legenda):
            y = y0 + i * dy
            cv2.putText(frame, line, (10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow("Filtro Snapchat", frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
        elif key == ord("1"):
            ear_type = "dog"
        elif key == ord("2"):
            ear_type = "dalmatian"
finally:
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1763409979.993479  188367 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M4
W0000 00:00:1763409979.995687  199259 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1763409980.000792  199259 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
