#### Paquetes Necesarios

In [14]:
import mediapipe as mp
import cv2
import numpy as np
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

#### Utilidades de Visualización (mediapipe)

In [29]:
# Styling for the text label that draw_landmarks_on_image writes next to each hand.
MARGIN = 10  # pixels subtracted from the hand's topmost y to place the label above it
FONT_SIZE = 1  # passed to cv2.putText as the font scale
FONT_THICKNESS = 1  # passed to cv2.putText as the stroke thickness
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green; currently used for the gesture label

def draw_landmarks_on_image(rgb_image, detection_result, gesture):
  """Return a copy of ``rgb_image`` with every detected hand drawn and labelled.

  Args:
    rgb_image: Image array (height x width x channels) to annotate. It is
      copied, never modified in place.
    detection_result: Object exposing ``hand_landmarks``, a sequence holding
      one list of landmarks per detected hand. Each landmark must provide
      ``x``, ``y`` and ``z`` attributes, with ``x``/``y`` expressed as
      fractions of the image width/height.
    gesture: Text written next to each detected hand.

  Returns:
    The annotated copy of ``rgb_image``. When no hand was detected the copy
    is returned untouched.
  """
  annotated_image = np.copy(rgb_image)
  height, width = annotated_image.shape[:2]

  for hand_landmarks in detection_result.hand_landmarks:
    # Repackage the landmarks as a NormalizedLandmarkList for draw_landmarks.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
      for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Anchor the label MARGIN pixels above the top-left corner of the hand's
    # bounding box.
    text_x = int(min(landmark.x for landmark in hand_landmarks) * width)
    text_y = int(min(landmark.y for landmark in hand_landmarks) * height) - MARGIN

    # Keep the label inside the image: without clamping, a hand touching the
    # top or left edge pushes the text origin to negative coordinates and the
    # label is drawn out of view.
    (text_width, text_height), _ = cv2.getTextSize(
      gesture, cv2.FONT_HERSHEY_DUPLEX, FONT_SIZE, FONT_THICKNESS)
    text_x = max(0, min(text_x, width - text_width))
    text_y = max(text_y, text_height)

    # Draw the gesture label on the image.
    cv2.putText(annotated_image, gesture,
                (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return annotated_image

In [31]:
# Hand-detection model
model_path = 'hand_landmarker.task'

# Build the hand landmarker (up to two hands per frame)
base_options = python.BaseOptions(model_asset_path=model_path)
options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)

# Landmark indices used for classification (MediaPipe hand model numbering).
WRIST = 0
INDEX_TIP = 8
MIDDLE_TIP = 12
RING_TIP = 16
PINKY_TIP = 20

# A fingertip farther than this from the wrist (in landmark coordinates)
# counts as extended; closer counts as folded.
GESTURE_THRESHOLD = 0.20


def classify_gesture(hand_landmarks, threshold=GESTURE_THRESHOLD):
    """Return the gesture label for one hand.

    Args:
        hand_landmarks: Indexable sequence of landmarks for a single hand;
            each landmark must expose ``x``, ``y`` and ``z``.
        threshold: Wrist-to-fingertip distance separating an extended finger
            from a folded one.

    Returns:
        ``"UNO"``, ``"DOS"`` or ``"TRES"`` when exactly the first one, two or
        three fingers (index, middle, ring) are extended, ``"CLOSED"`` when
        index, middle, ring and pinky are all folded, and ``"OPEN"`` for every
        other combination. The thumb is not taken into account.
    """
    wrist = hand_landmarks[WRIST]

    def distance_to_wrist(tip_index):
        tip = hand_landmarks[tip_index]
        return np.sqrt((tip.x - wrist.x)**2
                       + (tip.y - wrist.y)**2
                       + (tip.z - wrist.z)**2)

    index = distance_to_wrist(INDEX_TIP)
    middle = distance_to_wrist(MIDDLE_TIP)
    ring = distance_to_wrist(RING_TIP)
    pinky = distance_to_wrist(PINKY_TIP)

    if index > threshold and middle < threshold and ring < threshold and pinky < threshold:
        return "UNO"
    if index > threshold and middle > threshold and ring < threshold and pinky < threshold:
        return "DOS"
    if index > threshold and middle > threshold and ring > threshold and pinky < threshold:
        return "TRES"
    if index < threshold and middle < threshold and ring < threshold and pinky < threshold:
        return "CLOSED"
    return "OPEN"


# Open the webcam
cap = cv2.VideoCapture(0)

try:
    # Process frames until the stream ends or the user quits
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            break

        # Flip the frame horizontally
        frame = cv2.flip(frame, 1)

        # OpenCV delivers BGR frames, but the image is declared as SRGB below,
        # so convert first; otherwise the detector sees swapped red/blue
        # channels.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Landmark detection
        detection_result = detector.detect(mp_image)

        if detection_result.hand_landmarks:
            # Only the first detected hand is classified; its label is passed
            # to draw_landmarks_on_image for the whole frame.
            gesture = classify_gesture(detection_result.hand_landmarks[0])
            annotated_image = draw_landmarks_on_image(
                mp_image.numpy_view(), detection_result, gesture)
            # cv2.imshow expects BGR, so convert the annotated RGB image back.
            frame = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)

        cv2.imshow("Hand Tracking", frame)

        # Quit with the 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised
    cap.release()
    cv2.destroyAllWindows()
