#### Paquetes necesarios

In [3]:
import cv2
import numpy as np
import pygame
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

#### Inicialización de Variables para Detección de Manos

In [4]:
# Path to the hand landmark model bundle loaded by MediaPipe
model_path = 'hand_landmarker.task'

# Configure the HandLandmarker: load the model from `model_path` and
# allow up to two hands to be reported per image
base_options = python.BaseOptions(model_asset_path=model_path)
options = vision.HandLandmarkerOptions(
    base_options=base_options,
    num_hands=2,
)

# Shared hand detector used by the detection helpers defined in this file
detector = vision.HandLandmarker.create_from_options(options)

#### Método de Localización de la Mano

In [5]:
def detect_hand_position(mp_image):
    """Locate the first detected hand in an image.

    Runs the module-level ``detector`` on ``mp_image`` and reads landmark 9
    of the first hand it reports, which this file treats as the center of
    the hand.

    Args:
        mp_image: Image object accepted by ``detector.detect``.

    Returns:
        A tuple ``(x, y)`` with the ``x`` and ``y`` attributes of that
        landmark (the caller scales them by the frame width and height),
        or ``(None, None)`` when no hand landmarks are returned.
    """
    hands = detector.detect(mp_image).hand_landmarks

    # Guard clause: nothing was detected in this image
    if len(hands) == 0:
        return None, None

    # Only the first detected hand is considered
    hand_center = hands[0][9]
    return hand_center.x, hand_center.y

#### Método de Detección de Gestos

In [6]:
# Hand gesture detection: classify the first detected hand and return a label
def _distance_to_palm_base(landmark, palm_base):
    """Return the 3D Euclidean distance between a landmark and the palm base."""
    return np.sqrt((landmark.x - palm_base.x)**2
                   + (landmark.y - palm_base.y)**2
                   + (landmark.z - palm_base.z)**2)


def detect_gesture(mp_image, threshold=0.20):
    """Classify the gesture of the first hand detected in an image.

    Runs the module-level ``detector`` on ``mp_image`` and measures the
    distance from the index, middle, ring and pinky fingertips (landmarks
    8, 12, 16 and 20) to the palm base (landmark 0). A finger counts as
    raised when its distance is strictly greater than ``threshold`` and as
    folded when it is strictly smaller.

    Args:
        mp_image: Image object accepted by ``detector.detect``.
        threshold: Distance, in the landmark coordinate units, separating a
            raised finger from a folded one. Defaults to ``0.20``.

    Returns:
        One of the strings:
        ``"1"``        - only the index finger is raised;
        ``"2"``        - index and middle are raised;
        ``"3"``        - index, middle and ring are raised;
        ``"closed"``   - all four fingers are folded;
        ``"open"``     - any other combination;
        ``"no hands"`` - no hand landmarks were returned.
    """
    detection_result = detector.detect(mp_image)

    # Nothing to classify when no hand was detected
    if len(detection_result.hand_landmarks) == 0:
        return "no hands"

    # Only the first detected hand is classified
    hand = detection_result.hand_landmarks[0]
    palm_base = hand[0]

    # Fingertip landmarks in order: index, middle, ring, pinky
    distances = [_distance_to_palm_base(hand[tip], palm_base)
                 for tip in (8, 12, 16, 20)]

    # For each label, the required state of (index, middle, ring, pinky):
    # True means raised (> threshold), False means folded (< threshold).
    # A distance exactly equal to the threshold matches neither state, so
    # such a hand falls through to "open".
    patterns = (
        ("1", (True, False, False, False)),
        ("2", (True, True, False, False)),
        ("3", (True, True, True, False)),
        ("closed", (False, False, False, False)),
    )

    for label, pattern in patterns:
        if all((distance > threshold) if raised else (distance < threshold)
               for distance, raised in zip(distances, pattern)):
            return label

    return "open"

#### Mezclador de Sonidos a través de Webcam

In [7]:
# Initialize the Pygame mixer for sound playback
pygame.mixer.init()

# One looping track per screen quadrant
sound_1 = pygame.mixer.Sound("./sounds/trap/trap_piano.wav")
sound_2 = pygame.mixer.Sound("./sounds/trap/trap_drums.wav")
sound_3 = pygame.mixer.Sound("./sounds/trap/trap_bells.wav")
sound_4 = pygame.mixer.Sound("./sounds/trap/trap_vocals.wav")

# Dedicated playback channel for each track
channel_1 = pygame.mixer.Channel(1)
channel_2 = pygame.mixer.Channel(2)
channel_3 = pygame.mixer.Channel(3)
channel_4 = pygame.mixer.Channel(4)

# (channel, sound) per quadrant, indexed as row * 2 + column:
# 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right
quadrant_tracks = [
    (channel_1, sound_1),
    (channel_2, sound_2),
    (channel_3, sound_3),
    (channel_4, sound_4),
]

# Playback state of each quadrant's track, same indexing as quadrant_tracks
playing = [False, False, False, False]

# Open the default webcam
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame horizontally so movement matches the user's view
        frame = cv2.flip(frame, 1)

        # OpenCV delivers frames in BGR channel order, while the image is
        # declared as SRGB, so convert before handing it to MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # NOTE: each helper runs the detector itself, so the model is
        # evaluated twice per frame
        x, y = detect_hand_position(mp_image)
        gesture = detect_gesture(mp_image)

        if x is not None and y is not None:
            # Scale the hand position to pixel coordinates
            height, width, _ = frame.shape
            x = int(x * width)
            y = int(y * height)

            # Select the quadrant that contains the hand
            column = 0 if x < width // 2 else 1
            row = 0 if y < height // 2 else 1
            quadrant = row * 2 + column
            channel, sound = quadrant_tracks[quadrant]

            # An open hand starts the quadrant's loop; a closed hand stops it
            if gesture == "open" and not playing[quadrant]:
                channel.play(sound, loops=-1)
                playing[quadrant] = True
            elif gesture == "closed" and playing[quadrant]:
                channel.stop()
                playing[quadrant] = False

        # Show the mirrored frame
        cv2.imshow("Sound Mixer", frame)

        # Quit when the 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera, window and mixer even if the loop raised
    cap.release()
    cv2.destroyAllWindows()
    pygame.mixer.quit()