#### Paquetes Necesarios

In [2]:
import cv2
import numpy as np
import pygame
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

#### Inicialización de Variables para Detección de Manos

In [3]:
# Path to the hand-landmark model bundle
model_path = './model/hand_landmarker.task'

# HandLandmarker options: load the model from model_path and detect up to two hands
base_options = python.BaseOptions(model_asset_path=model_path)
options = vision.HandLandmarkerOptions(
    base_options=base_options,
    num_hands=2,
)

# Hand detector used by the detection functions in this notebook
detector = vision.HandLandmarker.create_from_options(options)

#### Inicialización de Fuentes y Colores

In [4]:
# Text rendering settings shared by every on-screen label
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
font_thickness = 2

# Label colours (tuples passed straight to cv2.putText)
playing_color = (0, 0, 255)      # red while the sound is playing
not_playing_color = (0, 255, 0)  # green while the sound is not playing

#### Método de Localización de la Mano

In [5]:
def detect_hand_position(mp_image, detection_result=None):
    """Return the (x, y) position of the first detected hand.

    The position is taken from landmark 9 of the first hand, which this
    notebook uses as the centre of the hand. Coordinates are returned exactly
    as the detector reports them (the caller scales them by the frame width
    and height).

    Args:
        mp_image: Image handed to the module-level ``detector``. Only used
            when ``detection_result`` is not supplied.
        detection_result: Optional result of a previous ``detector.detect``
            call on the same image. Passing it avoids running the model a
            second time for the same frame.

    Returns:
        An ``(x, y)`` tuple, or ``(None, None)`` when no hand was detected.
    """
    if detection_result is None:
        detection_result = detector.detect(mp_image)

    # No hands in the frame: signal it with a pair of Nones
    if len(detection_result.hand_landmarks) == 0:
        return None, None

    # Landmark 9 of the first hand is treated as the hand centre
    center_position = detection_result.hand_landmarks[0][9]
    return center_position.x, center_position.y

#### Método de Detección de Gestos

In [7]:
# Classify the gesture shown by the first detected hand
def detect_gesture(mp_image, detection_result=None, threshold=0.20):
    """Classify the gesture shown by the first detected hand.

    A finger counts as extended when the 3D distance between its tip landmark
    and the palm base (landmark 0) is strictly greater than ``threshold``, and
    as folded when it is strictly smaller. Only the index, middle, ring and
    pinky tips (landmarks 8, 12, 16 and 20) take part in the decision.

    Args:
        mp_image: Image handed to the module-level ``detector``. Only used
            when ``detection_result`` is not supplied.
        detection_result: Optional result of a previous ``detector.detect``
            call on the same image. Passing it avoids running the model a
            second time for the same frame.
        threshold: Tip-to-palm distance separating folded from extended
            fingers, in the detector's coordinate units.

    Returns:
        ``"1"``, ``"2"`` or ``"3"`` when exactly the first one, two or three
        of index/middle/ring are extended and the remaining fingers are
        folded; ``"closed"`` when all four are folded; ``"open"`` for any
        other combination; ``"no hands"`` when no hand was detected.
    """
    if detection_result is None:
        detection_result = detector.detect(mp_image)

    if len(detection_result.hand_landmarks) == 0:
        return "no hands"

    hand = detection_result.hand_landmarks[0]
    palm_base = hand[0]

    def distance_to_palm(tip):
        # Euclidean distance between a fingertip and the palm base
        return np.sqrt((tip.x - palm_base.x)**2 + (tip.y - palm_base.y)**2 + (tip.z - palm_base.z)**2)

    # Fingertips in order: index, middle, ring, pinky
    distances = [distance_to_palm(hand[tip_index]) for tip_index in (8, 12, 16, 20)]

    # Both comparisons are strict, so a distance exactly equal to the
    # threshold is neither extended nor folded and falls through to "open".
    extended = [distance > threshold for distance in distances]
    folded = [distance < threshold for distance in distances]

    # A label matches when the first `count` fingers are extended and the rest folded
    for count, label in ((1, "1"), (2, "2"), (3, "3"), (0, "closed")):
        if all(extended[:count]) and all(folded[count:]):
            return label

    return "open"

#### Mezclador de Sonidos a través de Webcam

In [11]:
# Genre folder whose sounds are currently in use (starts on "rock")
current_genre_folder = "rock"

# Active mode: "mixer" or "selector" (genre selection)
current_mode = "mixer"

# Start the pygame mixer for sound playback
pygame.mixer.init()

# One playback channel per sound slot (mixer channels 1 to 4)
channels = list(map(pygame.mixer.Channel, range(1, 5)))

# Load the four sounds of a genre
def load_sounds(genre_folder):
    """Return the four ``pygame.mixer.Sound`` objects of one genre.

    The files are read from ``./sounds/<genre_folder>/sound_1.wav`` through
    ``sound_4.wav`` and returned in that order.
    """
    wav_paths = [f"./sounds/{genre_folder}/sound_{i}.wav" for i in range(1, 5)]
    return [pygame.mixer.Sound(wav_path) for wav_path in wav_paths]

# Load the sounds of the starting genre
sounds = load_sounds(current_genre_folder)

# Per-slot playback flags; nothing is playing at start-up
sound_playing = [False for _ in range(4)]

# Open the webcam (capture device 0)
cap = cv2.VideoCapture(0)

# Genres selectable with the "1"/"2"/"3" gestures or the matching number keys
genres = ["rock", "rap", "hiphop"]

# The try/finally guarantees the webcam, the window and the mixer are released
# even if the loop is interrupted or raises (e.g. a missing sound file).
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Flip horizontally so the preview behaves like a mirror
        frame = cv2.flip(frame, 1)

        height, width, _ = frame.shape

        # OpenCV frames are BGR, but the image below is declared as SRGB:
        # convert a copy for MediaPipe and keep the BGR frame for drawing.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Hand position and gesture for this frame
        x, y = detect_hand_position(mp_image)
        gesture = detect_gesture(mp_image)

        if x is not None and y is not None:
            # Scale the detector's coordinates to pixel coordinates
            x = int(x * width)
            y = int(y * height)

            if current_mode == "selector":
                if gesture in ["1", "2", "3"]:
                    selected_genre = genres[int(gesture) - 1]
                    # Reload only on an actual change; otherwise a held gesture
                    # would re-read the four sound files on every frame.
                    if selected_genre != current_genre_folder:
                        current_genre_folder = selected_genre
                        sounds = load_sounds(current_genre_folder)

            elif current_mode == "mixer":
                # Each sound owns one quadrant of the frame:
                # 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right
                for i, channel in enumerate(channels):
                    in_left_half = (i % 2 == 0)
                    in_top_half = (i < 2)

                    if (x < width // 2) == in_left_half and (y < height // 2) == in_top_half:
                        if gesture == "open" and not sound_playing[i]:
                            channel.play(sounds[i], loops=-1)
                            sound_playing[i] = True
                        elif gesture == "closed" and sound_playing[i]:
                            channel.stop()
                            sound_playing[i] = False

        if current_mode == "mixer":
            # Measure the text exactly as it is drawn (upper case) so that it
            # is really centred horizontally.
            genre_label = current_genre_folder.upper()
            (genre_text_width, _), _ = cv2.getTextSize(genre_label, font, font_scale, font_thickness)
            genre_x = (width - genre_text_width) // 2
            genre_y = 50
            cv2.putText(frame, genre_label, (genre_x, genre_y), font, font_scale, (0, 0, 0), font_thickness)

            # One label per quadrant, coloured by its playback state
            sound_labels = [
                ('Sound 1', (30, 100)),
                ('Sound 2', (width - 165, 100)),
                ('Sound 3', (30, height - 100)),
                ('Sound 4', (width - 165, height - 100)),
            ]
            for (label, position), is_playing in zip(sound_labels, sound_playing):
                label_color = playing_color if is_playing else not_playing_color
                cv2.putText(frame, label, position, font, font_scale, label_color, font_thickness)

        elif current_mode == "selector":
            cv2.putText(frame, 'Select Mode:', (20, 50), font, font_scale, (0, 0, 0), font_thickness)
            cv2.putText(frame, current_genre_folder.upper(), (230, 50), font, font_scale, (0, 0, 0), font_thickness)

        # Show the annotated frame
        cv2.imshow("Sound Mixer", frame)

        # Poll the keyboard
        key = cv2.waitKey(1) & 0xFF

        # 'q' quits
        if key == ord('q'):
            break

        # 'm' switches to mixer mode
        elif key == ord('m'):
            current_mode = "mixer"

        # 's' switches to selector mode and silences everything
        elif key == ord('s'):
            current_mode = "selector"

            for channel in channels:
                channel.stop()

            sound_playing = [False] * 4

        # '1'/'2'/'3' pick rock/rap/hiphop, but only while nothing is playing
        elif key in (ord('1'), ord('2'), ord('3')) and not any(sound_playing):
            selected_genre = genres[key - ord('1')]
            if selected_genre != current_genre_folder:
                current_genre_folder = selected_genre
                sounds = load_sounds(current_genre_folder)
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()
    pygame.mixer.quit()