In [None]:
!pip install opencv-python mediapipe speechrecognition pyaudio numpy

import cv2
import mediapipe as mp
import speech_recognition as sr
import numpy as np
import os
from datetime import datetime


# Folder that receives saved drawings (the "guardar" voice command writes
# PNG files under it).
os.makedirs("obras", exist_ok=True)

# --- Hand tracking -------------------------------------------------------
# MediaPipe Hands solution limited to a single hand, plus the drawing helper
# used to overlay the detected landmarks on the camera frame.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=1)

# --- Voice input ---------------------------------------------------------
recognizer = sr.Recognizer()
mic = sr.Microphone()

# --- Painting state (module-level, updated by the voice handler) ----------
color = (0, 0, 255)   # stroke colour; this tuple is the one labelled "ROJO"
brush_thickness = 5   # stroke width passed to cv2.line
drawing = False       # NOTE(review): not read anywhere in the visible code
feedback_text = ""    # status message rendered on top of the video frame

# White 480x640 three-channel uint8 image that accumulates the strokes.
canvas = np.full((480, 640, 3), 255, dtype=np.uint8)

def reconocer_voz():
    """Listen for one spoken Spanish command and apply it to the painting state.

    Records from the module-level ``mic`` (waiting at most 2 seconds for
    speech to start), sends the audio to Google's recognizer and matches the
    lower-cased transcript against the known keywords:

    * ``rojo`` / ``verde`` / ``azul`` -- change the stroke colour.
    * ``limpiar`` -- reset the canvas to white.
    * ``guardar`` -- write the canvas to ``obras/obra_<timestamp>.png``.
    * ``grueso`` -- thick brush (15); ``pincel`` -- normal brush (5).

    Updates the globals ``color``, ``brush_thickness`` and ``feedback_text``
    and mutates ``canvas`` in place. Returns ``None``.

    Recognition problems are treated as "no command": silence and
    unintelligible audio are ignored quietly, while service or microphone
    failures are reported through ``feedback_text`` and stdout. Any other
    exception (including ``KeyboardInterrupt``) propagates to the caller.
    """
    global color, brush_thickness, feedback_text

    try:
        # Hold the microphone only while recording; the network call below
        # works on the captured audio and does not need the device.
        with mic as source:
            recognizer.adjust_for_ambient_noise(source)
            audio = recognizer.listen(source, timeout=2)
        comando = recognizer.recognize_google(audio, language='es-ES').lower()
    except (sr.WaitTimeoutError, sr.UnknownValueError):
        # Nobody spoke within the timeout, or the speech was not understood.
        return
    except sr.RequestError as exc:
        print("Error del servicio de voz:", exc)
        feedback_text = "Voz no disponible"
        return
    except OSError as exc:
        print("Error de microfono:", exc)
        feedback_text = "Microfono no disponible"
        return

    print("Comando de voz detectado:", comando)

    if "rojo" in comando:
        color = (0, 0, 255)
        feedback_text = "Color: ROJO"
    elif "verde" in comando:
        color = (0, 255, 0)
        feedback_text = "Color: VERDE"
    elif "azul" in comando:
        color = (255, 0, 0)
        feedback_text = "Color: AZUL"
    elif "limpiar" in comando:
        # In-place fill keeps the same array object the main loop draws on.
        canvas[:] = 255
        feedback_text = "Lienzo limpio"
    elif "guardar" in comando:
        filename = f"obras/obra_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
        # cv2.imwrite signals failure through its return value, not an
        # exception, so only claim success when it actually wrote the file.
        if cv2.imwrite(filename, canvas):
            feedback_text = "Obra guardada"
        else:
            feedback_text = "Error al guardar"
    elif "grueso" in comando:
        # Checked before "pincel" so that the phrase "pincel grueso" selects
        # the thick brush instead of being caught by the "pincel" branch.
        brush_thickness = 15
        feedback_text = "Pincel grueso"
    elif "pincel" in comando:
        brush_thickness = 5
        feedback_text = "Pincel normal"


# Main loop: read mirrored webcam frames, track the index fingertip, draw its
# path on the canvas, and show the frame blended with the canvas. Every 100
# frames the loop pauses to listen for a voice command. Press 'q' to quit.
cap = cv2.VideoCapture(0)

prev_x, prev_y = None, None
frame_count = 0

# The canvas has a fixed size; frames are brought to the same size so that
# fingertip coordinates map onto it and cv2.addWeighted (which requires
# equally sized inputs) does not fail on cameras with another resolution.
canvas_h, canvas_w = canvas.shape[:2]

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        if frame.shape[:2] != (canvas_h, canvas_w):
            frame = cv2.resize(frame, (canvas_w, canvas_h))

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb)

        h, w, _ = frame.shape
        if result.multi_hand_landmarks:
            for handLms in result.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, handLms, mp_hands.HAND_CONNECTIONS)

                # Landmark coordinates are normalised; scale them to pixels.
                index_finger = handLms.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                x = int(index_finger.x * w)
                y = int(index_finger.y * h)

                # Connect the previous fingertip position to the current one.
                if prev_x is not None and prev_y is not None:
                    cv2.line(canvas, (prev_x, prev_y), (x, y), color, brush_thickness)
                prev_x, prev_y = x, y
        else:
            # No hand visible: break the stroke so it does not reconnect later.
            prev_x, prev_y = None, None

        cv2.putText(frame, feedback_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        combo = cv2.addWeighted(frame, 0.5, canvas, 0.5, 0)
        cv2.imshow("Pintura Interactiva", combo)

        frame_count += 1
        if frame_count % 100 == 0:
            reconocer_voz()
            # Listening blocks the loop for a while; forget the last
            # fingertip position so the next frame does not draw a long
            # line from where the hand was before the pause.
            prev_x, prev_y = None, None

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
