In [1]:
import cv2
import mediapipe as mp
from mediapipe.tasks.python.vision.gesture_recognizer import GestureRecognizerResult
import numpy as np
import math
import time

# Initialization of Mediapipe
# NOTE(review): `hands` and `mp_drawing` are created here but are not referenced
# by any of the visible cells; only `mp_hands.HandLandmark` is used below.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
# Path to the gesture-recognizer model bundle, relative to the working directory.
model_path = './gesture_recognizer.task'
base_options = mp.tasks.BaseOptions(model_asset_path=model_path)

# Configuration variables
# Palette shown in the side menu; the labels on the right imply the tuples are
# in OpenCV's BGR channel order.
colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255)]  # Red, Green, Blue, Yellow
current_color = colors[0]  # color used for strokes and for the fingertip cursor
brush_size = 10            # radius (px) of the circle stamped in single-finger mode
menu_width = 100           # fingertip x below this value counts as "inside the menu"
# The three mode flags below are reset at the start of every
# process_dominant_hand_interaction call and then set from the gesture.
drawing = False
erasing = False
single_finger = True
canvas = None         # drawing layer; allocated by gesture_callback on first use
dominant_hand = None  # handedness label chosen during calibration
calibrating = True    # stays True until detect_dominant_hand sees "Open_Palm"
frame_width = None    # set together with `canvas` from the first frame's shape
frame_height = None
current_frame = None    # most recent mirrored camera frame, stored by the capture loop
processed_frame = None  # last composited frame produced by gesture_callback

I0000 00:00:1736174530.973319   31432 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1736174530.976099   31509 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.0.9-0ubuntu0.2), renderer: Mesa Intel(R) UHD Graphics 620 (WHL GT2)


In [2]:
def draw_menu(frame):
    """Paint the color palette onto ``frame`` in place.

    One filled circle of radius 30 is drawn for each entry of the
    module-level ``colors`` list. All circles share x=50; their centers
    start at y=100 and are spaced 100 px apart vertically.
    """
    swatch_x = 50
    swatch_y = 100
    for swatch_color in colors:
        cv2.circle(frame, (swatch_x, swatch_y), 30, swatch_color, -1)
        swatch_y += 100

def count_visible_fingers(hand_landmarks):
    """Return how many of the five fingers are considered raised.

    A finger counts as raised when its tip landmark has a smaller ``y`` than
    the landmark two indices before it (``tip - 2``), i.e. the tip is higher
    up in the image than that reference joint.

    Args:
        hand_landmarks: sequence of landmarks indexable by
            ``mp_hands.HandLandmark`` values, each exposing a ``y`` attribute.

    Returns:
        int: number of raised fingers, between 0 and 5.
    """
    landmark_id = mp_hands.HandLandmark
    tip_ids = (
        landmark_id.THUMB_TIP,
        landmark_id.INDEX_FINGER_TIP,
        landmark_id.MIDDLE_FINGER_TIP,
        landmark_id.RING_FINGER_TIP,
        landmark_id.PINKY_TIP,
    )
    # NOTE(review): `tip - 2` is presumably the PIP joint (MCP for the thumb)
    # in MediaPipe's landmark numbering -- confirm against the landmark map.
    return sum(
        1
        for tip_id in tip_ids
        if hand_landmarks[tip_id].y < hand_landmarks[tip_id - 2].y
    )

def calculate_brush_size(hand_landmarks):
    """Derive a brush size from the spread between index and middle fingertips.

    Args:
        hand_landmarks: sequence of landmarks indexable by
            ``mp_hands.HandLandmark`` values, each exposing ``x`` and ``y``.

    Returns:
        int: the Euclidean distance between the two fingertips (in the
        landmarks' own coordinate units) multiplied by 300 and truncated.
    """
    landmark_id = mp_hands.HandLandmark
    index_pt = hand_landmarks[landmark_id.INDEX_FINGER_TIP]
    middle_pt = hand_landmarks[landmark_id.MIDDLE_FINGER_TIP]

    delta_x = index_pt.x - middle_pt.x
    delta_y = index_pt.y - middle_pt.y
    spread = math.sqrt(delta_x**2 + delta_y**2)
    return int(spread * 300)

def detect_dominant_hand(result):
    """Pick the dominant hand: the first hand currently showing "Open_Palm".

    Hands are scanned in result order. On the first hand whose top gesture
    category is "Open_Palm", the module-level ``calibrating`` flag is cleared
    and that hand's top handedness label is returned.

    Args:
        result: object with parallel per-hand lists ``hand_landmarks``,
            ``handedness`` and ``gestures``; the latter two hold lists of
            categories exposing ``category_name``.

    Returns:
        The handedness label of the matching hand, or ``None`` when any of the
        three lists is empty or no hand shows an open palm.
    """
    global calibrating

    # Nothing to inspect unless all three per-hand lists are populated.
    if not (result.hand_landmarks and result.handedness and result.gestures):
        return None

    for idx in range(len(result.hand_landmarks)):
        label = result.handedness[idx][0].category_name
        if result.gestures[idx][0].category_name == "Open_Palm":
            calibrating = False
            return label
    return None

def _to_pixel(landmark):
    """Scale a landmark's ``x``/``y`` by the frame size and truncate to ints."""
    return int(landmark.x * frame_width), int(landmark.y * frame_height)


def _draw_finger_bar(start_px, end_px):
    """Fill a 10-px-thick bar on ``canvas`` spanning two pixel positions."""
    x1, y1 = start_px
    x2, y2 = end_px

    # The bar is centered between both fingertips and rotated to follow them.
    center_x = (x1 + x2) // 2
    center_y = (y1 + y2) // 2
    angle = math.atan2(y2 - y1, x2 - x1)
    rect_length = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
    rect_height = 10  # Small fixed height

    # Axis-aligned rectangle around the origin, corners in drawing order.
    points = np.array([
        [-rect_length/2, -rect_height/2],
        [rect_length/2, -rect_height/2],
        [rect_length/2, rect_height/2],
        [-rect_length/2, rect_height/2]
    ], dtype=np.float32)

    cos_angle = math.cos(angle)
    sin_angle = math.sin(angle)
    rotation = np.array([
        [cos_angle, -sin_angle],
        [sin_angle, cos_angle]
    ])

    # Rotate about the origin, then move the rectangle to its center.
    rotated_points = rotation @ points.T
    final_points = (rotated_points.T + [center_x, center_y]).astype(np.int32)
    cv2.fillPoly(canvas, [final_points], current_color)


def _erase_under_palm(frame, hand_landmarks):
    """Black out a disc on ``canvas`` under the palm and outline it on ``frame``."""
    wrist_x, wrist_y = _to_pixel(hand_landmarks[mp_hands.HandLandmark.WRIST])
    mcp_x, mcp_y = _to_pixel(hand_landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_MCP])

    # Eraser center: midpoint between wrist and middle-finger MCP.
    middle_x = int((wrist_x + mcp_x) / 2)
    middle_y = int((wrist_y + mcp_y) / 2)
    # Eraser radius: half the wrist-to-MCP distance.
    eraser_size = int(math.sqrt((wrist_x - mcp_x)**2 + (wrist_y - mcp_y)**2) / 2)

    cv2.circle(canvas, (middle_x, middle_y), eraser_size, (0, 0, 0), -1)  # erase
    cv2.circle(frame, (middle_x, middle_y), eraser_size, (0, 0, 0), 2)    # show eraser outline


def process_dominant_hand_interaction(frame, hand_landmarks, gesture):
    """Apply one frame of painting interaction for the dominant hand.

    The gesture selects the mode for this frame:

    * ``"Pointing_Up"`` - draw with a round brush of radius 10 at the index tip.
    * ``"Victory"``     - draw a bar between index and middle fingertips
      (``brush_size`` is also recomputed from the finger spread).
    * ``"Closed_Fist"`` - erase a disc under the palm.
    * ``"Open_Palm"`` or anything else - no drawing.

    When the index fingertip's x is left of ``menu_width`` nothing is drawn or
    erased; instead the palette entry whose center is within 30 px vertically
    becomes ``current_color``.

    Args:
        frame: BGR image that receives the cursor / eraser overlay (modified
            in place).
        hand_landmarks: landmarks of the dominant hand, indexable by
            ``mp_hands.HandLandmark`` and exposing ``x``/``y``.
        gesture: gesture category name for this hand.

    Side effects:
        Updates the module-level ``drawing``, ``erasing``, ``single_finger``,
        ``brush_size`` and ``current_color``; draws on the module-level
        ``canvas``. Relies on ``frame_width`` / ``frame_height`` and ``canvas``
        having been initialized by the caller.
    """
    global drawing, erasing, single_finger, current_color, brush_size

    # Mode flags are recomputed from scratch on every frame.
    drawing = False
    erasing = False
    single_finger = False
    if gesture == "Open_Palm":
        print('open palm')
    elif gesture == "Pointing_Up":
        drawing = True
        single_finger = True
        brush_size = 10
        print('pointing up')
    elif gesture == "Victory":
        drawing = True
        brush_size = calculate_brush_size(hand_landmarks)
        print('victory')
    elif gesture == "Closed_Fist":
        erasing = True

    # The index fingertip acts as the pointer for both menu and brush.
    index_finger = hand_landmarks[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    x, y = _to_pixel(index_finger)

    if x < menu_width:
        # Menu interaction: pick the first swatch within 30 px vertically.
        for i, color in enumerate(colors):
            color_y = 100 + i * 100  # Y position of each color selector
            if abs(y - color_y) < 30:
                current_color = color
                break
    else:
        if drawing:
            if single_finger:
                cv2.circle(canvas, (x, y), brush_size, current_color, -1)
            else:
                middle_finger = hand_landmarks[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
                _draw_finger_bar((x, y), _to_pixel(middle_finger))
        if erasing:
            _erase_under_palm(frame, hand_landmarks)

    # Show index finger color
    if not erasing:
        cv2.circle(frame, (x, y), 10, current_color, -1)

def gesture_callback(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Live-stream result callback: update the canvas and publish a frame.

    While ``calibrating`` is set, prompts the user and tries to pick the
    dominant hand via ``detect_dominant_hand``. Afterwards, forwards every
    detected hand whose handedness matches ``dominant_hand`` to
    ``process_dominant_hand_interaction``. Finally blends the canvas over the
    frame, draws the color menu and stores the result in ``processed_frame``
    for the display loop.

    Args:
        result: recognizer output with per-hand ``hand_landmarks``,
            ``handedness`` and ``gestures`` lists.
        output_image: the image this result was computed from.
        timestamp_ms: timestamp passed to ``recognize_async`` (unused here).
    """
    global dominant_hand, canvas, frame_width, frame_height, processed_frame

    # Draw on the exact frame the recognizer analysed. The callback runs
    # asynchronously, so the global `current_frame` may already hold a newer
    # frame whose hand position no longer matches `result`.
    # The capture loop submits RGB data; convert back to OpenCV's BGR order
    # (this also yields a fresh, writable array).
    frame = cv2.cvtColor(output_image.numpy_view(), cv2.COLOR_RGB2BGR)

    if canvas is None:
        frame_height, frame_width, _ = frame.shape
        canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

    if calibrating:
        cv2.putText(frame, "Show your dominant hand", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
        if result.gestures:
            dominant_hand = detect_dominant_hand(result)
            if dominant_hand:
                print('dominant hand set to', dominant_hand)
    elif result.hand_landmarks and result.handedness and result.gestures:
        # Paint mode: iterate the per-hand lists together so a missing or
        # empty entry can never raise inside the recognizer's callback thread.
        per_hand = zip(result.hand_landmarks, result.handedness, result.gestures)
        for hand_landmarks, hand_categories, gesture_categories in per_hand:
            if not hand_categories or not gesture_categories:
                continue
            if hand_categories[0].category_name == dominant_hand:
                process_dominant_hand_interaction(
                    frame, hand_landmarks, gesture_categories[0].category_name
                )

    # Combine canvas with frame
    frame = cv2.addWeighted(frame, 0.5, canvas, 0.5, 0)
    draw_menu(frame)

    processed_frame = frame


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


W0000 00:00:1736174531.031694   31502 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [3]:
# Set up gesture recognizer
options = mp.tasks.vision.GestureRecognizerOptions(
    base_options=base_options,
    running_mode=mp.tasks.vision.RunningMode.LIVE_STREAM,
    result_callback=gesture_callback
)

# The recognizer is used as a context manager so its resources are released
# even if the loop below raises.
with mp.tasks.vision.GestureRecognizer.create_from_options(options) as recognizer:
    # Start camera
    cap = cv2.VideoCapture(0)
    # recognize_async needs increasing timestamps; track the last one sent.
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # Horizontal flip for mirror effect
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Store the current frame for the callback to use
            current_frame = frame.copy()

            # Use a monotonic clock (wall-clock time can jump backwards) and
            # bump by 1 ms if two iterations land on the same millisecond.
            current_timestamp_ms = max(int(time.monotonic() * 1000), last_timestamp_ms + 1)
            last_timestamp_ms = current_timestamp_ms

            # Process frame with gesture recognizer
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
            recognizer.recognize_async(mp_image, current_timestamp_ms)

            # Display the processed frame if available
            if processed_frame is not None:
                cv2.imshow("Interactive Paint", processed_frame)
            else:
                cv2.imshow("Interactive Paint", frame)

            # Check for key presses
            key = cv2.waitKey(1) & 0xFF
            if key == 27:  # ESC to exit
                break
            elif key == ord('d'):  # Toggle drawing
                # NOTE(review): process_dominant_hand_interaction resets
                # `drawing` on every call, so this toggle only lasts until the
                # next processed hand.
                drawing = not drawing
    finally:
        # Always free the camera and close the window, even on errors.
        cap.release()
        cv2.destroyAllWindows()


I0000 00:00:1736174531.055091   31432 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1736174531.057389   31512 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.0.9-0ubuntu0.2), renderer: Mesa Intel(R) UHD Graphics 620 (WHL GT2)
W0000 00:00:1736174531.058117   31432 gesture_recognizer_graph.cc:129] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I0000 00:00:1736174531.059886   31432 hand_gesture_recognizer_graph.cc:250] Custom gesture classifier is not defined.
W0000 00:00:1736174531.074481   31497 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736174531.108871   31515 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736174531.143588   31518 inference_feedback_manager.cc:11

dominant hand set to Left
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
open palm
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing up
pointing u