## Practice MediaPipe

1. Input continuous BGR images from webcam.
2. Use <i>MediaPipe()</i> to detect and track one of your hands.
3. Obtain the positions of 21 HandLandmarks.
4. Design an algorithm to recognize the three hand gestures Rock, Scissors, and Paper.
5. Use <i>cv2.putText()</i> to write the type of the recognized hand gesture in the upper-left corner.
6. Show your output images.
7. Upload your Jupyter code file (*.ipynb)

In [10]:
import cv2
import numpy as np
import mediapipe as mp

In [11]:
# Positions of the landmarks used below within MediaPipe's 21-point hand
# model. These are the integer values of the corresponding ``HandLandmark``
# enum members, spelled out so the function does not depend on a global that
# is only created in a later notebook cell.
_WRIST = 0
_FINGERTIPS = (4, 8, 12, 16, 20)  # thumb, index, middle, ring, pinky tips


def recognize_gesture(landmarks, extended_threshold=0.4, folded_threshold=0.3):
    """Classify a hand pose as "Paper", "Scissors" or "Rock".

    The decision is based on the planar distance from the wrist landmark to
    each of the five fingertip landmarks; the z coordinate is not used.

    Args:
        landmarks: Sequence indexable by landmark position (0-20) whose items
            expose ``x`` and ``y`` attributes, e.g. ``hand_landmarks.landmark``
            from a MediaPipe Hands result.
        extended_threshold: A fingertip farther than this from the wrist is
            treated as extended.
        folded_threshold: A fingertip closer than this to the wrist is treated
            as folded.

    Returns:
        "Paper" when all five fingertips are extended; "Scissors" when the
        index and middle fingertips are extended while the ring and pinky
        fingertips are folded (the thumb is not considered); "Rock" otherwise.

    Note:
        MediaPipe reports ``x``/``y`` normalized to the image size, so the
        thresholds are fractions of the frame rather than of the hand. The
        defaults therefore only suit a hand that fills a large part of the
        image; pass smaller thresholds when the hand is farther from the
        camera.
    """
    wrist = landmarks[_WRIST]
    tip_distances = [
        np.hypot(landmarks[tip].x - wrist.x, landmarks[tip].y - wrist.y)
        for tip in _FINGERTIPS
    ]
    _, index, middle, ring, pinky = tip_distances

    if all(d > extended_threshold for d in tip_distances):
        return "Paper"
    if (
        index > extended_threshold
        and middle > extended_threshold
        and ring < folded_threshold
        and pinky < folded_threshold
    ):
        return "Scissors"
    # Anything that is neither an open hand nor a clear "V" counts as a fist.
    return "Rock"

In [12]:
# Initialize MediaPipe Hands model.
# The task tracks a single hand, and the gesture label is always drawn at the
# same position, so limit detection to one hand to avoid overlapping labels.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1)
mp_drawing = mp.solutions.drawing_utils

# Open the default webcam
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera, the model and the window are released
# even when the loop is aborted by an exception or a kernel interrupt.
try:
    while cap.isOpened():
        ret, frame = cap.read()

        # A failed read is tolerated (the frame is simply skipped), but the
        # key poll below is still reached so the loop can always be quit.
        if ret:
            # MediaPipe expects RGB input, while OpenCV delivers BGR frames
            results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Draw hand landmarks and write the recognized gesture
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    landmarks = hand_landmarks.landmark
                    gesture = recognize_gesture(landmarks)
                    cv2.putText(frame, gesture, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

            # Display the annotated frame
            cv2.imshow('MediaPipe Hands', frame)

        # Press 'q' to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    hands.close()
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1717578982.225146   46605 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1717578982.226530   47939 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.0.5-1ubuntu1), renderer: Mesa Intel(R) UHD Graphics 620 (KBL GT2)
