In [1]:
# One-time setup: run `%pip install mediapipe` if the package is not installed yet.

In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [3]:
# Shorthand handles for the two MediaPipe pieces this notebook relies on:
# the hands solution (model class and HAND_CONNECTIONS) and the drawing utilities.
mp_hands, mp_drawing = mp.solutions.hands, mp.solutions.drawing_utils

In [4]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back a BGR frame plus the results.

    Args:
        image: Frame in BGR channel order (it is converted with COLOR_BGR2RGB).
        model: Any object exposing ``process(rgb_image)``.

    Returns:
        ``(image, results)`` where ``image`` is the frame converted back to BGR
        and ``results`` is whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is read-only only for the duration of the model call; this
    # follows MediaPipe's example code (presumably a pass-by-reference hint).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

### Draw landmarks with the default style

In [None]:
def draw_landmarks(image, results):
    """Overlay every detected hand on ``image`` using the default drawing style.

    Args:
        image: Frame passed straight through to ``mp_drawing.draw_landmarks``.
        results: Detection results; only ``multi_hand_landmarks`` is read.

    Returns:
        None. Nothing is drawn when ``multi_hand_landmarks`` is empty or None.
    """
    if not results.multi_hand_landmarks:
        return
    for hand in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS)

#### Draw landmarks with a custom style per hand

In [6]:
def draw_styled_landmarks(image, results):
    """Overlay detected hands on ``image`` with a colour scheme per handedness.

    A hand labelled ``'Left'`` gets one colour pair; any other label is drawn
    with the second pair (treated as the right hand).

    Args:
        image: Frame passed straight through to ``mp_drawing.draw_landmarks``.
        results: Detection results; ``multi_hand_landmarks`` and
            ``multi_handedness`` are read and indexed in parallel.

    Returns:
        None. Nothing is drawn unless both result lists are non-empty.
    """
    if not (results.multi_hand_landmarks and results.multi_handedness):
        return

    for position, hand in enumerate(results.multi_hand_landmarks):
        # Label reported for this hand, e.g. "Left" or "Right".
        label = results.multi_handedness[position].classification[0].label

        # First colour goes to the 4th draw_landmarks argument, second to the 5th.
        if label == 'Left':
            first_color, second_color = (121, 22, 76), (121, 44, 250)
        else:
            first_color, second_color = (245, 117, 66), (245, 66, 230)

        mp_drawing.draw_landmarks(
            image,
            hand,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=first_color, thickness=1, circle_radius=3),
            mp_drawing.DrawingSpec(color=second_color, thickness=1, circle_radius=2),
        )

In [7]:
def extract_key_points(results):
    """Flatten detected hand landmarks into one left-then-right feature vector.

    Args:
        results: Detection results exposing ``multi_hand_landmarks`` (each item
            has a ``landmark`` sequence of points with ``x``, ``y``, ``z``) and
            ``multi_handedness`` (each item has ``classification[0].label``).

    Returns:
        1-D NumPy array: the values for the hand labelled "Left" followed by
        the values for the hand labelled "Right". A hand that was not detected
        contributes 63 zeros (21 landmarks * 3 coordinates), so the result has
        126 entries when every detected hand has 21 landmarks. If several
        detections carry the same label, the last one wins.
    """
    values_per_hand = 21 * 3  # 21 landmarks, each with x, y and z
    left_hand = np.zeros(values_per_hand)
    right_hand = np.zeros(values_per_hand)

    if results.multi_hand_landmarks and results.multi_handedness:
        # The two lists are parallel: entry i of each describes the same hand.
        for hand_landmarks, hand_info in zip(results.multi_hand_landmarks,
                                             results.multi_handedness):
            keypoints = np.array(
                [[point.x, point.y, point.z] for point in hand_landmarks.landmark]
            ).flatten()
            handedness = hand_info.classification[0].label

            # Any label other than "Left"/"Right" is ignored.
            if handedness == "Left":
                left_hand = keypoints
            elif handedness == "Right":
                right_hand = keypoints

    return np.concatenate([left_hand, right_hand])

#### 3. Data Collection

In [None]:
# Root folder for everything this notebook writes. Set the DATASET_ROOT
# environment variable before starting the kernel to use another location;
# the default keeps the original hard-coded folder.
DATASET_ROOT = os.environ.get('DATASET_ROOT', '/Users/robayedashraf/Downloads/Dataset')

# Paths for exported data
DATA_PATH = os.path.join(DATASET_ROOT, 'data')        # per-frame keypoint arrays (.npy)
IMAGE_PATH = os.path.join(DATASET_ROOT, 'imagedata')  # per-frame camera images (.jpg)
IMAGE = os.path.join(DATASET_ROOT, 'image')           # annotated sample images per action

# Actions that we try to detect (currently a single label; full set kept below)
# actions = np.array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'])
actions = np.array(['A'])

# Number of sequences (videos)
no_sequences = 20

# Length of each sequence (frames)
sequence_length = 30

In [None]:
# Create directories for each action and sequence.
# exist_ok=True keeps the cell safe to re-run. Unlike a blanket try/except it
# still reports real failures (e.g. permission errors), and an already-existing
# keypoint folder no longer prevents the matching image folder from being made.
for action in actions:
    for sequence in range(no_sequences):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)
        os.makedirs(os.path.join(IMAGE_PATH, action, str(sequence)), exist_ok=True)
    os.makedirs(os.path.join(IMAGE, str(action)), exist_ok=True)

### Collect data

In [11]:
# Collect data: for every action, record `no_sequences` clips of
# `sequence_length` frames. Each frame is written as a .jpg under IMAGE_PATH
# and its hand keypoints as a .npy under DATA_PATH. Press 'q' in the preview
# window to stop early.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError('Could not open camera 0; check that a webcam is connected and not in use.')

stop_requested = False  # Set when 'q' is pressed; unwinds all three loops
try:
    # Access MediaPipe model
    with mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.8) as hands:
        # Pause before recording starts.
        # NOTE(review): OpenCV documents waitKey as only working while a HighGUI
        # window exists, and none has been shown yet - confirm this really waits.
        cv2.waitKey(5000)
        # Loop through actions
        for action in actions:
            # Loop through sequences (videos)
            for sequence in range(no_sequences):
                # Loop through frames in a sequence
                for frame_num in range(sequence_length):
                    # Read feed; fail with a clear message instead of crashing
                    # inside cv2.resize when the camera returns no frame.
                    ret, frame = cap.read()
                    if not ret:
                        raise RuntimeError(
                            f'Camera read failed for action {action}, sequence {sequence}, frame {frame_num}.'
                        )
                    frame = cv2.resize(frame, (1280, 720))  # Resize frame to 1280x720

                    # Save the resized camera frame.
                    # NOTE(review): this image is written before the horizontal
                    # flip, while the keypoints below come from the flipped
                    # frame - confirm the mismatch is intended.
                    sample_image_data_path = os.path.join(IMAGE_PATH, action, str(sequence), f"{frame_num}.jpg")
                    cv2.imwrite(sample_image_data_path, frame)
                    frame = cv2.flip(frame, 1)

                    # Make detections
                    image, results = mediapipe_detection(frame, hands)

                    # Draw landmarks
                    draw_styled_landmarks(image, results)

                    # Save an annotated sample: the first frame of each of the
                    # first three sequences, before the status text is drawn.
                    if sequence < 3 and frame_num == 0:
                        sample_image_path = os.path.join(IMAGE, action, f"sample{sequence}.jpg")
                        cv2.imwrite(sample_image_path, image)

                    # Overlay status text
                    cv2.putText(image, 'COLLECTING', (120,200), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, f'Collecting frames for {action} Video Number {sequence}', (15,12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)

                    # Export keypoints
                    keypoints = extract_key_points(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # Break gracefully
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        stop_requested = True
                        break
                if stop_requested:
                    break
            if stop_requested:
                break
        # Free the camera first, then keep the last frame on screen briefly.
        cap.release()
        cv2.waitKey(5000)
finally:
    # Runs on errors and interrupts too, so the camera and preview window are
    # never left open. The second release on the normal path is harmless.
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1750227230.253822 6559313 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M3 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1750227230.260786 6559576 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1750227230.269621 6559579 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1750227235.353564 6559577 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
