## Simple MediaPipe inference

In [1]:
import cv2
import mediapipe as mp

# Handles to the MediaPipe Hands solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Predefined MediaPipe drawing styles. Not used by the drawing call below,
# which passes explicit DrawingSpec values instead.
drawing_styles = mp.solutions.drawing_styles

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)

# The try/finally guarantees the camera and the OpenCV windows are released
# even when the loop raises or the kernel is interrupted; otherwise the webcam
# stays locked until the kernel is restarted.
try:
    with mp_hands.Hands(
        static_image_mode=False,       # Video stream; set to False.
        max_num_hands=2,               # Maximum number of hands to detect.
        min_detection_confidence=0.5,  # Minimum confidence for detection.
        min_tracking_confidence=0.5    # Minimum confidence for tracking.
    ) as hands:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue

            # Mirror the frame for a selfie-view display. `image` stays in BGR
            # for drawing/display; `image_rgb` is the RGB copy fed to MediaPipe.
            image = cv2.flip(image, 1)
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # To improve performance, mark the image as not writeable so it can
            # be passed by reference.
            image_rgb.flags.writeable = False
            results = hands.process(image_rgb)

            # Draw directly on the BGR frame: converting `image_rgb` back to BGR
            # would only reproduce the pixels `image` already holds.
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        # Custom specs: landmark points first, then connections.
                        mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2)
                    )

            # Display the resulting image.
            cv2.imshow('MediaPipe Hands', image)

            # Exit the loop when 'q' key is pressed.
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
finally:
    # Release the webcam and close OpenCV windows.
    cap.release()
    cv2.destroyAllWindows()


# Record Gestures

In [6]:
import cv2
import mediapipe as mp
import pandas as pd
import numpy as np
import os
from collections import defaultdict

# MediaPipe drawing helpers and the Hands solution module.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)

# Collection targets: how many gesture classes, and how many samples of each.
NUM_CLASSES = 5
SAMPLES_PER_CLASS = 1000

# Collected samples keyed by class label; unseen labels start as empty lists.
data = defaultdict(list)

# Running tally of samples gathered so far, one zeroed entry per class label.
current_counts = dict.fromkeys(range(NUM_CLASSES), 0)

# Named OpenCV window reserved for instructions.
# NOTE(review): nothing in this cell draws into this window; the instruction
# overlay is rendered onto the capture frame instead - confirm it is needed.
instruction_window = "Instructions"
cv2.namedWindow(instruction_window)

# Key legend as plain text.
# NOTE(review): this string is not read anywhere in the visible cell.
instructions = """
Press:
1 - Gesture Class 0
2 - Gesture Class 1
3 - Gesture Class 2
4 - Gesture Class 3
5 - Gesture Class 4
q - Quit
"""

def display_instructions(frame):
    """Overlay the data-collection instructions onto ``frame`` and return it.

    A semi-transparent black panel is blended into the top-left corner of the
    image, then the title and the key hints are drawn over it in white. The
    panel is at least 400 px wide and is widened when a text line would
    otherwise run past its right edge onto the unshaded image.

    Args:
        frame: Image to annotate (an array accepted by the OpenCV drawing
            functions). It is modified in place.

    Returns:
        The same ``frame`` object, now annotated.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    alpha = 0.6   # Opacity of the dark panel in the blend.
    margin = 10   # Panel inset from the image corner and padding right of the text.

    # (text, baseline origin, font scale, stroke thickness)
    text_lines = [
        ("Gesture Data Collection", (20, 40), 0.8, 2),
        ("Press 1, 2, or 3 ... to assign Class 0, 1, or 2 ... respectively.", (20, 70), 0.5, 1),
        ("Press 'q' to quit.", (20, 100), 0.5, 1),
    ]

    # Measure every line so the panel always backs the full text.
    panel_right = 400
    for text, (x, _y), scale, thickness in text_lines:
        (text_width, _text_height), _baseline = cv2.getTextSize(text, font, scale, thickness)
        panel_right = max(panel_right, x + text_width + margin)

    # Paint the panel on a copy, then blend the copy back into the frame so
    # only the panel area is darkened.
    overlay = frame.copy()
    cv2.rectangle(overlay, (margin, margin), (panel_right, 150), (0, 0, 0), -1)
    cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

    for text, origin, scale, thickness in text_lines:
        cv2.putText(frame, text, origin, font, scale, (255, 255, 255), thickness)
    return frame

def _normalize_landmarks(hand_landmarks):
    """Return one hand's landmarks as a flat, origin-shifted, scale-normalized list.

    Every landmark's (x, y, z) is translated so that landmark 0 becomes the
    origin and is then divided by the distance between landmarks 0 and 1.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute iterates over items
            exposing ``x``, ``y`` and ``z``; at least two landmarks are required.

    Returns:
        A flat list of floats ``[x0, y0, z0, x1, y1, z1, ...]``.
    """
    points = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark], dtype=float)
    offsets = points - points[0]
    scale = np.sqrt(np.sum(offsets[1] ** 2))
    if scale == 0:
        scale = 1e-6  # Prevent division by zero when landmarks 0 and 1 coincide.
    return (offsets / scale).ravel().tolist()


# Key code -> class label: key '1' selects class 0, key '2' class 1, and so on.
# Built once instead of on every frame. Keys are single digits, so this scheme
# supports at most 9 classes.
class_for_key = {ord(str(label + 1)): label for label in range(NUM_CLASSES)}

# The try/finally guarantees the camera and the OpenCV windows are released
# even when the loop raises or the kernel is interrupted.
try:
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=1,  # Assuming single hand for simplicity
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as hands:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue

            # Mirror the frame. `image` stays in BGR for drawing/display;
            # `image_rgb` is the RGB copy fed to MediaPipe.
            image = cv2.flip(image, 1)
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # To improve performance, mark the image as not writeable.
            image_rgb.flags.writeable = False
            results = hands.process(image_rgb)

            # Features for the hand seen in THIS frame; None when no hand was found,
            # so a key press can never store landmarks left over from an earlier frame.
            normalized_landmarks = None
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2)
                    )
                    normalized_landmarks = _normalize_landmarks(hand_landmarks)

            # Overlay the instructions and show the annotated frame.
            image = display_instructions(image)
            cv2.imshow('Hand Landmark Data Collection', image)

            # Handle key presses: 'q' quits, a class key stores the current sample.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                print("Exiting and saving data...")
                break
            elif key in class_for_key:
                class_label = class_for_key[key]  # Classes 0 .. NUM_CLASSES - 1
                if normalized_landmarks is not None:
                    if current_counts[class_label] < SAMPLES_PER_CLASS:
                        data[class_label].append(normalized_landmarks)
                        current_counts[class_label] += 1
                        print(f"Collected sample {current_counts[class_label]}/{SAMPLES_PER_CLASS} for Class {class_label}")
                    else:
                        print(f"Class {class_label} already has {SAMPLES_PER_CLASS} samples.")
                else:
                    print("No hand detected. Please try again.")

            # Stop automatically once every class has reached its quota.
            if all(count >= SAMPLES_PER_CLASS for count in current_counts.values()):
                print("All classes have reached the desired sample size.")
                break
finally:
    # Release the webcam and close OpenCV windows.
    cap.release()
    cv2.destroyAllWindows()

# Flatten the per-class samples into rows: one sample's coordinates followed
# by its class label.
all_data = [
    sample + [class_label]
    for class_label, samples in data.items()
    for sample in samples
]

# Column names: x/y/z for each of the 21 landmarks, then the label column.
columns = [f'{axis}{i}' for i in range(21) for axis in ('x', 'y', 'z')]
columns.append('class')

df = pd.DataFrame(all_data, columns=columns)

# Save to CSV, appending to earlier recordings when there are any.
output_filename = 'hand_gesture_data.csv'

file_exists = os.path.isfile(output_filename)
# Only skip the header when the file already has content. A zero-byte file has
# no header row, so appending headerless rows to it would make a later read
# treat the first sample as the column names.
# Assumes a non-empty file already starts with the header row.
has_header = file_exists and os.path.getsize(output_filename) > 0

if has_header:
    df.to_csv(output_filename, mode='a', index=False, header=False)
    print(f"Appended {len(df)} samples to existing {output_filename}")
else:
    df.to_csv(output_filename, index=False)
    print(f"Data saved to new file {output_filename}")

Exiting and saving data...
Appended 0 samples to existing hand_gesture_data.csv
