# Data Collection for ASL Gesture Recognition

This notebook collects training data for ASL gesture recognition: it captures webcam frames, detects hand landmarks with MediaPipe, and saves the labeled samples as JSON.


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import json
import os
from datetime import datetime

# Mediapipe setup: the hand-tracking solution and its drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Detector options: non-static mode, at most one hand, 0.5 confidence
# thresholds for both detection and tracking
_hands_options = {
    'static_image_mode': False,
    'max_num_hands': 1,
    'min_detection_confidence': 0.5,
    'min_tracking_confidence': 0.5,
}

# Single detector instance shared by the cells below
hands = mp_hands.Hands(**_hands_options)


In [None]:
# Collection settings
DATA_DIR = '../data/raw'           # output directory for the JSON sample files
SAMPLES_PER_GESTURE = 100          # samples to capture for every gesture
GESTURES = list('ABCDE')           # one label per letter: 'A' through 'E'

# Make sure the output directory is there before anything is written to it
os.makedirs(DATA_DIR, exist_ok=True)

print(f"Collecting {SAMPLES_PER_GESTURE} samples for each gesture: {GESTURES}")


In [None]:
def extract_landmarks(image):
    """Return the landmarks of the first detected hand, or None.

    The BGR ``image`` is converted to RGB and run through the module-level
    ``hands`` detector. When at least one hand is found, the result is a list
    with one ``[x, y, z]`` entry per landmark of the first hand; otherwise
    ``None`` is returned.
    """
    detection = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    detected_hands = detection.multi_hand_landmarks

    # Guard clause: nothing detected in this frame
    if not detected_hands:
        return None

    return [[point.x, point.y, point.z] for point in detected_hands[0].landmark]

def collect_gesture_data(gesture, num_samples):
    """Interactively capture hand-landmark samples for one gesture.

    Opens camera device 0, shows a live preview window, and records a sample
    each time SPACE is pressed while a hand is detected. Capture stops when
    ``num_samples`` samples have been recorded, when 'q' is pressed, or when a
    frame cannot be read. The samples captured so far are then written to
    ``{DATA_DIR}/{gesture}_samples.json``.

    Args:
        gesture: Label stored with every sample and used in the output
            file name.
        num_samples: Number of samples to record before stopping.

    Returns:
        A list of dicts with the keys ``'gesture'``, ``'landmarks'`` (one
        ``[x, y, z]`` entry per hand landmark) and ``'timestamp'`` (ISO-format
        string). The list may hold fewer than ``num_samples`` entries if
        capture ended early.
    """
    cap = cv2.VideoCapture(0)
    samples = []

    print(f"\nCollecting data for gesture: {gesture}")
    print("Press SPACE to capture sample, 'q' to quit")

    try:
        while len(samples) < num_samples:
            ret, frame = cap.read()
            if not ret:
                print("Could not read a frame from the camera; stopping capture")
                break

            # Detect hands. Detection runs here (instead of going through
            # extract_landmarks) because drawing needs the raw Mediapipe
            # landmark object, which that helper does not return.
            results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            hand = (
                results.multi_hand_landmarks[0]
                if results.multi_hand_landmarks
                else None
            )
            landmarks = (
                [[lm.x, lm.y, lm.z] for lm in hand.landmark]
                if hand is not None
                else None
            )

            if hand is not None:
                # Draw on a copy so the captured frame itself stays untouched
                annotated_frame = frame.copy()
                mp_drawing.draw_landmarks(
                    annotated_frame,
                    hand,
                    mp_hands.HAND_CONNECTIONS
                )
                cv2.putText(annotated_frame, f"Samples: {len(samples)}/{num_samples}",
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.putText(annotated_frame, f"Gesture: {gesture}",
                            (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow('Data Collection', annotated_frame)
            else:
                cv2.putText(frame, "No hand detected", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                cv2.imshow('Data Collection', frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord(' '):  # Space to capture
                # Only record when a hand is visible in the current frame
                if landmarks is not None:
                    samples.append({
                        'gesture': gesture,
                        'landmarks': landmarks,
                        'timestamp': datetime.now().isoformat()
                    })
                    print(f"Captured sample {len(samples)}/{num_samples}")
            elif key == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even when the
        # loop above raises; otherwise the device stays locked for later runs.
        cap.release()
        cv2.destroyAllWindows()

    # Save data
    filename = f"{DATA_DIR}/{gesture}_samples.json"
    with open(filename, 'w') as f:
        json.dump(samples, f, indent=2)

    print(f"Saved {len(samples)} samples to {filename}")
    return samples


In [None]:
# Run one interactive capture session per configured gesture and keep the
# returned samples keyed by gesture label. Filling the dict inside the loop
# keeps already-collected gestures available if a later session is interrupted.
all_data = {}

for gesture in GESTURES:
    all_data[gesture] = collect_gesture_data(gesture, SAMPLES_PER_GESTURE)

print("\nData collection completed!")


In [None]:
# Flatten the per-gesture sample lists into one list, in gesture order
combined_data = [
    sample
    for gesture_samples in all_data.values()
    for sample in gesture_samples
]

# Write the merged dataset into the same directory as the per-gesture files
combined_path = f"{DATA_DIR}/combined_dataset.json"
with open(combined_path, 'w') as f:
    json.dump(combined_data, f, indent=2)

total_samples = len(combined_data)
print(f"Total samples collected: {total_samples}")
# Integer average over the configured gestures, not an exact per-gesture count
print(f"Samples per gesture: {total_samples // len(GESTURES)}")
