### Hand Gesture Data Collection

This cell captures hand gesture samples using a webcam and saves them into a CSV file.  
The recording pipeline includes:

- Real-time hand detection with **MediaPipe Hands**
- Normalization of landmarks relative to the wrist
- Scaling based on hand size for consistent feature representation
- Labeling with:
  - gesture name
  - left/right hand indicator
- Visualization with mirrored preview for user comfort
- Configurable:
  - number of samples
  - capture interval
  - camera resolution

Each sample contains:

- 21 hand landmarks
- 3 coordinates per landmark (x, y, z), normalized
- gesture label
- `is_right_hand` flag (1 = Right, 0 = Left)

The collected data is appended to an existing CSV if present, allowing incremental dataset building.

**Usage example:**

```python
capture_hands_to_csv(
    output_csv="data/gestures.csv",
    gesture_label="thumbs_up",
    max_samples=200,
    capture_interval=0.2
)
```

While the capture is running, press `q` (with the preview window focused) to stop early.

The resulting CSV will be used later for model training.

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import os
import time
import copy

def capture_hands_to_csv(
    output_csv,
    gesture_label,
    max_samples=100,
    capture_interval=0.1,
    frame_width=640,
    frame_height=480,
):
    """Record normalized hand-landmark samples from the webcam into a CSV file.

    Frames are read from camera 0, mirrored, and passed to MediaPipe Hands.
    Whenever a hand is detected and at least ``capture_interval`` seconds have
    passed since the previous sample, the 21 landmarks are normalized
    (wrist-relative, scaled by the largest wrist-to-landmark distance) and
    stored together with the gesture label and a right-hand flag.  A mirrored
    preview window stays live between samples; pressing ``q`` stops early.

    Collected rows are appended to ``output_csv`` if it already exists,
    otherwise the file (and its parent directory) is created.

    Args:
        output_csv: Path of the CSV file to create or extend.
        gesture_label: Label written to the ``gesture`` column of every row.
        max_samples: Stop after this many samples have been recorded.
        capture_interval: Minimum number of seconds between two samples.
        frame_width: Requested camera frame width in pixels.
        frame_height: Requested camera frame height in pixels.

    Returns:
        None. Progress and the final row count are printed.
    """
    mp_hands = mp.solutions.hands
    draw_landmarks = mp.solutions.drawing_utils.draw_landmarks

    rows = []                            # collected samples
    last_capture_time = float("-inf")    # guarantees the first detection is saved

    cap = cv2.VideoCapture(0)  # open webcam
    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

        if not cap.isOpened():
            print("!!! Unable to access camera")
            return

        print(f"Camera ready. Capturing every {capture_interval}s. Press 'q' to quit.")

        # The context manager closes the MediaPipe graph even if the loop raises.
        with mp_hands.Hands(
            static_image_mode=True,        # process each frame independently
            max_num_hands=1,               # capture only one hand
            min_detection_confidence=0.9,  # detection confidence threshold
            min_tracking_confidence=0.7,   # tracking confidence threshold
        ) as hands:
            while cap.isOpened() and len(rows) < max_samples:
                success, frame = cap.read()
                if not success:
                    print("!!! Can't read frame")
                    break

                # Mirror once: the same image is used for detection and preview.
                # MediaPipe documents that handedness assumes a mirrored
                # (selfie-style) input, so the flip must happen before process().
                preview = cv2.flip(frame, 1)
                result = hands.process(cv2.cvtColor(preview, cv2.COLOR_BGR2RGB))

                # draw landmarks for user feedback
                if result.multi_hand_landmarks:
                    for hand_landmarks in result.multi_hand_landmarks:
                        draw_landmarks(preview, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                label = "None"
                if result.multi_hand_landmarks and result.multi_handedness:
                    label = result.multi_handedness[0].classification[0].label

                    # Throttle by timestamp instead of sleeping, so the preview
                    # and the 'q' key stay responsive between samples.
                    now = time.monotonic()
                    if now - last_capture_time >= capture_interval:
                        coords = _normalize_hand_landmarks(
                            result.multi_hand_landmarks[0].landmark
                        )
                        if coords is not None:
                            is_right = 1 if label == "Right" else 0  # right-hand flag
                            rows.append(coords + [gesture_label, is_right])
                            last_capture_time = now
                            print(
                                f"\t Sample {len(rows)}/{max_samples} captured "
                                f"({label} hand, gesture: {gesture_label})"
                            )

                text = f"{label} hand | Gesture: {gesture_label}"
                cv2.putText(preview, text, (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
                cv2.imshow("Hand Capture", preview)

                if cv2.waitKey(1) & 0xFF == ord('q'):  # exit on 'q'
                    break
    finally:
        # Always give the camera and the preview window back, even on errors.
        cap.release()
        cv2.destroyAllWindows()

    if not rows:
        print("No samples recorded.")
        return

    total_rows = _append_samples_to_csv(rows, output_csv)
    print(f"\nCSV saved: {output_csv} ({total_rows} total rows)")


def _normalize_hand_landmarks(landmarks):
    """Return 63 wrist-relative, scale-normalized coordinates, or None if degenerate.

    ``landmarks`` is a sequence of objects with ``x``, ``y`` and ``z``
    attributes; element 0 is treated as the wrist.  Every point is shifted so
    the wrist is the origin and divided by the largest wrist-to-landmark
    distance.  ``None`` is returned when that distance is zero or non-finite,
    because dividing by it would write NaN/inf features into the dataset.
    """
    points = np.array([[lm.x, lm.y, lm.z] for lm in landmarks], dtype=float)
    offsets = points - points[0]
    scale = np.linalg.norm(offsets, axis=1).max()
    if not np.isfinite(scale) or scale == 0:
        return None
    # Row-major flattening yields x0, y0, z0, x1, y1, z1, ...
    return (offsets / scale).ravel().tolist()


def _append_samples_to_csv(rows, output_csv):
    """Append ``rows`` to ``output_csv`` (creating it if needed); return total row count."""
    columns = [f"{axis}{i}" for i in range(21) for axis in ("x", "y", "z")]
    columns += ["gesture", "is_right_hand"]
    df = pd.DataFrame(rows, columns=columns)

    # dirname is "" for a bare filename, and os.makedirs("") raises.
    out_dir = os.path.dirname(output_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Merge with earlier recordings; a zero-byte file has nothing to read and
    # would make pd.read_csv raise EmptyDataError.
    if os.path.exists(output_csv) and os.path.getsize(output_csv) > 0:
        df = pd.concat([pd.read_csv(output_csv), df], ignore_index=True)

    df.to_csv(output_csv, index=False)
    return len(df)

# Collect values for each gesture

In [None]:
# Record up to 500 samples of the "question" gesture, one every 0.2 s.
output_csv = "../data/raw/upd_question.csv"
gesture_label = "question"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.2,
)

In [None]:
# Record up to 500 samples of the "if" gesture, one every 0.2 s.
output_csv = "../data/raw/upd_if.csv"
gesture_label = "if"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.2,
)

In [None]:
# Record up to 500 samples of the "civilian" gesture, one every 0.2 s.
output_csv = "../data/raw/upd_civilian.csv"
gesture_label = "civilian"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.2,
)

In [None]:
# Record up to 500 samples of the "cool" gesture, one every 0.15 s.
output_csv = "../data/raw/upd_cool.csv"
gesture_label = "cool"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.15,
)

In [None]:
# Record up to 500 samples of the "don" gesture, one every 0.2 s.
output_csv = "../data/raw/upd_don.csv"
gesture_label = "don"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.2,
)

In [None]:
# Record up to 500 samples of the "mafia" gesture, one every 0.1 s.
output_csv = "../data/raw/upd_mafia.csv"
gesture_label = "mafia"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.1,
)

In [None]:
# Record up to 500 samples of the "sheriff" gesture, one every 0.15 s.
output_csv = "../data/raw/upd_sheriff.csv"
gesture_label = "sheriff"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.15,
)

In [None]:
# Record up to 500 samples of the "you" gesture, one every 0.15 s.
output_csv = "../data/raw/upd1_you.csv"
gesture_label = "you"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.15,
)

In [None]:
# Record up to 500 samples of the "me" gesture, one every 0.15 s.
output_csv = "../data/raw/upd1_me.csv"
gesture_label = "me"

capture_hands_to_csv(
    output_csv=output_csv,
    gesture_label=gesture_label,
    max_samples=500,
    capture_interval=0.15,
)