In [1]:
import cv2
import os
import string
import mediapipe as mp
import numpy as np
import csv
import uuid

# --- Configuration ---
# Keys accepted as labels: every lowercase ASCII letter except 'j' and 'z'.
ALLOWED_LABELS = [ch for ch in string.ascii_lowercase if ch not in ['j', 'z']]
DATASET_DIR = "asl_dataset"
CSV_PATH = os.path.join(DATASET_DIR, "all_landmarks.csv")
# Number of (x, y, z) landmark triples per CSV row; MediaPipe Hands reports 21 per hand.
NUM_LANDMARKS = 21
# Currently selected label; None until a label key has been pressed.
LABEL_KEY = None

# --- Setup Directories ---
os.makedirs(DATASET_DIR, exist_ok=True)
# Write the header when the CSV is missing OR present but empty (e.g. left
# behind by an interrupted run); otherwise the rows appended later would end
# up in a file without column names.
if not os.path.exists(CSV_PATH) or os.path.getsize(CSV_PATH) == 0:
    with open(CSV_PATH, 'w', newline='') as f:
        writer = csv.writer(f)
        # Column order is label, x0, y0, z0, x1, y1, z1, ...
        header = ['label'] + [f"{axis}{i}" for i in range(NUM_LANDMARKS) for axis in ['x', 'y', 'z']]
        writer.writerow(header)

# --- MediaPipe Setup ---
mp_hands = mp.solutions.hands
# Video-stream mode (static_image_mode=False), reporting at most one hand.
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)
# Pairs of landmark indices that draw_landmarks joins with line segments.
connections = mp_hands.HAND_CONNECTIONS

def depth_to_color(z, min_z, max_z):
    """Map a depth value onto a green-to-red BGR colour.

    Args:
        z: Depth value to colour.
        min_z: Depth that maps to pure green.
        max_z: Depth that maps to (almost) pure red.

    Returns:
        A ``(blue, green, red)`` tuple of ints; blue is always 0.
    """
    # The small epsilon keeps the division defined when min_z == max_z.
    span = max_z - min_z + 1e-6
    ratio = (z - min_z) / span
    green = int(255 * (1 - ratio))
    red = int(255 * ratio)
    return (0, green, red)  # BGR format

def draw_landmarks(frame, landmarks, w, h):
    """Draw depth-coloured landmark dots and connecting lines onto ``frame``.

    Args:
        frame: Image that ``cv2.circle`` / ``cv2.line`` draw on in place.
        landmarks: Sequence of objects exposing ``x``, ``y`` and ``z``.
        w: Factor applied to each ``x`` to obtain a pixel column.
        h: Factor applied to each ``y`` to obtain a pixel row.

    Returns:
        A list of ``(x_px, y_px, z)`` tuples, one per landmark, in input order.
    """
    depths = [point.z for point in landmarks]
    lo_z, hi_z = min(depths), max(depths)

    # Pixel position plus raw depth for every landmark.
    landmark_px = [(int(point.x * w), int(point.y * h), point.z) for point in landmarks]

    # Filled dot per landmark, coloured by its own depth.
    for px, py, depth in landmark_px:
        cv2.circle(frame, (px, py), 6, depth_to_color(depth, lo_z, hi_z), -1)

    # One segment per connection, coloured by the mean depth of its endpoints.
    for first, second in connections:
        ax, ay, az = landmark_px[first]
        bx, by, bz = landmark_px[second]
        segment_color = depth_to_color((az + bz) / 2, lo_z, hi_z)
        cv2.line(frame, (ax, ay), (bx, by), segment_color, 2)

    return landmark_px

def save_data(label, img, landmarks):
    """Persist one labelled sample: a PNG image plus a CSV row of landmark values.

    The image is written to ``DATASET_DIR/<label>/<label>_<uid>.png`` and the
    landmark values are appended to ``CSV_PATH`` with the upper-cased label in
    the first column.

    Args:
        label: Label string; used as-is for the directory and file name and
            upper-cased for the CSV row.
        img: Image handed to ``cv2.imwrite``.
        landmarks: Iterable of landmark values forming the rest of the CSV row.

    Returns:
        None. When the image cannot be written, a failure message is printed
        and no CSV row is appended, so images and rows stay in step.
    """
    uid = uuid.uuid4().hex[:16]
    label_dir = os.path.join(DATASET_DIR, label)
    os.makedirs(label_dir, exist_ok=True)

    img_filename = os.path.join(label_dir, f"{label}_{uid}.png")
    # cv2.imwrite reports failure through its return value, so check it
    # instead of assuming the file exists.
    if not cv2.imwrite(img_filename, img):
        print(f"Failed to save image: {img_filename}")
        return

    with open(CSV_PATH, 'a', newline='') as f:
        writer = csv.writer(f)
        # Unpacking accepts any iterable of values, not only a list.
        writer.writerow([label.upper(), *landmarks])

    print(f"Saved image: {img_filename}")
    print(f"Saved landmarks to: {CSV_PATH}")

# --- Main Loop ---
# Interactive capture: a letter key selects the label, Space stores the sample
# computed from the current frame, ESC quits.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would exit silently on the first read.
    print("Could not open webcam (device 0).")
print("Press a–y (excluding j & z) to set label. Space to save. ESC to quit.")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        original = frame.copy()
        h, w = frame.shape[:2]

        # Reset the per-frame sample so Space can never store data left over
        # from an earlier frame when no hand is detected right now.
        cropped_hand = None
        landmark_row = None

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)

        if results.multi_hand_landmarks:
            landmarks = results.multi_hand_landmarks[0].landmark
            draw_landmarks(frame, landmarks, w, h)

            # Bounding box: landmark extent padded by 20 px, clipped to the frame.
            x_coords = [lm.x * w for lm in landmarks]
            y_coords = [lm.y * h for lm in landmarks]
            x_min, x_max = max(int(min(x_coords)) - 20, 0), min(int(max(x_coords)) + 20, w)
            y_min, y_max = max(int(min(y_coords)) - 20, 0), min(int(max(y_coords)) + 20, h)

            # Landmarks lying outside the image can produce an empty box;
            # skip the crop in that case instead of crashing in cvtColor.
            if x_min < x_max and y_min < y_max:
                # Blur background outside hand. Inside the box the annotated
                # frame is kept so the landmark overlay stays visible; the
                # blur itself is computed from the clean copy.
                mask = np.zeros(frame.shape[:2], dtype=np.uint8)
                cv2.rectangle(mask, (x_min, y_min), (x_max, y_max), 255, -1)
                blurred = cv2.GaussianBlur(original, (31, 31), 0)
                frame = np.where(mask[:, :, None] == 255, frame, blurred)

                # Grayscale and crop, taken from the clean copy so the saved
                # image carries no overlay.
                hand_roi = original[y_min:y_max, x_min:x_max]
                gray = cv2.cvtColor(hand_roi, cv2.COLOR_BGR2GRAY)
                contrast = cv2.equalizeHist(gray)
                cropped_hand = cv2.resize(contrast, (128, 128))
                cv2.imshow("Cropped Hand", cv2.resize(cropped_hand, (200, 200)))

                # Prepare CSV landmark row: x, y, z for every landmark in order.
                landmark_row = [val for lm in landmarks for val in (lm.x, lm.y, lm.z)]

        # Show current label
        if LABEL_KEY:
            cv2.putText(frame, f"Label: {LABEL_KEY.upper()}", (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        cv2.imshow("Webcam", frame)
        key = cv2.waitKey(1) & 0xFF

        if key == 27:  # ESC
            break
        elif chr(key).lower() in ALLOWED_LABELS:
            LABEL_KEY = chr(key).lower()
            print(f"Label set to: {LABEL_KEY.upper()}")
        elif key == 32 and LABEL_KEY and cropped_hand is not None:
            save_data(LABEL_KEY, cropped_hand, landmark_row)
finally:
    # Release the camera and close the windows even when the loop is left via
    # an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

ModuleNotFoundError: No module named 'mediapipe'

In [6]:
# Install the third-party packages (OpenCV, MediaPipe) that the capture cell imports.
# Restart the kernel afterwards if the newly installed packages are not picked up.
%pip install opencv-python mediapipe 

Note: you may need to restart the kernel to use updated packages.


In [8]:
import cv2
import os
import string
import mediapipe as mp
import numpy as np
import csv
import uuid

# --- Configuration ---
# Keys accepted as labels: every lowercase ASCII letter except 'j' and 'z'.
ALLOWED_LABELS = [ch for ch in string.ascii_lowercase if ch not in ['j', 'z']]
DATASET_DIR = "asl_dataset"
CSV_PATH = os.path.join(DATASET_DIR, "all_landmarks.csv")
# Number of (x, y, z) landmark triples per CSV row; MediaPipe Hands reports 21 per hand.
NUM_LANDMARKS = 21
# Currently selected label; None until a label key has been pressed.
LABEL_KEY = None

# --- Setup Directories ---
os.makedirs(DATASET_DIR, exist_ok=True)
# Write the header when the CSV is missing OR present but empty (e.g. left
# behind by an interrupted run); otherwise the rows appended later would end
# up in a file without column names.
if not os.path.exists(CSV_PATH) or os.path.getsize(CSV_PATH) == 0:
    with open(CSV_PATH, 'w', newline='') as f:
        writer = csv.writer(f)
        # Column order is label, x0, y0, z0, x1, y1, z1, ...
        header = ['label'] + [f"{axis}{i}" for i in range(NUM_LANDMARKS) for axis in ['x', 'y', 'z']]
        writer.writerow(header)

# --- MediaPipe Setup ---
mp_hands = mp.solutions.hands
# Video-stream mode (static_image_mode=False), reporting at most one hand.
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)
# Pairs of landmark indices that draw_landmarks joins with line segments.
connections = mp_hands.HAND_CONNECTIONS

def depth_to_color(z, min_z, max_z):
    """Map a depth value onto a green-to-red BGR colour.

    Args:
        z: Depth value to colour.
        min_z: Depth that maps to pure green.
        max_z: Depth that maps to (almost) pure red.

    Returns:
        A ``(blue, green, red)`` tuple of ints; blue is always 0.
    """
    # The small epsilon keeps the division defined when min_z == max_z.
    span = max_z - min_z + 1e-6
    ratio = (z - min_z) / span
    green = int(255 * (1 - ratio))
    red = int(255 * ratio)
    return (0, green, red)  # BGR format

def draw_landmarks(frame, landmarks, w, h):
    """Draw depth-coloured landmark dots and connecting lines onto ``frame``.

    Args:
        frame: Image that ``cv2.circle`` / ``cv2.line`` draw on in place.
        landmarks: Sequence of objects exposing ``x``, ``y`` and ``z``.
        w: Factor applied to each ``x`` to obtain a pixel column.
        h: Factor applied to each ``y`` to obtain a pixel row.

    Returns:
        A list of ``(x_px, y_px, z)`` tuples, one per landmark, in input order.
    """
    depths = [point.z for point in landmarks]
    lo_z, hi_z = min(depths), max(depths)

    # Pixel position plus raw depth for every landmark.
    landmark_px = [(int(point.x * w), int(point.y * h), point.z) for point in landmarks]

    # Filled dot per landmark, coloured by its own depth.
    for px, py, depth in landmark_px:
        cv2.circle(frame, (px, py), 6, depth_to_color(depth, lo_z, hi_z), -1)

    # One segment per connection, coloured by the mean depth of its endpoints.
    for first, second in connections:
        ax, ay, az = landmark_px[first]
        bx, by, bz = landmark_px[second]
        segment_color = depth_to_color((az + bz) / 2, lo_z, hi_z)
        cv2.line(frame, (ax, ay), (bx, by), segment_color, 2)

    return landmark_px

def save_data(label, img, landmarks):
    """Persist one labelled sample: a PNG image plus a CSV row of landmark values.

    The image is written to ``DATASET_DIR/<label>/<label>_<uid>.png`` and the
    landmark values are appended to ``CSV_PATH`` with the upper-cased label in
    the first column.

    Args:
        label: Label string; used as-is for the directory and file name and
            upper-cased for the CSV row.
        img: Image handed to ``cv2.imwrite``.
        landmarks: Iterable of landmark values forming the rest of the CSV row.

    Returns:
        None. When the image cannot be written, a failure message is printed
        and no CSV row is appended, so images and rows stay in step.
    """
    uid = uuid.uuid4().hex[:16]
    label_dir = os.path.join(DATASET_DIR, label)
    os.makedirs(label_dir, exist_ok=True)

    img_filename = os.path.join(label_dir, f"{label}_{uid}.png")
    # cv2.imwrite reports failure through its return value, so check it
    # instead of assuming the file exists.
    if not cv2.imwrite(img_filename, img):
        print(f"Failed to save image: {img_filename}")
        return

    with open(CSV_PATH, 'a', newline='') as f:
        writer = csv.writer(f)
        # Unpacking accepts any iterable of values, not only a list.
        writer.writerow([label.upper(), *landmarks])

    print(f"Saved image: {img_filename}")
    print(f"Saved landmarks to: {CSV_PATH}")

# --- Main Loop ---
# Interactive capture: a letter key selects the label, Space stores the sample
# computed from the current frame, ESC quits.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would exit silently on the first read.
    print("Could not open webcam (device 0).")
print("Press a–y (excluding j & z) to set label. Space to save. ESC to quit.")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        original = frame.copy()
        h, w = frame.shape[:2]

        # Reset the per-frame sample so Space can never store data left over
        # from an earlier frame when no hand is detected right now.
        cropped_hand = None
        landmark_row = None

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)

        if results.multi_hand_landmarks:
            landmarks = results.multi_hand_landmarks[0].landmark
            draw_landmarks(frame, landmarks, w, h)

            # Bounding box: landmark extent padded by 20 px, clipped to the frame.
            x_coords = [lm.x * w for lm in landmarks]
            y_coords = [lm.y * h for lm in landmarks]
            x_min, x_max = max(int(min(x_coords)) - 20, 0), min(int(max(x_coords)) + 20, w)
            y_min, y_max = max(int(min(y_coords)) - 20, 0), min(int(max(y_coords)) + 20, h)

            # Landmarks lying outside the image can produce an empty box;
            # skip the crop in that case instead of crashing in cvtColor.
            if x_min < x_max and y_min < y_max:
                # Blur background outside hand. Inside the box the annotated
                # frame is kept so the landmark overlay stays visible; the
                # blur itself is computed from the clean copy.
                mask = np.zeros(frame.shape[:2], dtype=np.uint8)
                cv2.rectangle(mask, (x_min, y_min), (x_max, y_max), 255, -1)
                blurred = cv2.GaussianBlur(original, (31, 31), 0)
                frame = np.where(mask[:, :, None] == 255, frame, blurred)

                # Grayscale and crop, taken from the clean copy so the saved
                # image carries no overlay.
                hand_roi = original[y_min:y_max, x_min:x_max]
                gray = cv2.cvtColor(hand_roi, cv2.COLOR_BGR2GRAY)
                contrast = cv2.equalizeHist(gray)
                cropped_hand = cv2.resize(contrast, (128, 128))
                cv2.imshow("Cropped Hand", cv2.resize(cropped_hand, (200, 200)))

                # Prepare CSV landmark row: x, y, z for every landmark in order.
                landmark_row = [val for lm in landmarks for val in (lm.x, lm.y, lm.z)]

        # Show current label
        if LABEL_KEY:
            cv2.putText(frame, f"Label: {LABEL_KEY.upper()}", (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        cv2.imshow("Webcam", frame)
        key = cv2.waitKey(1) & 0xFF

        if key == 27:  # ESC
            break
        elif chr(key).lower() in ALLOWED_LABELS:
            LABEL_KEY = chr(key).lower()
            print(f"Label set to: {LABEL_KEY.upper()}")
        elif key == 32 and LABEL_KEY and cropped_hand is not None:
            save_data(LABEL_KEY, cropped_hand, landmark_row)
finally:
    # Release the camera and close the windows even when the loop is left via
    # an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

Press a–y (excluding j & z) to set label. Space to save. ESC to quit.
Label set to: B
Saved image: asl_dataset\b\b_78df0e0f5c6d4a69.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\b\b_84e823f00b7146bb.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\b\b_a0aec1b07a2045f0.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\b\b_b9042f71b2cd4e01.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\b\b_6c2fa78512e347a0.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\b\b_ca7a195c9bf94ef8.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\b\b_60869f282a954be2.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\b\b_9821595af8f34666.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\b\b_b3d512a9589441ec.png
Saved landmarks to: asl_dataset\all_landmarks.csv
Saved image: a