In [None]:
import os
import cv2
import numpy as np
import mediapipe as mp
import pandas as pd
from tqdm import tqdm

# Module-level aliases for the MediaPipe Hands solution and its drawing helpers
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Input dataset root (main() looks for one sub-folder per letter below it) and
# the directory that receives the extracted keypoints. The output directory is
# created as soon as this module is executed/imported.
DATASET_PATH = "/workspaces/asl_detection/machine_learning/datasets/asl_now/Combined_Dataset"
OUTPUT_PATH = "/workspaces/asl_detection/machine_learning/datasets/asl_now/Combined_Keypoints"
os.makedirs(OUTPUT_PATH, exist_ok=True)

def extract_hand_keypoints(results):
    """
    Extracts the keypoints of the signing hand from a MediaPipe Hands result.

    The first hand that MediaPipe labels "Right" is preferred. If none of the
    detected hands carries that label, the first detected hand is used.

    NOTE(review): MediaPipe documents that handedness is determined assuming a
    mirrored (selfie-style) input image - confirm which physical hand the
    "Right" label corresponds to for this dataset.

    Args:
        results: Object returned by ``Hands.process``. Only its
            ``multi_hand_landmarks`` and ``multi_handedness`` attributes are
            read; both are indexed in parallel.

    Returns:
        Flat NumPy array with the (x, y, z) values of every landmark of the
        selected hand (21 landmarks -> 63 values). When no hand was detected,
        an all-zero array of length 63 is returned.
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        # No detection at all: callers recognise this by the all-zero vector.
        return np.zeros(21 * 3)

    # Select the hand explicitly instead of checking whether the output is
    # "still all zeros": start with the fallback (first hand) and replace it
    # with the first hand classified as "Right", if there is one.
    selected_hand = detected_hands[0]
    for hand_idx, hand_landmarks in enumerate(detected_hands):
        handedness = results.multi_handedness[hand_idx].classification[0].label
        if handedness == "Right":
            selected_hand = hand_landmarks
            break

    return np.array(
        [[lm.x, lm.y, lm.z] for lm in selected_hand.landmark]
    ).flatten()

def process_image(image_path, hands):
    """
    Runs MediaPipe hand detection on one image file and extracts keypoints.

    The image is read with OpenCV, converted from BGR to RGB and passed to
    ``hands.process``. If that yields no hand landmarks, detection is retried
    once on a horizontally flipped copy; in that case the returned results
    (and the keypoints derived from them) are those of the flipped image.

    Args:
        image_path: Path of the image file to read.
        hands: Object whose ``process`` method is called with the RGB image
            (main() passes an ``mp_hands.Hands`` instance).

    Returns:
        ``None`` if the image could not be loaded; otherwise a tuple
        ``(keypoints, results)`` with the output of extract_hand_keypoints
        and the detection results it was computed from.
    """
    bgr_frame = cv2.imread(image_path)
    if bgr_frame is None:
        print(f"Error loading image: {image_path}")
        return None

    # MediaPipe is fed RGB data, OpenCV loads BGR
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    detection = hands.process(rgb_frame)

    # Second attempt on the mirrored frame when the first pass found nothing
    if not detection.multi_hand_landmarks:
        detection = hands.process(cv2.flip(rgb_frame, 1))

    return extract_hand_keypoints(detection), detection

def save_visualization(image_path, results, letter, idx):
    """
    Saves a copy of the image with the detected hand landmarks drawn on it.

    The output is written to
    ``<OUTPUT_PATH>/visualizations/<letter>/<idx as 4 digits>.png``; the
    directory is created if necessary. Landmarks are drawn onto the image
    exactly as it is loaded from ``image_path``.

    NOTE(review): if ``results`` stem from the flipped-image fallback in
    process_image, the landmarks refer to the flipped frame while the image
    drawn on here is not flipped - confirm whether that is acceptable.

    Args:
        image_path: Path of the source image.
        results: Detection results; only ``multi_hand_landmarks`` is read.
        letter: Name of the sub-folder the visualization is stored in.
        idx: Integer used to build the zero-padded output file name.

    Returns:
        None. If the image cannot be loaded, an error is printed and nothing
        is written.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None; drawing on
        # or writing None would raise, so report it like process_image does.
        print(f"Error loading image: {image_path}")
        return

    for hand_landmarks in results.multi_hand_landmarks or []:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS)

    # Save the visualized image
    vis_dir = os.path.join(OUTPUT_PATH, "visualizations", letter)
    os.makedirs(vis_dir, exist_ok=True)
    output_path = os.path.join(vis_dir, f"{idx:04d}.png")
    cv2.imwrite(output_path, image)

def _save_keypoints(all_keypoints, alphabet):
    """Writes metadata.csv and asl_keypoints.npz for the collected samples."""
    # Build the metadata with explicit columns so that an empty sample list
    # still produces a valid (header-only) CSV instead of raising KeyError.
    metadata_df = pd.DataFrame({
        'letter': [data['letter'] for data in all_keypoints],
        'filename': [data['filename'] for data in all_keypoints],
    })
    metadata_df.to_csv(os.path.join(OUTPUT_PATH, 'metadata.csv'), index=False)

    if all_keypoints:
        keypoints_array = np.array([data['keypoints'] for data in all_keypoints])
    else:
        # Keep the (samples, 63) layout even when there are no samples
        keypoints_array = np.zeros((0, 21 * 3))

    # Labels are the position of the letter within the alphabet list
    label_of = {letter: index for index, letter in enumerate(alphabet)}
    labels = np.array([label_of[data['letter']] for data in all_keypoints], dtype=int)

    np.savez(os.path.join(OUTPUT_PATH, 'asl_keypoints.npz'),
             keypoints=keypoints_array,
             labels=labels)


def _print_statistics(stats, alphabet):
    """Prints the per-letter detection table."""
    print("\nStatistics per letter:")
    print("Letter | Total | Not detected | Detection rate")
    print("-" * 50)
    for letter in alphabet:
        total = stats[letter]['total']
        not_detected = stats[letter]['not_detected']
        detection_rate = ((total - not_detected) / total * 100) if total > 0 else 0
        print(f"{letter:^9} | {total:^6} | {not_detected:^12} | {detection_rate:^6.1f}%")


def main(visualize=False):
    """
    Extracts hand keypoints for every letter folder of the dataset.

    For each letter in the alphabet (all letters except 'J' and 'Z'), every
    ``.png`` file in ``DATASET_PATH/<letter>`` is passed to process_image.
    Samples with a detected hand are collected and finally written to
    ``OUTPUT_PATH`` as ``metadata.csv`` (letter, filename) and
    ``asl_keypoints.npz`` (arrays ``keypoints`` and ``labels``, where a label
    is the index of the letter in the alphabet list). Images that cannot be
    loaded or in which no hand is found are counted as "not detected".
    A per-letter detection table is printed at the end.

    Args:
        visualize: If True, a landmark visualization is saved for every 50th
            successfully loaded image of each letter.

    Returns:
        None.
    """
    # Alphabet with all letters except 'j' and 'z'
    alphabet = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']

    # List for extracted keypoints
    all_keypoints = []
    # Dictionary for statistics
    stats = {letter: {'total': 0, 'not_detected': 0} for letter in alphabet}

    # NOTE(review): MediaPipe documents min_tracking_confidence as ignored when
    # static_image_mode is True - confirm; kept to preserve the configuration.
    with mp_hands.Hands(
            static_image_mode=True,
            max_num_hands=2,
            min_detection_confidence=0.2,
            min_tracking_confidence=0.2) as hands:

        # Iterate over all letter folders
        for letter in alphabet:
            letter_dir = os.path.join(DATASET_PATH, letter)
            if not os.path.isdir(letter_dir):
                print(f"Folder for letter {letter} not found: {letter_dir}")
                continue

            print(f"Processing letter: {letter}")

            # os.listdir returns entries in arbitrary order; sort them so the
            # sample order and the "every 50th image" choice are reproducible.
            image_files = sorted(f for f in os.listdir(letter_dir) if f.endswith('.png'))
            stats[letter]['total'] = len(image_files)

            # Iterate over all images in the folder with progress bar
            for idx, image_file in enumerate(tqdm(image_files, desc=f"Letter {letter}")):
                image_path = os.path.join(letter_dir, image_file)

                result = process_image(image_path, hands)
                if result is None:
                    # Image could not be loaded
                    stats[letter]['not_detected'] += 1
                    continue

                keypoints, mp_results = result

                # An all-zero vector is how extract_hand_keypoints reports
                # "no hand"; only real detections are stored.
                if np.all(keypoints == 0):
                    stats[letter]['not_detected'] += 1
                else:
                    all_keypoints.append({
                        'letter': letter,
                        'filename': image_file,
                        'keypoints': keypoints
                    })

                # Optionally visualize every 50th image
                if visualize and idx % 50 == 0:
                    save_visualization(image_path, mp_results, letter, idx)

    print(f"\nSuccessfully processed {len(all_keypoints)} images.")
    if not all_keypoints:
        print(f"No hand keypoints were extracted; check the dataset path: {DATASET_PATH}")

    _save_keypoints(all_keypoints, alphabet)
    print(f"\nKeypoints saved under: {OUTPUT_PATH}")

    _print_statistics(stats, alphabet)

# Run the extraction only when this file is executed as a script
if __name__ == "__main__":
    main(visualize=False)  # Set to True for visualizations