# In [None]:
import cv2
import mediapipe as mp
import numpy as np
from pathlib import Path
import glob
import os

def pad_sequence(sequence, target_length=102):
    """Pad or truncate a frame sequence along its first axis.

    Sequences shorter than ``target_length`` are extended by repeating the
    last frame; longer sequences are cut to their first ``target_length``
    frames; sequences that already match are returned unchanged.

    Args:
        sequence: Array-like whose first axis indexes frames. It is coerced
            with ``np.asarray`` so the result is an ndarray regardless of
            which branch is taken.
        target_length: Desired number of frames in the result.

    Returns:
        An ndarray with ``target_length`` entries along the first axis (the
        input array itself when no change is needed).

    Raises:
        ValueError: If padding is required but the sequence is empty, since
            there is no last frame to repeat.
    """
    frames = np.asarray(sequence)
    current_length = len(frames)

    if current_length == target_length:
        return frames

    # Too long: keep only the leading frames.
    if current_length > target_length:
        return frames[:target_length]

    if current_length == 0:
        raise ValueError(
            "Cannot pad an empty sequence: there is no last frame to repeat."
        )

    # Too short: repeat the last frame. Slicing with [-1:] keeps the frame
    # axis, so this works for any per-frame shape, not only flat vectors.
    padding = np.repeat(frames[-1:], target_length - current_length, axis=0)
    return np.concatenate([frames, padding], axis=0)

def _landmarks_to_array(landmark_list, num_landmarks):
    """Flatten a landmark list to a 1-D (x, y, z, ...) array, or zeros if absent."""
    if not landmark_list:
        # Nothing detected in this frame: use a zero vector of the same
        # length so every frame has an identical layout.
        return np.zeros(num_landmarks * 3)
    return np.array(
        [[lm.x, lm.y, lm.z] for lm in landmark_list.landmark]
    ).flatten()


def extract_keypoints(video_path: str, output_path: str, target_frames=102):
    """Extract per-frame Holistic keypoints from one video and save them.

    Each frame becomes a single vector built by concatenating, in order, the
    pose landmarks (33 * 3 values), the left-hand landmarks (21 * 3) and the
    right-hand landmarks (21 * 3). The x/y/z values are stored as reported
    by MediaPipe; no further normalization is applied here. The per-frame
    vectors are stacked, padded or truncated to ``target_frames`` with
    ``pad_sequence``, and written to ``output_path`` with ``np.save``.

    Args:
        video_path: Path of the video file to read.
        output_path: Destination passed to ``np.save``.
        target_frames: Number of frames in the saved array.

    Returns:
        The saved keypoint array, or ``None`` if the video could not be
        opened or yielded no frames (a message is printed in both cases).
    """
    # Open the video first so that a bad path never allocates a Holistic
    # graph that would then have to be torn down again.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f"Error opening video: {video_path}")
        return None

    all_keypoints = []
    try:
        # The context manager closes the Holistic graph even if processing
        # a frame raises; the finally clause does the same for the capture.
        with mp.solutions.holistic.Holistic(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        ) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes to BGR; MediaPipe requires RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(frame_rgb)

                pose = _landmarks_to_array(results.pose_landmarks, 33)
                lh = _landmarks_to_array(results.left_hand_landmarks, 21)
                rh = _landmarks_to_array(results.right_hand_landmarks, 21)

                all_keypoints.append(np.concatenate([pose, lh, rh]))
    finally:
        cap.release()

    frame_count = len(all_keypoints)
    if frame_count == 0:
        print(f"No frames found in: {video_path}")
        return None

    # Stack the frames, then pad/truncate to the fixed target length.
    keypoints_array = pad_sequence(np.array(all_keypoints), target_frames)

    np.save(output_path, keypoints_array)
    print(f"Processed: {video_path}")
    print(f"  - Original frames: {frame_count}")
    print(f"  - After padding: {len(keypoints_array)}")
    print(f"  - Saved to: {output_path}")

    return keypoints_array

def process_video_directory(input_dir: str, output_dir: str, video_pattern="*.mp4"):
    """Extract keypoints for every matching video in a directory.

    Each video ``<name>.<ext>`` in ``input_dir`` that matches
    ``video_pattern`` is passed to ``extract_keypoints`` and its result is
    written to ``<output_dir>/<name>_keypoints.npy``. The return value of
    ``extract_keypoints`` is not inspected, so videos that fail to open are
    only reported by the messages that function prints.

    Args:
        input_dir: Directory searched (non-recursively) for videos.
        output_dir: Directory for the ``.npy`` files; created if missing.
        video_pattern: Glob pattern applied inside ``input_dir``.
    """
    # Create output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # glob returns matches in arbitrary order; sort them so the processing
    # order and the "i/N" progress output are reproducible between runs.
    video_paths = sorted(glob.glob(os.path.join(input_dir, video_pattern)))
    total_videos = len(video_paths)

    print(f"Found: {total_videos} videos")

    for i, video_path in enumerate(video_paths, 1):
        # Derive the output file name from the video's base name.
        video_name = os.path.basename(video_path)
        output_name = os.path.splitext(video_name)[0] + "_keypoints.npy"
        output_path = os.path.join(output_dir, output_name)

        print(f"\nProcessing video {i}/{total_videos}: {video_name}")
        extract_keypoints(video_path, output_path)

if __name__ == "__main__":
    # Define paths
    base_dir = "/workspaces/asl_detection/machine_learning/datasets"
    video_dir = f"{base_dir}/test_extraction"
    # No space before the slash: a stray one would place the output under a
    # separate directory named "datasets " instead of the dataset root.
    output_dir = f"{base_dir}/test_keypoints"

    # Process all videos
    process_video_directory(video_dir, output_dir)
