In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os

In [None]:
# Module-level aliases for the MediaPipe solution modules.
# `mp_holistic` is read as a global by process_video() below.
mp_holistic = mp.solutions.holistic
# Drawing helpers; the visualization function further down creates its own local alias.
mp_drawing = mp.solutions.drawing_utils

In [None]:
def extract_keypoints(results):
    """Flatten one frame's Holistic result into a single 1-D feature vector.

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``. Each attribute is either falsy or has a
            ``landmark`` iterable whose items carry ``x``, ``y`` and ``z``.

    Returns:
        1-D ``np.ndarray`` laid out as pose values, then left hand, then right
        hand. A group that is missing is replaced by zeros (33*3 values for the
        pose, 21*3 values per hand).
    """
    # (landmark group, number of zeros to emit when the group is absent)
    groups = (
        (results.pose_landmarks, 33 * 3),
        (results.left_hand_landmarks, 21 * 3),
        (results.right_hand_landmarks, 21 * 3),
    )

    parts = []
    for group, fallback_size in groups:
        if group:
            coords = [[lm.x, lm.y, lm.z] for lm in group.landmark]
            parts.append(np.array(coords).flatten())
        else:
            parts.append(np.zeros(fallback_size))

    return np.concatenate(parts)

In [None]:
def process_video(video_path):
    """Run MediaPipe Holistic over every frame of a video and collect keypoints.

    NOTE(review): this relies on the single-argument ``extract_keypoints(results)``
    defined in the cell above. A later cell in this notebook rebinds
    ``extract_keypoints`` to a ``(video_path, output_path, ...)`` function, so this
    function only works if it is called before that cell has been executed.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        ``np.ndarray`` built from the per-frame vectors, one row per frame that
        could be read. If no frame could be read the array is empty.
    """
    cap = cv2.VideoCapture(video_path)
    keypoints_seq = []

    try:
        # The context manager closes the Holistic graph when the loop ends or raises;
        # previously the instance was never closed.
        with mp_holistic.Holistic() as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV delivers BGR frames; convert before handing them to MediaPipe.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(image)

                keypoints_seq.append(extract_keypoints(results))
    finally:
        # Always give the capture handle back, even if a frame failed to process.
        cap.release()

    cv2.destroyAllWindows()

    return np.array(keypoints_seq)

In [44]:
import cv2
import mediapipe as mp
import numpy as np
from pathlib import Path
import glob
import os

def pad_sequence(sequence, target_length=102):
    """Return ``sequence`` adjusted to exactly ``target_length`` rows.

    Args:
        sequence: Frame-major sequence (rows are frames).
        target_length: Number of rows the result must have.

    Returns:
        ``sequence`` itself when it already has the right length, its first
        ``target_length`` rows when it is longer, or a new array with the last
        row repeated at the end when it is shorter.

    Raises:
        IndexError: If ``sequence`` is empty and padding is required, because
            there is no last frame to repeat.
    """
    missing = target_length - len(sequence)

    if missing == 0:
        return sequence

    if missing < 0:
        # Too long: keep only the leading frames.
        return sequence[:target_length]

    # Too short: repeat the final frame until the target length is reached.
    filler = np.tile(sequence[-1], (missing, 1))
    return np.vstack([sequence, filler])

def _flatten_landmarks(landmark_list, num_landmarks):
    """Return a landmark group as a flat [x, y, z, ...] array, or zeros if the group is missing."""
    if not landmark_list:
        return np.zeros(num_landmarks * 3)
    return np.array([[lm.x, lm.y, lm.z] for lm in landmark_list.landmark]).flatten()


def extract_keypoints(video_path: str, output_path: str, target_frames=102):
    """Extract pose and hand keypoints from one video and save them as a ``.npy`` file.

    Each frame becomes one row of 225 values: 33 pose landmarks followed by 21
    left-hand and 21 right-hand landmarks, three coordinates (x, y, z) each. The
    values are stored exactly as MediaPipe reports them; a group that was not
    detected in a frame is filled with zeros. The frame sequence is then brought
    to ``target_frames`` rows via ``pad_sequence`` before saving.

    NOTE(review): this definition rebinds the name ``extract_keypoints`` that an
    earlier cell uses for a single-frame ``extract_keypoints(results)`` helper.

    Args:
        video_path: Path of the video to read.
        output_path: Destination passed to ``np.save``.
        target_frames: Number of rows in the saved array.

    Returns:
        The saved array, or ``None`` if the video could not be opened or
        contained no readable frames (a message is printed in both cases).
    """
    # Open the video first so that a failed open returns before any MediaPipe
    # resources are allocated (previously the Holistic instance leaked here).
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f"Error opening video: {video_path}")
        return None

    all_keypoints = []
    try:
        # The context manager closes the Holistic graph even if a frame raises.
        with mp.solutions.holistic.Holistic(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        ) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert frame to RGB (MediaPipe requires RGB)
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(frame_rgb)

                # Fixed layout per frame: pose, left hand, right hand.
                all_keypoints.append(
                    np.concatenate([
                        _flatten_landmarks(results.pose_landmarks, 33),
                        _flatten_landmarks(results.left_hand_landmarks, 21),
                        _flatten_landmarks(results.right_hand_landmarks, 21),
                    ])
                )
    finally:
        cap.release()

    frame_count = len(all_keypoints)
    if frame_count == 0:
        print(f"No frames found in: {video_path}")
        return None

    # Pad (or truncate) to the target length, then persist.
    keypoints_array = pad_sequence(np.array(all_keypoints), target_frames)
    np.save(output_path, keypoints_array)

    print(f"Processed: {video_path}")
    print(f"  - Original frames: {frame_count}")
    print(f"  - After padding: {len(keypoints_array)}")
    print(f"  - Saved to: {output_path}")

    return keypoints_array

def process_video_directory(input_dir: str, output_dir: str, video_pattern="*.mp4"):
    """Extract keypoints for every video in ``input_dir`` that matches ``video_pattern``.

    For each match ``<name>.<ext>`` the keypoints are written to
    ``<output_dir>/<name>_keypoints.npy`` by ``extract_keypoints``.

    Args:
        input_dir: Directory that is searched (non-recursively) for videos.
        output_dir: Directory for the ``.npy`` files; created if it does not exist.
        video_pattern: Glob pattern applied inside ``input_dir``.
    """
    # Create output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # glob returns matches in arbitrary order; sort so every run processes the
    # videos in the same sequence.
    video_paths = sorted(glob.glob(os.path.join(input_dir, video_pattern)))
    total_videos = len(video_paths)

    print(f"Found: {total_videos} videos")

    failed = []
    for i, video_path in enumerate(video_paths, 1):
        video_name = os.path.basename(video_path)
        output_name = os.path.splitext(video_name)[0] + "_keypoints.npy"
        output_path = os.path.join(output_dir, output_name)

        print(f"\nProcessing video {i}/{total_videos}: {video_name}")
        # extract_keypoints signals an unreadable or empty video by returning None.
        if extract_keypoints(video_path, output_path) is None:
            failed.append(video_name)

    if failed:
        print(f"\nFailed: {len(failed)}/{total_videos} videos: {', '.join(failed)}")

# Driver for the extraction step: runs the whole input directory through
# process_video_directory when the cell is executed.
if __name__ == "__main__":
    # Define paths
    base_dir = "/workspaces/asl_detection/machine_learning/datasets"
    video_dir = f"{base_dir}/test_extraction"
    # NOTE(review): the space before "/test_keypoints" looks like a typo. It puts the
    # output under a directory literally named "datasets " (trailing space) instead of
    # under base_dir. The visualization cell hardcodes the same spaced path, so the two
    # must be changed together.
    output_dir = f"{base_dir} /test_keypoints"
    
    # Process all videos
    process_video_directory(video_dir, output_dir)


Found: 1 videos

Processing video 1/1: 69546_yes_72.mp4


W0000 00:00:1741649265.075285   24813 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741649265.107605   24813 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741649265.110720   24815 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741649265.110880   24818 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741649265.111003   24813 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741649265.123197   24819 inference_feedback_manager.cc:114] Feedback manager 

Processed: /workspaces/asl_detection/machine_learning/datasets/test_extraction/69546_yes_72.mp4
  - Original frames: 72
  - After padding: 102
  - Saved to: /workspaces/asl_detection/machine_learning/datasets /test_keypoints/69546_yes_72_keypoints.npy


In [45]:
import cv2
import mediapipe as mp
import numpy as np
from pathlib import Path
from mediapipe.framework.formats import landmark_pb2

def visualize_keypoints(video_path: str, keypoints_path: str):
    """Play a video with previously saved keypoints drawn on top of each frame.

    The keypoint file is expected to hold one row per frame laid out as
    33 pose landmarks, 21 left-hand landmarks and 21 right-hand landmarks
    (x, y, z each, no face landmarks). Playback stops at the end of the video
    or of the keypoint rows, whichever comes first. Press ``q`` to quit and
    space to pause (any key resumes).

    Args:
        video_path: Video to display.
        keypoints_path: ``.npy`` file loaded with ``np.load``.
    """
    mp_holistic = mp.solutions.holistic
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print("Fehler beim Öffnen des Videos")
        return

    try:
        keypoints = np.load(keypoints_path)
        print(f"Geladene Keypoints Shape: {keypoints.shape}")

        pose_style = mp_drawing_styles.get_default_pose_landmarks_style()
        hand_style = mp_drawing_styles.get_default_hand_landmarks_style()

        # (slice into a frame row, landmark count, connections, drawing style)
        groups = (
            (slice(0, 33 * 3), 33, mp_holistic.POSE_CONNECTIONS, pose_style),
            (slice(33 * 3, (33 + 21) * 3), 21, mp_holistic.HAND_CONNECTIONS, hand_style),
            (slice((33 + 21) * 3, (33 + 21 + 21) * 3), 21, mp_holistic.HAND_CONNECTIONS, hand_style),
        )

        frame_idx = 0
        while cap.isOpened() and frame_idx < len(keypoints):
            ret, frame = cap.read()
            if not ret:
                break

            current_keypoints = keypoints[frame_idx]

            for indices, num_landmarks, connections, style in groups:
                values = current_keypoints[indices]

                # The extraction cell writes all zeros for a body part that was not
                # detected; drawing those would stack every landmark at (0, 0).
                if not np.any(values):
                    continue

                landmarks = create_landmark_proto(values, num_landmarks)
                if landmarks is None:
                    continue

                mp_drawing.draw_landmarks(
                    frame,
                    landmarks,
                    connections,
                    landmark_drawing_spec=style
                )

            cv2.imshow('Holistic Keypoints Visualization', frame)

            # Poll the keyboard for 1 ms: 'q' quits, space pauses.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord(' '):
                cv2.waitKey(0)  # block until any key is pressed to resume

            frame_idx += 1
    finally:
        # Release the capture and close the window even if loading or drawing failed.
        cap.release()
        cv2.destroyAllWindows()

def create_landmark_proto(keypoints, num_landmarks):
    """Build a MediaPipe ``NormalizedLandmarkList`` from flat keypoint values.

    Args:
        keypoints: Indexable sequence laid out as [x0, y0, z0, x1, y1, z1, ...].
        num_landmarks: Number of landmarks expected in ``keypoints``.

    Returns:
        The populated landmark list, or ``None`` when ``keypoints`` does not
        contain exactly ``num_landmarks * 3`` values.
    """
    expected_values = num_landmarks * 3
    if len(keypoints) == expected_values:
        proto = landmark_pb2.NormalizedLandmarkList()
        # Walk the flat sequence three values (one landmark) at a time.
        for base in range(0, expected_values, 3):
            point = proto.landmark.add()
            point.x = keypoints[base]
            point.y = keypoints[base + 1]
            point.z = keypoints[base + 2]
        return proto

    return None

# Driver for the visualization step: overlays the saved keypoints on the source video.
if __name__ == "__main__":
    # Define paths
    video_path = "/workspaces/asl_detection/machine_learning/datasets/test_extraction/69546_yes_72.mp4"
    # NOTE(review): the directory component "datasets " contains a trailing space. It
    # matches the output path used by the extraction cell, so the two must be changed
    # together if that space is a typo.
    keypoints_path = "/workspaces/asl_detection/machine_learning/datasets /test_keypoints/69546_yes_72_keypoints.npy"
    
    # Visualize keypoints
    visualize_keypoints(video_path, keypoints_path) 

Geladene Keypoints Shape: (102, 225)
