In [None]:
import os
import warnings

import cv2
import mediapipe as mp
import numpy as np

In [None]:
# Short aliases for the MediaPipe solution modules referenced in this notebook.
mp_drawing = mp.solutions.drawing_utils  # drawing helpers
mp_holistic = mp.solutions.holistic      # provides the Holistic model class

In [None]:
def extract_keypoints(results):
    """Flatten the pose and hand landmarks of one result into a 1-D vector.

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``. Each attribute is either falsy
            (nothing detected) or has a ``.landmark`` iterable whose items
            carry ``x``, ``y`` and ``z``.

    Returns:
        1-D ``np.ndarray`` laid out as ``[pose | left hand | right hand]``,
        with ``x, y, z`` stored consecutively per landmark. A missing group is
        replaced by zeros (33*3 for pose, 21*3 per hand), so a result with 33
        pose and 21 hand landmarks always yields 225 values.
    """

    def _flat_xyz(landmark_list, n_points):
        # Zero padding keeps the vector length fixed when a body part is not detected.
        if not landmark_list:
            return np.zeros(n_points * 3)
        coords = [[lm.x, lm.y, lm.z] for lm in landmark_list.landmark]
        return np.array(coords).flatten()

    parts = (
        _flat_xyz(results.pose_landmarks, 33),
        _flat_xyz(results.left_hand_landmarks, 21),
        _flat_xyz(results.right_hand_landmarks, 21),
    )
    return np.concatenate(parts)

In [None]:
def process_video(video_path):
    """Run MediaPipe Holistic on every frame of a video and collect keypoints.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``; expected to be a
            single video file.

    Returns:
        ``np.ndarray`` with one row per decoded frame, each row being the
        vector returned by ``extract_keypoints``. If the video cannot be
        opened, a ``RuntimeWarning`` is emitted and an empty array is
        returned.
    """
    keypoints_seq = []
    cap = cv2.VideoCapture(video_path)

    try:
        if not cap.isOpened():
            # Make the failure visible instead of silently returning no features.
            warnings.warn(
                f"Could not open video: {video_path!r}",
                RuntimeWarning,
                stacklevel=2,
            )
        else:
            # The context manager closes the Holistic graph even if a frame fails.
            with mp_holistic.Holistic() as holistic:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        # End of stream or unreadable frame.
                        break

                    # OpenCV decodes frames as BGR; convert to RGB before processing.
                    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = holistic.process(image)

                    keypoints_seq.append(extract_keypoints(results))
    finally:
        # Always release the capture handle, also when processing raises.
        # No HighGUI windows are created here, so there is nothing to destroy.
        cap.release()

    return np.array(keypoints_seq)

In [None]:
# Location of the raw WLASL videos (machine-specific absolute path).
video_path = "/home/haggenmueller/asl_detection/machine_learning/datasets/wlasl/raw_videos"

# cv2.VideoCapture needs a video file. If the configured path is a directory,
# use the first video file in it (sorted by name) instead of failing silently.
if os.path.isdir(video_path):
    video_extensions = (".mp4", ".avi", ".mov", ".mkv", ".webm")
    video_files = sorted(
        name for name in os.listdir(video_path)
        if name.lower().endswith(video_extensions)
    )
    if not video_files:
        raise FileNotFoundError(f"No video files found in {video_path!r}")
    video_path = os.path.join(video_path, video_files[0])

features = process_video(video_path)

# extract_keypoints yields 33*3 + 21*3 + 21*3 = 225 values per frame.
print("Shape der extrahierten Features:", features.shape)  # (number of frames, 225)