# MediaPipe Holistic Keypoint Extraction

In [None]:
import os
import json
import cv2
import time
import random
import mediapipe as mp

In [None]:
# Short module-level aliases for the MediaPipe solution modules used below.
# Drawing helpers (alias only; nothing in the visible cells calls it).
mp_drawing = mp.solutions.drawing_utils
# Holistic solution module; process_video() instantiates mp_holistic.Holistic.
mp_holistic = mp.solutions.holistic

In [None]:
def _landmarks_to_rows(landmark_list, with_visibility=False):
    """Convert a landmark container into plain nested lists.

    Args:
        landmark_list: Object exposing a ``landmark`` iterable whose items
            have ``x``, ``y`` and ``z`` attributes (and ``visibility`` when
            ``with_visibility`` is true), or a falsy value when nothing was
            detected.
        with_visibility: If true, append each landmark's ``visibility`` as a
            fourth value.

    Returns:
        A list of ``[x, y, z]`` (or ``[x, y, z, visibility]``) lists, or
        ``None`` when ``landmark_list`` is falsy.
    """
    if not landmark_list:
        return None
    if with_visibility:
        return [[lm.x, lm.y, lm.z, lm.visibility] for lm in landmark_list.landmark]
    return [[lm.x, lm.y, lm.z] for lm in landmark_list.landmark]


def process_video(video_path):
    """Run MediaPipe Holistic over every frame of a video and collect keypoints.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        A list with one dict per decoded frame. Each dict has the keys
        ``'pose'``, ``'face'``, ``'left_hand'`` and ``'right_hand'`` (in that
        order). A value is ``None`` when that part was not detected in the
        frame; otherwise it is a list of ``[x, y, z]`` lists, with pose
        entries carrying ``visibility`` as a fourth value. The list is empty
        when the video cannot be opened or yields no frames.
    """
    keypoints_all = []
    cap = cv2.VideoCapture(video_path)
    try:
        # Skip building the Holistic model when there is nothing to read; the
        # result is the same empty list the frame loop would have produced.
        if not cap.isOpened():
            return keypoints_all

        with mp_holistic.Holistic(static_image_mode=False,
                                  min_detection_confidence=0.5,
                                  min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes frames as BGR; convert to RGB before passing
                # the image to Holistic.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(image)

                keypoints_all.append({
                    'pose': _landmarks_to_rows(results.pose_landmarks,
                                               with_visibility=True),
                    'face': _landmarks_to_rows(results.face_landmarks),
                    'left_hand': _landmarks_to_rows(results.left_hand_landmarks),
                    'right_hand': _landmarks_to_rows(results.right_hand_landmarks),
                })

                # NOTE(review): this adds 10-20 ms of idle time per frame.
                # Presumably deliberate throttling; confirm it is still
                # needed, since it lengthens every run.
                time.sleep(random.uniform(0.01, 0.02))
    finally:
        # Release the capture handle even if frame processing raises.
        cap.release()
    return keypoints_all

In [None]:
# Root directory of the project; the dataset locations below hang off it.
base_path = '/home/haggenmueller/asl_detection'
# WLASL label file (JSON).
labels_file = base_path + '/machine_learning/datasets/wlasl/WLASL_v0.3.json'
# Directory holding the source videos.
videos_folder = base_path + '/machine_learning/datasets/wlasl/raw_videos'
# Directory that receives the extracted keypoint JSON files.
output_folder = base_path + '/machine_learning/datasets/wlasl/keypoints'

In [None]:
# Batch driver: for every (gloss, video_id) instance listed in the label file,
# run process_video() on the matching local video and store the result as
# "<gloss>_<video_id>.json" in output_folder. Videos that are missing locally
# and outputs that already exist are skipped, so the cell can be re-run to
# resume an interrupted extraction.
if not os.path.exists(videos_folder):
    print(f"Videos folder '{videos_folder}' does not exist!")
else:
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Load labels
    with open(labels_file, 'r', encoding='utf-8') as f:
        data_labels = json.load(f)

    # Process each entry in the label JSON
    for entry in data_labels:
        gloss = entry.get('gloss')
        instances = entry.get('instances', [])
        for inst in instances:
            video_id = inst.get('video_id')
            # Local filename: video_id.mp4
            video_filename = f"{video_id}.mp4"
            video_path = os.path.join(videos_folder, video_filename)
            if not os.path.exists(video_path):
                print(f"Video {video_filename} not found.")
                continue

            output_path = os.path.join(output_folder, f"{gloss}_{video_id}.json")
            if os.path.exists(output_path):
                print(f"JSON for {video_filename} already exists. Skipping.")
                continue

            print(f"Processing {video_filename}")
            keypoints = process_video(video_path)

            output_data = {
                "gloss": gloss,
                "video_id": video_id,
                "keypoints": keypoints
            }
            # Write to a temporary file and rename it into place. Otherwise an
            # interrupted run leaves a truncated JSON at output_path, which
            # the "already exists" check above would skip on every later run.
            tmp_path = f"{output_path}.tmp"
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(output_data, f)
            os.replace(tmp_path, output_path)
            print(f"Saved keypoints to {output_path}")