In [1]:
import json
import os
import re

import cv2
import mediapipe as mp
import numpy as np

# Shorthand handles for the MediaPipe solution modules used by the cells
# below: the drawing helpers and the Holistic landmark model.
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic


In [2]:
# Create base directories
def create_directories():
    """Ensure the dataset folder tree exists and return its two leaf folders.

    The project root is located by walking four directory levels up from
    ``__file__``. When ``__file__`` is not defined (as in a Jupyter kernel)
    the root falls back to two levels above the current working directory.

    Returns:
        tuple[str, str]: ``(video_dir, keypoints_dir)``, both located under
        ``<root>/datasets/own_dataset``.
    """
    try:
        # Script execution: climb four levels starting from this file's path.
        project_root = __file__
        for _ in range(4):
            project_root = os.path.dirname(project_root)
    except NameError:
        # Notebook execution: no __file__, so anchor on the working directory.
        project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))

    datasets_root = os.path.join(project_root, "datasets")
    own_root = os.path.join(datasets_root, "own_dataset")
    videos_path = os.path.join(own_root, "videos")
    keypoints_path = os.path.join(own_root, "keypoints")

    # Parents come first; makedirs(exist_ok=True) tolerates existing folders,
    # so the message is printed for every folder on every call.
    for path in (datasets_root, own_root, videos_path, keypoints_path):
        os.makedirs(path, exist_ok=True)
        print(f"Created directory: {path}")

    return videos_path, keypoints_path

In [3]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and return the frame with its results.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)``.

    Returns:
        tuple: ``(bgr_image, results)`` where ``bgr_image`` is the frame after
        a BGR -> RGB -> BGR round trip and ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is marked read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [4]:
def draw_styled_landmarks(image, results):
    """Draw every detected landmark group of ``results`` onto ``image``.

    Groups are drawn in the order face, pose, left hand, right hand; a group
    is skipped when its landmark attribute on ``results`` is falsy.

    Args:
        image: Image passed through to ``mp_drawing.draw_landmarks``.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # (attribute on `results`, connection-set attribute on `mp_holistic`).
    # Names are resolved lazily so a connection set is only looked up when
    # its landmark group is actually present.
    layers = (
        ("face_landmarks", "FACEMESH_TESSELATION"),
        ("pose_landmarks", "POSE_CONNECTIONS"),
        ("left_hand_landmarks", "HAND_CONNECTIONS"),
        ("right_hand_landmarks", "HAND_CONNECTIONS"),
    )
    for landmarks_attr, connections_attr in layers:
        landmarks = getattr(results, landmarks_attr)
        if not landmarks:
            continue
        mp_drawing.draw_landmarks(
            image, landmarks, getattr(mp_holistic, connections_attr)
        )

In [5]:
def extract_keypoints(results):
    """Flatten one frame of Holistic landmarks into a fixed-length vector.

    Layout (x, y, z per landmark, concatenated in this order):
        pose (33 * 3) | face (468 * 3) | left hand (21 * 3) | right hand (21 * 3)
    for a total of 1629 values.

    A body part that was not detected contributes zeros. If a part delivers
    more landmarks than its slot holds (e.g. the extra iris points MediaPipe
    appends when face refinement is enabled), the surplus landmarks are
    ignored so the vector length never changes and no IndexError is raised.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` iterable of items with
            ``x``, ``y`` and ``z`` attributes.

    Returns:
        numpy.ndarray: 1-D float array of length 1629.
    """
    return np.concatenate(
        [
            _flatten_landmarks(results.pose_landmarks, 33),
            _flatten_landmarks(results.face_landmarks, 468),
            _flatten_landmarks(results.left_hand_landmarks, 21),
            _flatten_landmarks(results.right_hand_landmarks, 21),
        ]
    )


def _flatten_landmarks(landmark_list, count):
    """Return the first ``count`` x/y/z triples of ``landmark_list`` as a flat array."""
    flat = np.zeros(count * 3)
    if landmark_list:
        for i, lm in enumerate(landmark_list.landmark):
            if i >= count:
                # Surplus landmarks would overflow the fixed-size slot.
                break
            flat[i * 3 : i * 3 + 3] = (lm.x, lm.y, lm.z)
    return flat


In [6]:
# def process_video_to_keypoints(
#    video_path, output_path, action, flip=False, add_noise=False
#):
#    """Processes a single video to keypoints."""
#    cap = cv2.VideoCapture(video_path)
#    keypoints_sequence = []
#
 #   with mp_holistic.Holistic(min_detection_confidence=0.5) as holistic:
#        while cap.isOpened():
#            ret, frame = cap.read()
#            if not ret:
 #               break
#
 #           # Flip image if requested
  #          if flip:
   #             frame = cv2.flip(frame, 1)
#
 #           # Process frame
  #          image, results = mediapipe_detection(frame, holistic)
  #          keypoints = extract_keypoints(results)
#
 #           # Add noise if requested
#            if add_noise:
  #              noise = np.random.normal(0, 0.01, keypoints.shape)
 #               keypoints = keypoints + noise
#
 #           keypoints_sequence.append(keypoints.tolist())
#
 #   cap.release()
#
 #   # Save keypoints
  #  data = {"gloss": action, "keypoints": keypoints_sequence}
#
 #   try:
#        with open(output_path, "w") as f:
 #           json.dump(data, f)
  #  except Exception as e:
   #     print(f"Error saving {output_path}: {str(e)}")
#
#

In [7]:
def process_video_to_keypoints(
    video_path, output_path, action, flip=False, add_noise=False
):
    """Convert one video into a JSON file of per-frame keypoint vectors.

    Every frame is optionally mirrored, run through MediaPipe Holistic and
    flattened with ``extract_keypoints``; optional Gaussian noise is added to
    the flattened vector. The result is written as
    ``{"gloss": action, "keypoints": [[...], ...]}``.

    Args:
        video_path: Path of the video to read.
        output_path: Path of the JSON file to create.
        action: Label stored under the ``"gloss"`` key.
        flip: Mirror each frame horizontally before detection.
        add_noise: Add zero-mean Gaussian noise (sigma 0.01) to each vector.

    Returns:
        None. Nothing is written when no frame could be read, so an
        unreadable video does not leave an empty file that a later
        "output already exists" check would mistake for finished work.
        Save errors are reported with ``print`` and not raised.
    """
    keypoints_sequence = []
    cap = cv2.VideoCapture(video_path)
    try:
        with mp_holistic.Holistic(min_detection_confidence=0.5) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Flip image if requested
                if flip:
                    frame = cv2.flip(frame, 1)

                # Process frame (the returned image is not needed here)
                _, results = mediapipe_detection(frame, holistic)
                keypoints = extract_keypoints(results)

                # Add noise if requested
                if add_noise:
                    keypoints = keypoints + np.random.normal(
                        0, 0.01, keypoints.shape
                    )

                keypoints_sequence.append(keypoints.tolist())
    finally:
        # Release the capture even if detection raised. No GUI windows are
        # opened by this function, so there is nothing else to tear down.
        cap.release()

    if not keypoints_sequence:
        print(f"No frames could be read from {video_path}; skipping {output_path}")
        return

    # Save keypoints via a temporary file so an interrupted or failed write
    # never leaves a truncated JSON at output_path.
    data = {"gloss": action, "keypoints": keypoints_sequence}
    tmp_path = f"{output_path}.tmp"
    try:
        with open(tmp_path, "w") as f:
            json.dump(data, f)
        os.replace(tmp_path, output_path)
    except Exception as e:
        print(f"Error saving {output_path}: {str(e)}")
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [8]:
def extract_action_from_filename(filename):
    """Derive the action (gloss) label from a video filename.

    Steps, in order:
      1. remove a trailing ``.mp4`` extension,
      2. remove the contributor tags ``_dani`` / ``_meiwand`` in any casing,
      3. remove every digit,
      4. split on underscores, trim whitespace, drop empty pieces and join
         the remaining pieces with underscores again.

    Args:
        filename: File name such as ``"Thank_you_Dani_3.mp4"``.

    Returns:
        str | None: The cleaned label (``"Thank_you"`` for the example above),
        or ``None`` when nothing is left after cleaning.
    """
    name = filename

    # Remove the file extension (only at the end of the name)
    if name[-4:].lower() == ".mp4":
        name = name[:-4]

    # Remove known contributor tags regardless of letter case
    for suffix in ("_dani", "_meiwand"):
        name = re.sub(re.escape(suffix), "", name, flags=re.IGNORECASE)

    # Remove all digits (wherever they occur in the name)
    name = "".join(char for char in name if not char.isdigit())

    # Split on underscores, trim each piece and drop the empty ones
    parts = [part.strip() for part in name.split("_") if part.strip()]

    # Re-join the remaining pieces with underscores
    return "_".join(parts) if parts else None


In [None]:
def collect_data_from_videos():
    """Extract keypoints for every ``.mp4`` in the video folder.

    Videos are grouped by the exact label returned by
    ``extract_action_from_filename``; files without a usable label are
    ignored. For each video three keypoint files are produced in the
    keypoints folder unless they already exist: the original
    (``<name>.json``), a mirrored variant (``<name>_flipped.json``) and a
    noisy variant (``<name>_noisy.json``).

    Returns:
        list[str]: Sorted action labels that were found, or ``[]`` when the
        video folder contains no ``.mp4`` files.
    """
    # Create directories and get paths
    video_directory, keypoints_directory = create_directories()

    # Check if video directory is empty (sorted for a reproducible order)
    video_files = sorted(
        f for f in os.listdir(video_directory) if f.endswith(".mp4")
    )
    if not video_files:
        print(f"\nNo .mp4 files found in {video_directory}")
        print(
            "Please add video files in the format: Action_name1.mp4, Action_name2.mp4, etc."
        )
        return []

    # Group videos by their exact action label. Matching on equality (not on
    # substring) keeps e.g. "hello_world" clips out of the "hello" group, and
    # files whose label is None are simply left out.
    videos_by_action = {}
    for video_file in video_files:
        action = extract_action_from_filename(video_file)
        if action:
            videos_by_action.setdefault(action, []).append(video_file)

    actions = sorted(videos_by_action)

    print("\n=== Starting Video Processing ===")
    print(f"\nFound Actions: {actions}")

    # Output-name suffix and extra keyword arguments for each variant.
    variants = (
        ("", {}),
        ("_flipped", {"flip": True}),
        ("_noisy", {"add_noise": True}),
    )

    # Process each action
    for action in actions:
        action_videos = videos_by_action[action]
        print(f"\nAction '{action}':")
        print(f"- Number of videos: {len(action_videos)}")
        print(f"- Videos: {action_videos}")

        print(f"\nProcessing Action '{action}' ({len(action_videos)} videos)")

        for video_file in action_videos:
            base_name = os.path.splitext(video_file)[0]
            video_path = os.path.join(video_directory, video_file)

            for suffix, options in variants:
                output_path = os.path.join(
                    keypoints_directory, f"{base_name}{suffix}.json"
                )
                # Existing outputs are treated as done and not recomputed.
                if not os.path.exists(output_path):
                    process_video_to_keypoints(
                        video_path, output_path, action, **options
                    )

    return actions


if __name__ == "__main__":
    # Run the full extraction pipeline, then report which actions were handled.
    actions = collect_data_from_videos()
    print(
        f"\nProcessed actions: {actions}"
        if actions
        else "\nNo actions processed. Please add video files first."
    )