# **Program Untuk Pengambilan Data Video**

Program berbasis Jupyter ini dibuat untuk menjalankan proses ekstraksi fitur berupa data skeletal numerik, yang diperoleh melalui MediaPipe dari dataset video.

---

## Proses _import library_

In [1]:
import cv2 as opencv
import numpy
import pandas
import os
from pathlib import Path
import mediapipe

## Pengaturan Mediapipe Holistic

In [2]:
# Shortcuts to the MediaPipe solution modules used by this notebook.
# They are bound once, before the model is built, so every later cell
# refers to the same names.
mediapipe_holistic = mediapipe.solutions.holistic
mediapipe_drawing = mediapipe.solutions.drawing_utils
mediapipe_drawing_styles = mediapipe.solutions.drawing_styles

# One Holistic instance shared by every call to the frame-extraction function.
# static_image_mode=False together with smooth_landmarks=True configures the
# model for consecutive video frames rather than unrelated still images
# (see the MediaPipe Holistic documentation for the exact semantics).
holistic = mediapipe_holistic.Holistic(
    static_image_mode=False,
    model_complexity=1,
    smooth_landmarks=True,
    enable_segmentation=False,
    refine_face_landmarks=False,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

## Inisialisasi _function_

### _Function_ penghitung jarak _euclidean_

In [3]:
def euclidean_distance(pointA, pointB):
    """Return the 2-D Euclidean distance between two points.

    Both arguments must expose numeric ``x`` and ``y`` attributes; any other
    attribute (for example a depth component) is ignored.
    """
    delta_x = pointA.x - pointB.x
    delta_y = pointA.y - pointB.y
    squared_length = delta_x ** 2 + delta_y ** 2
    return squared_length ** 0.5

### _Function_ normalisasi landmark

In [4]:
def normalize_landmarks(landmarks, shoulder_center_point, shoulder_width):
    """Translate and scale landmarks relative to the shoulders.

    Each landmark is shifted so that ``shoulder_center_point`` becomes the
    origin and then divided by ``shoulder_width``.

    Args:
        landmarks: Sequence of objects exposing ``x`` and ``y`` attributes,
            or ``None`` when nothing was detected.
        shoulder_center_point: ``(x, y)`` pair used as the new origin.
        shoulder_width: Scale factor; the distance between both shoulders.

    Returns:
        A list of ``(x, y)`` tuples, one per input landmark. The list is
        empty for ``None`` or empty input, and filled with ``(0, 0)`` when
        ``shoulder_width`` is zero (no meaningful scale exists).
    """
    # None has no length, so it cannot reach the zero-padding branch below.
    if landmarks is None:
        return []
    if not landmarks or shoulder_width == 0:
        return [(0, 0)] * len(landmarks)

    center_x = shoulder_center_point[0]
    center_y = shoulder_center_point[1]
    return [
        (
            (landmark_point.x - center_x) / shoulder_width,
            (landmark_point.y - center_y) / shoulder_width,
        )
        for landmark_point in landmarks
    ]

### _Function_ ekstraksi fitur _landmark_ dari frame melalui Mediapipe

In [None]:
def extract_landmarks_from_frame(frame):
    """Turn one BGR frame into a flat list of shoulder-normalized coordinates.

    The frame is converted to RGB and passed to the shared ``holistic``
    model. When no pose is found the function returns ``None``. Otherwise
    it returns ``[x0, y0, x1, y1, ...]`` built, in this order, from:

    * the pose landmarks at indices 0 through 16 (17 points),
    * the right-hand landmarks,
    * the left-hand landmarks.

    A hand that was not detected contributes 21 ``(0, 0)`` pairs. All points
    are expressed relative to the midpoint between the shoulders and scaled
    by the shoulder-to-shoulder distance.
    """
    results = holistic.process(opencv.cvtColor(frame, opencv.COLOR_BGR2RGB))
    pose = results.pose_landmarks
    if not pose:
        return None

    pose_points = pose.landmark
    left = pose_points[mediapipe_holistic.PoseLandmark.LEFT_SHOULDER]
    right = pose_points[mediapipe_holistic.PoseLandmark.RIGHT_SHOULDER]

    # Origin and scale shared by the pose and both hands.
    origin = ((left.x + right.x) / 2, (left.y + right.y) / 2)
    scale = euclidean_distance(left, right)

    def normalized_hand(hand):
        # Keep the feature vector layout stable when a hand is missing.
        if not hand:
            return [(0, 0)] * 21
        return normalize_landmarks(hand.landmark, origin, scale)

    points = (
        normalize_landmarks(pose_points[:17], origin, scale)
        + normalized_hand(results.right_hand_landmarks)
        + normalized_hand(results.left_hand_landmarks)
    )

    # Flatten the (x, y) pairs into a single row of numbers.
    return [coordinate for point in points for coordinate in point]

## Proses Utama

### Menentukan lokasi _folder input_ dan _folder output_

In [6]:
# Root folder of the source videos. The main loop of this notebook iterates
# over its sub-folders, uses each sub-folder name as the label, and reads the
# "*.mp4" files inside it.
input_folder = Path("input_videos/")
# Destination folder for the generated CSV files.
output_folder = Path("feature_extraction_output")
# Create the destination if needed; an existing folder is not an error.
output_folder.mkdir(exist_ok=True)

In [7]:
# NOTE(review): MAX_FRAMES is not referenced by the code visible in this
# notebook; sequences are saved with however many frames were extracted.
MAX_FRAMES = None  # No max limit for variable-length
# Running count of saved sequences; also used as the zero-padded index in
# each output file name.
SEQUENCE_COUNT = 0

In [8]:
# Convert every video under input_folder/<label>/ into one CSV file whose
# rows are the per-frame feature vectors returned by
# extract_landmarks_from_frame. Frames without a detected pose are dropped.
#
# Folders and files are visited in sorted order because Path.iterdir() and
# Path.glob() yield entries in arbitrary order; sorting keeps the numbering
# of the output files reproducible between runs and machines.
for label_folder in sorted(input_folder.iterdir()):
    if not label_folder.is_dir():
        continue
    label = label_folder.name

    for video_path in sorted(label_folder.glob("*.mp4")):
        video_file = opencv.VideoCapture(str(video_path))

        # Report an unreadable file as such instead of as "no pose".
        if not video_file.isOpened():
            video_file.release()
            print(f"[WARNING] Skipped (cannot open): {video_path.name}")
            continue

        sequence = []
        try:
            while True:
                available, frame = video_file.read()
                if not available:
                    break

                frame_data = extract_landmarks_from_frame(frame)
                if frame_data:
                    sequence.append(frame_data)
        finally:
            # Release the capture handle even if extraction raises.
            video_file.release()

        if not sequence:
            print(f"[INFO] Skipped (no pose): {video_path.name}")
            continue

        data_frame = pandas.DataFrame(sequence)
        output_file = output_folder / f"{label}_{SEQUENCE_COUNT:03}.csv"
        data_frame.to_csv(output_file, index=False, header=False)
        SEQUENCE_COUNT += 1
        print(f"[SUCCESS] Saved variable-length sequence: {output_file}")

print(f"\n[INFO] Total sequences saved: {SEQUENCE_COUNT}")

[SUCCESS] Saved variable-length sequence: feature_extraction_output\a_000.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\b_001.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\c_002.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\d_003.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\e_004.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\f_005.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\g_006.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\h_007.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\i_008.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\j_009.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\k_010.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_output\l_011.csv
[SUCCESS] Saved variable-length sequence: feature_extraction_out