In [None]:
import numpy as np
import mediapipe as mp
import cv2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import tensorflow as tf
from tensorflow import timestamp
from tensorflow.keras.models import load_model
import time


In [None]:
# MediaPipe solution modules used by the cells below
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_draw = mp.solutions.drawing_utils  # drawing utility for rendering landmarks

# Hand tracker: at most two hands, 0.5 detection / tracking confidence
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Pose tracker with the same confidence thresholds
pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)

In [None]:
"""
* Extract raw landmark data from a specified video, then write it to a JSON file.
* The idea is that we will have to replicate this functionality in JavaScript so the client can
* send the JSON data to the server.
*
* Note: we might consider uploading the .npz file instead, but that still has to be tested.
"""
import json
import os


def _zero_landmarks(count):
    """Return `count` independent zeroed landmark dicts (placeholder when nothing is detected)."""
    return [{"x": 0, "y": 0, "z": 0} for _ in range(count)]


# Reading from a video file
cap = cv2.VideoCapture('videos/SamplePushups.mp4')
if not cap.isOpened():
    # Fail loudly instead of silently writing an empty session file.
    raise IOError("Could not open video: videos/SamplePushups.mp4")

# Read the frame rate while the capture is still open; once the capture has
# been released it reads back as 0 and the fallback would always be used.
fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fallback if FPS=0

cv2.namedWindow('MediaPipe', cv2.WINDOW_NORMAL)
cv2.resizeWindow('MediaPipe', 960, 540)  # width, height

# Data storage
frames_data = []
frame_index = 0
start_time = time.time()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        try:
            # Convert to RGB format
            RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # --- Hands ---
            results = hands.process(RGB)

            frame_info = {
                "frame_index": frame_index,
                # NOTE(review): wall-clock seconds since processing started,
                # not the position inside the video -- confirm this is what
                # consumers of the JSON expect.
                "timestamp": time.time() - start_time,
                "hands": {
                    "Left": _zero_landmarks(21),   # default zeros
                    "Right": _zero_landmarks(21)
                },
                "pose": _zero_landmarks(33)  # default zeros
            }

            if results.multi_hand_landmarks:
                for i, hand_landmarks in enumerate(results.multi_hand_landmarks):
                    handedness = results.multi_handedness[i].classification[0].label if results.multi_handedness else "Unknown"
                    hand_data = [{"x": lm.x, "y": lm.y, "z": lm.z} for lm in hand_landmarks.landmark]
                    # Overwrites the zero placeholder for the detected side.
                    frame_info["hands"][handedness] = hand_data
                    mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # --- Pose ---
            pose_results = pose.process(RGB)
            if pose_results.pose_landmarks:
                pose_data = [{"x": lm.x, "y": lm.y, "z": lm.z} for lm in pose_results.pose_landmarks.landmark]
                frame_info["pose"] = pose_data
                mp_draw.draw_landmarks(frame, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            frames_data.append(frame_info)
            frame_index += 1

            cv2.imshow('MediaPipe', frame)
        except Exception as exc:
            # Still stop at the first failing frame (frames collected so far
            # are saved below), but report why instead of truncating silently.
            print(f"Stopped at frame {frame_index}: {exc!r}")
            break

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the preview window, even on interrupt.
    cap.release()
    cv2.destroyAllWindows()

# Save all frames to JSON file
session_data = {
    "metadata": {
        "source": "video",
        "fps": fps,
        "num_frames": len(frames_data)
    },
    "frames": frames_data
}

# Make sure the target directory exists before writing.
os.makedirs("output/raw", exist_ok=True)
with open("output/raw/hand_landmarks_session.json", "w") as f:
    json.dump(session_data, f, indent=2)

print("Data saved to hand_landmarks_session.json")

In [None]:
import json
import os
import numpy as np

# Per-frame feature layout:
#   left hand  21 landmarks x 3 coords = 63
#   right hand 21 landmarks x 3 coords = 63
#   pose       33 landmarks x 3 coords = 99
#   total                              = 225
NUM_HAND_LANDMARKS = 21
NUM_POSE_LANDMARKS = 33
FEATURES_PER_FRAME = (2 * NUM_HAND_LANDMARKS + NUM_POSE_LANDMARKS) * 3


def _flatten_landmarks(landmarks, expected_count):
    """Flatten landmark dicts into [x0, y0, z0, x1, ...] of fixed length.

    Accepts a list of {"x", "y", "z"} dicts (the layout written by the
    extraction cell) or a dict wrapping such a list under "landmarks".
    Missing input yields zeros; short input is zero-padded and surplus
    landmarks are dropped, so the result always has expected_count * 3 values.
    """
    if isinstance(landmarks, dict):
        landmarks = landmarks.get("landmarks")
    flat = []
    for lm in (landmarks or [])[:expected_count]:
        flat += [lm["x"], lm["y"], lm["z"]]
    flat += [0.0] * (expected_count * 3 - len(flat))
    return flat


def _hand_entry(hands_data, side):
    """Look up one hand, accepting the "Left"/"Right" keys written by the extraction cell as well as lowercase keys."""
    return hands_data.get(side) or hands_data.get(side.lower())


# Load JSON file
with open("output/raw/hand_landmarks_session.json", "r") as f:
    session_data = json.load(f)

frames = session_data["frames"]
num_frames = len(frames)

all_landmarks = []

for frame in frames:
    # Default to an empty dict so a frame without "hands" still yields zeros.
    hands_data = frame.get("hands") or {}

    frame_features = (
        _flatten_landmarks(_hand_entry(hands_data, "Left"), NUM_HAND_LANDMARKS)
        + _flatten_landmarks(_hand_entry(hands_data, "Right"), NUM_HAND_LANDMARKS)
        + _flatten_landmarks(frame.get("pose"), NUM_POSE_LANDMARKS)
    )
    all_landmarks.append(frame_features)

# Convert to NumPy array; the explicit reshape keeps the (frames, 225) shape
# even when the session contains no frames.
landmarks_array = np.array(all_landmarks, dtype=np.float32).reshape(num_frames, FEATURES_PER_FRAME)

# Save as compressed npz
os.makedirs("output/compressed", exist_ok=True)
np.savez_compressed("output/compressed/landmarks.npz", landmarks=landmarks_array)

print(f"Saved {landmarks_array.shape} landmarks to landmarks.npz")


In [None]:
import numpy as np
import pandas as pd

def peek_landmarks_npz(npz_file):
    """
    Load the "landmarks" array of a .npz file into a pandas DataFrame for inspection.

    Parameters
    ----------
    npz_file : str or path-like
        Path to an archive that stores a 2-D array under the key "landmarks".
        The array must have 225 columns to match the generated column names.

    Returns
    -------
    pandas.DataFrame
        One row per array row, with columns named left_x0 ... left_z20,
        right_x0 ... right_z20, pose_x0 ... pose_z32 (in that order).
    """
    # The archive object keeps the file open until it is closed; the context
    # manager releases the handle as soon as the array has been read.
    with np.load(npz_file) as data:
        landmarks = data["landmarks"]

    # Column order: left hand (21 x 3), right hand (21 x 3), pose (33 x 3).
    columns = [
        f"{part}_{axis}{i}"
        for part, count in (("left", 21), ("right", 21), ("pose", 33))
        for i in range(count)
        for axis in ("x", "y", "z")
    ]

    return pd.DataFrame(landmarks, columns=columns)

# Example: read the saved archive back and preview its first five rows
df_peek = peek_landmarks_npz("output/compressed/landmarks.npz")
print(df_peek.head(n=5))


In [None]:
# Descriptive statistics for each column of the DataFrame loaded above
df_peek.describe()

In [None]:
# Show the column names of the DataFrame
df_peek.columns