In [12]:
import mediapipe as mp
import cv2
import numpy as np
from tqdm import tqdm

In [8]:
# Clip analysed by the cells below.
# NOTE(review): this is a machine-specific absolute path; consider deriving it
# from a configurable data directory so the notebook runs elsewhere.
video_path = "/Users/williamchalons/code/WiwiC/VERA/data/raw/myvideo.mp4"

# Sanity check only: open the file, report the result, then release the handle.
# The following cells create their own VideoCapture, so keeping this one open
# would just leak the underlying decoder.
cap = cv2.VideoCapture(video_path)
try:
    if not cap.isOpened():
        print("❌ Error loading video")
    else:
        print("✅ Video loaded")
finally:
    cap.release()

✅ Video loaded


In [9]:
# Smoke test: run FaceMesh on the first frame of the video and report whether
# a face was found.
# NOTE(review): face_mesh is left open as a notebook-level object in case later
# cells reuse it; call face_mesh.close() once it is no longer needed.
face_mesh = mp.solutions.face_mesh.FaceMesh(
    refine_landmarks=True,
    max_num_faces=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Only one frame is needed, so the capture is released right after the read
# instead of being left open for the rest of the kernel session.
cap = cv2.VideoCapture(video_path)
try:
    ret, frame = cap.read()
finally:
    cap.release()

if not ret:
    print("❌ Could not read first frame")
else:
    # OpenCV decodes frames as BGR; the frame is converted to RGB before
    # being handed to FaceMesh.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(rgb)

    if results.multi_face_landmarks:
        print("✅ FaceMesh detected a face")
    else:
        print("❌ FaceMesh did NOT detect a face")

✅ FaceMesh detected a face


I0000 00:00:1764603611.206947       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M2 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [14]:
import pandas as pd

In [None]:
def extract_facemesh_landmarks(video_path, output_json):
    """Run MediaPipe FaceMesh over a video and write per-frame landmarks to JSON.

    Frames are read sequentially, up to the frame count reported by the
    container, stopping early if a read fails. For each frame read, one
    timestamp and one landmark entry are recorded.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.
        output_json: Path of the JSON file to write.

    Returns:
        ``output_json``, unchanged.

    The JSON document has four keys:
        * ``"video_path"``: the ``video_path`` argument.
        * ``"fps"``: the frame rate reported by OpenCV.
        * ``"timestamps"``: ``frame_idx / fps`` per frame, or ``None`` for
          every frame when the reported frame rate is not positive.
        * ``"landmarks"``: per frame, a list of ``[x, y, z]`` triples for the
          first detected face, or ``None`` when no face was detected.
    """
    # Imported locally so the function does not depend on a notebook cell
    # having imported json beforehand.
    import json

    frame_landmarks = []
    frame_timestamps = []

    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Some files report no usable frame rate; dividing by it would raise,
        # so timestamps are recorded as None in that case.
        fps_usable = fps > 0

        # The context manager closes the FaceMesh graph even if a frame fails.
        with mp.solutions.face_mesh.FaceMesh(
            refine_landmarks=True,
            max_num_faces=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        ) as face_mesh:
            for frame_idx in tqdm(range(frame_count), desc="Processing video"):
                ret, frame = cap.read()
                if not ret:
                    # The reported frame count can exceed what is decodable.
                    break

                frame_timestamps.append(frame_idx / fps if fps_usable else None)

                # OpenCV decodes frames as BGR; convert to RGB for FaceMesh.
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = face_mesh.process(rgb)

                if results.multi_face_landmarks:
                    # max_num_faces=1, so only the first face is kept.
                    lm = results.multi_face_landmarks[0].landmark
                    frame_landmarks.append([[p.x, p.y, p.z] for p in lm])
                else:
                    # Keep a placeholder so landmarks stay aligned with timestamps.
                    frame_landmarks.append(None)
    finally:
        # Release the capture on every exit path, including exceptions.
        cap.release()

    output = {
        "video_path": video_path,
        "fps": fps,
        "timestamps": frame_timestamps,
        "landmarks": frame_landmarks
    }

    with open(output_json, "w") as f:
        json.dump(output, f)

    return output_json
