In [1]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd
import os
from glob import glob
from tqdm import tqdm

# Suppression des warnings
import warnings
warnings.filterwarnings('ignore')

# Drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Project root. Every input/output path below is derived from it, so the
# notebook can be pointed at another checkout by setting the
# PLANK_PROJECT_ROOT environment variable instead of editing four paths.
# When the variable is unset the original location is used.
PROJECT_ROOT = os.environ.get(
    "PLANK_PROJECT_ROOT",
    r"C:\Users\caovi\OneDrive\Desktop\projet annuel",
)

# Video folders (one per class)
GOOD_DIR = os.path.join(PROJECT_ROOT, "data", "datasets", "plank", "good")
BAD_DIR  = os.path.join(PROJECT_ROOT, "data", "datasets", "plank", "bad")

# Output paths
_OUT_DIR = os.path.join(PROJECT_ROOT, "core", "plank_model", "data")
OUT_FULL_CSV = os.path.join(_OUT_DIR, "plank_dataset_full.csv")
OUT_KEYPOINTS_CSV = os.path.join(_OUT_DIR, "plank_dataset_keypoints.csv")

# Downsample (keep 1 frame out of n)
DOWNSAMPLE = 3


In [2]:
# ## 2. Definition of the important landmarks

# Names of the landmarks kept in the reduced ("keypoints") dataset.
# They are looked up by name in mp_pose.PoseLandmark during extraction.
IMPORTANT_LMS = [
    "NOSE",
    # upper body
    "LEFT_SHOULDER", "RIGHT_SHOULDER",
    "LEFT_ELBOW", "RIGHT_ELBOW",
    "LEFT_WRIST", "RIGHT_WRIST",
    # lower body
    "LEFT_HIP", "RIGHT_HIP",
    "LEFT_KNEE", "RIGHT_KNEE",
    "LEFT_ANKLE", "RIGHT_ANKLE",
    # feet
    "LEFT_HEEL", "RIGHT_HEEL",
    "LEFT_FOOT_INDEX", "RIGHT_FOOT_INDEX",
]

# CSV columns
# Keypoints file: "label", then <landmark>_x/_y/_z/_v for each important landmark.
HEADERS_KEYPOINTS = ["label"] + [
    f"{name.lower()}_{suffix}"
    for name in IMPORTANT_LMS
    for suffix in ("x", "y", "z", "v")
]

# Full file: "label", then x<i>/y<i>/z<i>/v<i> for landmark indices 0..32.
HEADERS_FULL = ["label"] + [
    f"{prefix}{index}"
    for index in range(33)
    for prefix in ("x", "y", "z", "v")
]

In [3]:
# ## 3. Function that extracts the landmarks from one video
def extract_landmarks_from_video(video_path, label, downsample=3):
    """
    Extract pose landmarks from a video, one row per processed frame.

    Frames are counted from 1 and only those whose index is a multiple of
    ``downsample`` are sent to MediaPipe Pose. Frames for which no pose is
    detected produce no row.

    Parameters
    ----------
    video_path : str
        Path of the video, passed to ``cv2.VideoCapture``.
    label : any
        Value stored as the first element of every returned row.
    downsample : int, default 3
        Keep one frame out of ``downsample``. Must be >= 1.

    Returns
    -------
    (full_rows, keypoint_rows) : tuple of two lists
        ``full_rows``     : ``[label, x, y, z, visibility, ...]`` covering every
                            landmark returned by MediaPipe for the frame.
        ``keypoint_rows`` : same layout, restricted to ``IMPORTANT_LMS`` (in
                            that order).
        Both lists always have the same length; they are empty when no frame
        can be read from ``video_path``.

    Raises
    ------
    ValueError
        If ``downsample`` is smaller than 1.
    """
    # Fail early with a clear message instead of a ZeroDivisionError raised
    # from the modulo below after the capture has already been opened.
    if downsample < 1:
        raise ValueError(f"downsample must be >= 1, got {downsample!r}")

    # Loop-invariant: resolve landmark names to indices once, not per frame.
    key_indices = [mp_pose.PoseLandmark[name].value for name in IMPORTANT_LMS]

    full_rows = []
    keypoint_rows = []

    cap = cv2.VideoCapture(video_path)
    try:
        # The context manager closes the Pose graph even if a frame raises.
        with mp_pose.Pose(static_image_mode=False) as pose:
            frame_id = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_id += 1
                if frame_id % downsample != 0:
                    continue

                # OpenCV decodes to BGR; the frame is converted to RGB before
                # being handed to MediaPipe.
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = pose.process(rgb)

                if not results.pose_landmarks:
                    continue

                lm = results.pose_landmarks.landmark

                # --- Full landmarks ---
                full_row = [label]
                for p in lm:
                    full_row.extend([p.x, p.y, p.z, p.visibility])
                full_rows.append(full_row)

                # --- Important keypoints ---
                key_row = [label]
                for idx in key_indices:
                    kp = lm[idx]
                    key_row.extend([kp.x, kp.y, kp.z, kp.visibility])
                keypoint_rows.append(key_row)
    finally:
        # Always release the capture handle, including on errors: the caller
        # keeps going with the next video after a failure.
        cap.release()

    return full_rows, keypoint_rows


In [4]:
# ## 4. Load every video and extract the data

# glob() order is filesystem-dependent; sorting makes the processing order
# (and therefore the CSV row order) reproducible between runs and machines.
good_videos = sorted(glob(os.path.join(GOOD_DIR, "*.mp4")))
bad_videos  = sorted(glob(os.path.join(BAD_DIR, "*.mp4")))

print("GOOD videos :", len(good_videos))
print("BAD  videos :", len(bad_videos))

full_data = []
keypoint_data = []
failed_videos = []  # paths whose extraction raised, kept for inspection

# One pass per class: videos from GOOD_DIR get label 0, videos from BAD_DIR get label 1.
for label, videos, desc in (
    (0, good_videos, "Processing GOOD"),
    (1, bad_videos, "Processing BAD"),
):
    for v in tqdm(videos, desc=desc):
        try:
            f_rows, k_rows = extract_landmarks_from_video(v, label, downsample=DOWNSAMPLE)
        except Exception as exc:
            # Best effort: one broken video must not stop the whole run, but the
            # cause is reported. `Exception` (not a bare except) lets
            # KeyboardInterrupt / SystemExit still interrupt the loop.
            failed_videos.append(v)
            print("Error on:", v, "->", repr(exc))
            continue
        full_data.extend(f_rows)
        keypoint_data.extend(k_rows)

if failed_videos:
    print("Videos skipped after an error :", len(failed_videos))

print("Total frames (full)      :", len(full_data))
print("Total frames (keypoints) :", len(keypoint_data))


GOOD videos : 14
BAD  videos : 14


Processing GOOD: 100%|██████████| 14/14 [02:47<00:00, 11.96s/it]
Processing BAD: 100%|██████████| 14/14 [01:34<00:00,  6.71s/it]

Total frames (full)      : 3935
Total frames (keypoints) : 3935





In [5]:
# ## 5. Build the DataFrames & save them as CSV

# Make sure the target directory of *each* output exists: the two CSV paths
# are configured independently and may not share a folder. A bare filename
# has an empty dirname, which os.makedirs() rejects, so it is skipped.
for out_path in (OUT_FULL_CSV, OUT_KEYPOINTS_CSV):
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

df_full = pd.DataFrame(full_data, columns=HEADERS_FULL)
df_full.to_csv(OUT_FULL_CSV, index=False)
print("Dataset FULL sauvegardé →", OUT_FULL_CSV)

df_key = pd.DataFrame(keypoint_data, columns=HEADERS_KEYPOINTS)
df_key.to_csv(OUT_KEYPOINTS_CSV, index=False)
print("Dataset KEYPOINTS sauvegardé →", OUT_KEYPOINTS_CSV)


Dataset FULL sauvegardé → C:\Users\caovi\OneDrive\Desktop\projet annuel\core\plank_model\data\plank_dataset_full.csv
Dataset KEYPOINTS sauvegardé → C:\Users\caovi\OneDrive\Desktop\projet annuel\core\plank_model\data\plank_dataset_keypoints.csv


In [7]:
# ## 6. Quick check

# For each saved dataset: print its shape, then the number of rows per label.
_reports = (
    ("FULL dataset shape :", "Labels distribution (full) :\n", df_full),
    ("\nKEYPOINTS dataset shape :", "Labels distribution (keypoints) :\n", df_key),
)
for shape_title, counts_title, frame in _reports:
    print(shape_title, frame.shape)
    print(counts_title, frame['label'].value_counts())


FULL dataset shape : (3935, 133)
Labels distribution (full) :
 label
0    2681
1    1254
Name: count, dtype: int64

KEYPOINTS dataset shape : (3935, 69)
Labels distribution (keypoints) :
 label
0    2681
1    1254
Name: count, dtype: int64
