In [1]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd
import os
from glob import glob
from tqdm import tqdm

# Silence every Python warning for the rest of the session
import warnings
warnings.filterwarnings('ignore')

# MediaPipe handles: drawing utilities and the pose solution
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Input video folders, one per class ("good" and "bad" squats)
# NOTE(review): these are absolute paths under one user's profile; adjust them
# before running on another machine.
GOOD_DIR = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\data\datasets\squat\good"
BAD_DIR  = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\data\datasets\squat\bad"

# Output paths
# Disabled single-file outputs, kept commented out:
#OUT_FULL_CSV = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_dataset_full.csv"
#OUT_KEYPOINTS_CSV = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_dataset_keypoints.csv"

# CSV listing every video with its label and its train/test split
VIDEO_INDEX_PATH = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\video_index_squat.csv"

# Per-split CSVs containing all landmarks of each processed frame
OUT_TRAIN_FULL = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_train_full.csv"
OUT_TEST_FULL  = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_test_full.csv"

# Per-split CSVs restricted to the landmarks listed in IMPORTANT_LMS
OUT_TRAIN_KEY = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_train_keypoints.csv"
OUT_TEST_KEY  = r"C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_test_keypoints.csv"
# Frame downsampling: process 1 frame out of every DOWNSAMPLE
DOWNSAMPLE = 3


In [2]:
# ## 2. Landmarks of interest and CSV column layout

# Pose landmark names kept in the "keypoints" dataset: the nose, then
# left/right pairs. Order matters: it fixes the column order of the CSV.
IMPORTANT_LMS = [
    "NOSE",
    "LEFT_SHOULDER", "RIGHT_SHOULDER",
    "LEFT_ELBOW", "RIGHT_ELBOW",
    "LEFT_WRIST", "RIGHT_WRIST",
    "LEFT_HIP", "RIGHT_HIP",
    "LEFT_KNEE", "RIGHT_KNEE",
    "LEFT_ANKLE", "RIGHT_ANKLE",
    "LEFT_HEEL", "RIGHT_HEEL",
    "LEFT_FOOT_INDEX", "RIGHT_FOOT_INDEX",
]

# Keypoints CSV columns: "label", then <name>_x, <name>_y, <name>_z, <name>_v
# for each landmark name (lower-cased), in IMPORTANT_LMS order.
HEADERS_KEYPOINTS = ["label"] + [
    f"{name.lower()}_{component}"
    for name in IMPORTANT_LMS
    for component in ("x", "y", "z", "v")
]

# Full CSV columns: "label", then x{i}, y{i}, z{i}, v{i} for i = 0..32.
HEADERS_FULL = ["label"] + [
    f"{component}{index}"
    for index in range(33)
    for component in ("x", "y", "z", "v")
]

In [3]:
# ## 3. Extract pose landmarks from a single video
def extract_landmarks_from_video(video_path, label, downsample=3):
    """
    Run MediaPipe Pose over a video and build one row per frame with a pose.

    Frames are counted from 1 and only frames whose number is a multiple of
    `downsample` are processed. Processed frames in which no pose is detected
    are skipped, so they produce no row.

    Parameters
    ----------
    video_path : str
        Path of the video file opened with `cv2.VideoCapture`.
    label : object
        Value stored as the first element of every returned row.
    downsample : int, default 3
        Process one frame out of every `downsample` frames. Must be >= 1.

    Returns
    -------
    tuple[list, list]
        `(full_rows, keypoint_rows)`, two lists of equal length:
        - each full row is `[label, x, y, z, visibility, ...]` over every
          landmark of the detected pose, in the order returned by MediaPipe;
        - each keypoint row has the same layout but only for the landmarks
          named in `IMPORTANT_LMS`, in that order.
        A video from which no frame can be read yields two empty lists.

    Raises
    ------
    ValueError
        If `downsample` is smaller than 1.
    """
    # Fail early with a clear message instead of a ZeroDivisionError mid-video.
    if downsample < 1:
        raise ValueError(f"downsample must be >= 1, got {downsample!r}")

    # The name -> index mapping does not change between frames: resolve it once.
    key_indices = [mp_pose.PoseLandmark[name].value for name in IMPORTANT_LMS]

    full_rows = []
    keypoint_rows = []

    cap = cv2.VideoCapture(video_path)
    try:
        # The context manager closes the pose solution even if a frame fails.
        with mp_pose.Pose(static_image_mode=False) as pose:
            frame_id = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_id += 1
                if frame_id % downsample != 0:
                    continue

                # OpenCV decodes frames as BGR; convert to RGB before inference.
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = pose.process(rgb)

                if not results.pose_landmarks:
                    continue

                lm = results.pose_landmarks.landmark

                # --- Full landmarks ---
                full_row = [label]
                for p in lm:
                    full_row.extend([p.x, p.y, p.z, p.visibility])
                full_rows.append(full_row)

                # --- Important keypoints only ---
                key_row = [label]
                for idx in key_indices:
                    kp = lm[idx]
                    key_row.extend([kp.x, kp.y, kp.z, kp.visibility])
                keypoint_rows.append(key_row)
    finally:
        # Always release the capture handle, including on error.
        cap.release()

    return full_rows, keypoint_rows


In [4]:
# ## 4. Index the videos, split them into train/test, extract the landmarks

# Sorted: glob returns files in arbitrary order, and the seeded shuffle below
# is only reproducible if its input order is.
good_videos = sorted(glob(os.path.join(GOOD_DIR, "*.mp4")))
bad_videos  = sorted(glob(os.path.join(BAD_DIR, "*.mp4")))

print("GOOD videos :", len(good_videos))
print("BAD  videos :", len(bad_videos))

# One record per video: label 0 for files from GOOD_DIR, 1 for BAD_DIR.
rows = [{"video": os.path.basename(path), "label": 0} for path in good_videos]
rows += [{"video": os.path.basename(path), "label": 1} for path in bad_videos]

# Explicit columns keep the frame well-formed even when no video is found.
df_index = pd.DataFrame(rows, columns=["video", "label"]).sample(frac=1, random_state=42)

# 70/30 split done per video (not per frame), so all frames of one video
# end up in the same split.
cut = int(len(df_index) * 0.7)
df_index["split"] = "test"
df_index.loc[df_index.index[:cut], "split"] = "train"

# The output directory may not exist yet on a fresh run: create it before writing.
os.makedirs(os.path.dirname(VIDEO_INDEX_PATH), exist_ok=True)
df_index.to_csv(VIDEO_INDEX_PATH, index=False)

train_full, test_full = [], []
train_key,  test_key  = [], []

for _, row in tqdm(df_index.iterrows(), total=len(df_index)):
    # The index only stores the file name; rebuild the path from the label.
    source_dir = GOOD_DIR if row["label"] == 0 else BAD_DIR
    video_path = os.path.join(source_dir, row["video"])

    f_rows, k_rows = extract_landmarks_from_video(video_path, row["label"], downsample=DOWNSAMPLE)

    if row["split"] == "train":
        train_full.extend(f_rows)
        train_key.extend(k_rows)
    else:
        test_full.extend(f_rows)
        test_key.extend(k_rows)

print("TRAIN frames full :", len(train_full))
print("TEST  frames full :", len(test_full))
print("TRAIN frames key  :", len(train_key))
print("TEST  frames key  :", len(test_key))

GOOD videos : 12
BAD  videos : 13


100%|██████████| 25/25 [06:45<00:00, 16.20s/it]

TRAIN frames full : 2942
TEST  frames full : 1511
TRAIN frames key  : 2942
TEST  frames key  : 1511





In [5]:
# ## 5. Build the DataFrames and save them as CSV

os.makedirs(os.path.dirname(OUT_TRAIN_FULL), exist_ok=True)

# (printed tag, row list, column names, destination file) for each dataset.
csv_exports = (
    ("FULL TRAIN →", train_full, HEADERS_FULL, OUT_TRAIN_FULL),
    ("FULL TEST  →", test_full, HEADERS_FULL, OUT_TEST_FULL),
    ("KEY  TRAIN →", train_key, HEADERS_KEYPOINTS, OUT_TRAIN_KEY),
    ("KEY  TEST  →", test_key, HEADERS_KEYPOINTS, OUT_TEST_KEY),
)

# Write all four files first, then report where they went.
for export_tag, export_rows, export_columns, export_path in csv_exports:
    export_frame = pd.DataFrame(export_rows, columns=export_columns)
    export_frame.to_csv(export_path, index=False)

for export_tag, export_rows, export_columns, export_path in csv_exports:
    print(export_tag, export_path)


FULL TRAIN → C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_train_full.csv
FULL TEST  → C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_test_full.csv
KEY  TRAIN → C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_train_keypoints.csv
KEY  TEST  → C:\Users\caovi\OneDrive\Desktop\projet annuel\core\squat_model\data\squat_test_keypoints.csv


In [6]:
# Sanity check: number of rows in each dataset.
for size_caption, size_rows in (
    ("TRAIN FULL :", train_full),
    ("TEST FULL  :", test_full),
    ("TRAIN KEY :", train_key),
    ("TEST KEY  :", test_key),
):
    print(size_caption, len(size_rows))

# Label distribution in each dataset; the label is the first element of a row.
for distrib_caption, distrib_rows in (
    ("\nLabel distrib TRAIN FULL :", train_full),
    ("Label distrib TEST FULL  :", test_full),
    ("\nLabel distrib TRAIN KEY :", train_key),
    ("Label distrib TEST KEY  :", test_key),
):
    row_labels = pd.Series([record[0] for record in distrib_rows])
    print(distrib_caption, row_labels.value_counts())


TRAIN FULL : 2942
TEST FULL  : 1511
TRAIN KEY : 2942
TEST KEY  : 1511

Label distrib TRAIN FULL : 0    1695
1    1247
Name: count, dtype: int64
Label distrib TEST FULL  : 1    1036
0     475
Name: count, dtype: int64

Label distrib TRAIN KEY : 0    1695
1    1247
Name: count, dtype: int64
Label distrib TEST KEY  : 1    1036
0     475
Name: count, dtype: int64
