In [1]:
# SCALER & TRAIN/TEST SPLIT FOR PLANK (FULL + KEYPOINTS)

import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pickle

In [2]:
# Root folder of the plank model. The default is the original hard-coded
# location; set the PLANK_MODEL_DIR environment variable to run this notebook
# on another machine without editing the cell.
PLANK_MODEL_DIR = os.environ.get(
    "PLANK_MODEL_DIR",
    r"C:\Users\PORTABLE\Desktop\projet_annuel\core\plank_model",
)

# Output folders: CSV datasets go to data/, pickled scalers go to model/.
OUT_DATA_DIR = os.path.join(PLANK_MODEL_DIR, "data")
OUT_MODEL_DIR = os.path.join(PLANK_MODEL_DIR, "model")

# Input datasets are read from the same data/ folder the splits are written to.
FULL_DATASET = os.path.join(OUT_DATA_DIR, "plank_dataset_full.csv")
KEY_DATASET = os.path.join(OUT_DATA_DIR, "plank_dataset_keypoints.csv")

os.makedirs(OUT_DATA_DIR, exist_ok=True)
os.makedirs(OUT_MODEL_DIR, exist_ok=True)


df_full = pd.read_csv(FULL_DATASET)
df_key  = pd.read_csv(KEY_DATASET)

# Quick sanity check on the (rows, columns) of each loaded table.
print("FULL:", df_full.shape)
print("KEY :", df_key.shape)

FULL: (3935, 133)
KEY : (3935, 69)


In [5]:
# 3) Split 80/20

def split_dataset(df, label_col="label", test_size=0.20, random_state=42):
    """Split a labelled table into stratified train and test DataFrames.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature columns plus the target column.
    label_col : str, default "label"
        Name of the target column; it is also used to stratify the split.
    test_size : float, default 0.20
        Fraction of rows assigned to the test set.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train_df, test_df)``. Each frame has a fresh 0..n-1 index, the
        target column first, then the feature columns in their original order.

    Raises
    ------
    KeyError
        If ``label_col`` is not a column of ``df``.
    """
    if label_col not in df.columns:
        raise KeyError(f"column {label_col!r} not found in dataset")

    features = df.drop(columns=label_col)
    target = df[label_col]

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=test_size,
        random_state=random_state,
        stratify=target,
    )

    # The split shuffles rows; reset both indexes so each concat yields a
    # clean 0..n-1 index in the frames that get saved.
    train_df = pd.concat(
        [y_train.reset_index(drop=True), X_train.reset_index(drop=True)],
        axis=1,
    )
    test_df = pd.concat(
        [y_test.reset_index(drop=True), X_test.reset_index(drop=True)],
        axis=1,
    )

    return train_df, test_df


# Apply the same stratified split to both feature sets, full set first.
(train_full, test_full), (train_key, test_key) = (
    split_dataset(dataset) for dataset in (df_full, df_key)
)

# Report the (rows, columns) of every partition.
print("\nFULL → Train/Test :", *(part.shape for part in (train_full, test_full)))
print("KEY  → Train/Test :", *(part.shape for part in (train_key, test_key)))



FULL → Train/Test : (3148, 133) (787, 133)
KEY  → Train/Test : (3148, 69) (787, 69)


In [6]:
# 4) Fit scalers
# Statistics are learned from the TRAIN partitions only; the label column is
# removed first so that only feature columns are standardised.
scaler_full = StandardScaler().fit(train_full.drop(columns="label"))
scaler_key = StandardScaler().fit(train_key.drop(columns="label"))

# 5) Save scalers
# One pickle file per fitted scaler, written into the model folder.
for scaler_filename, fitted_scaler in (
    ("scaler_full.pkl", scaler_full),
    ("scaler_keypoints.pkl", scaler_key),
):
    with open(os.path.join(OUT_MODEL_DIR, scaler_filename), "wb") as scaler_file:
        pickle.dump(fitted_scaler, scaler_file)

print("\nScalers sauvegardés !")


Scalers sauvegardés !


In [None]:
# 6) Save the datasets
# Output file name -> partition to write; all files land in OUT_DATA_DIR,
# written without the index column.
partitions_by_filename = {
    "plank_train_full.csv": train_full,
    "plank_test_full.csv": test_full,
    "plank_train_keypoints.csv": train_key,
    "plank_test_keypoints.csv": test_key,
}

for csv_filename, partition in partitions_by_filename.items():
    partition.to_csv(os.path.join(OUT_DATA_DIR, csv_filename), index=False)

print("Fichiers train/test sauvegardés dans :", OUT_DATA_DIR)

# 7) Summary
# Recap of every artifact name produced by this notebook.
summary_lines = (
    "\n=== RÉSUMÉ ===",
    "Scaler FULL       → scaler_full.pkl",
    "Scaler KEYPOINTS  → scaler_keypoints.pkl",
    "Train FULL        → plank_train_full.csv",
    "Test FULL         → plank_test_full.csv",
    "Train KEYPOINTS   → plank_train_keypoints.csv",
    "Test KEYPOINTS    → plank_test_keypoints.csv",
)
print("\n".join(summary_lines))

Fichiers train/test sauvegardés dans : C:\Users\caovi\OneDrive\Desktop\projet annuel\core\plank_model\data

=== RÉSUMÉ ===
Scaler FULL       → scaler_full.pkl
Scaler KEYPOINTS  → scaler_keypoints.pkl
Train FULL        → plank_train_full.csv
Test FULL         → plank_test_full.csv
Train KEYPOINTS   → plank_train_keypoints.csv
Test KEYPOINTS    → plank_test_keypoints.csv


: 