In [3]:
import os
import pathlib
import numpy as np
import cv2
from tensorflow import keras
import tensorflow as tf
from keras import layers, preprocessing, callbacks, losses, utils, models, regularizers
import sys
import PyQt5
from PyQt5.QtWidgets import QApplication, QLabel, QWidget, QVBoxLayout, QHBoxLayout, QGridLayout,QFrame
from PyQt5.QtGui import QPixmap, QPainter, QPen, QColor
from PyQt5.QtCore import Qt
import time
import glob
from collections import defaultdict
import time
import mediapipe as mp

In [3]:
# GPU detection: list every physical device TensorFlow reports and keep the GPU entries
devices = tf.config.list_physical_devices()
gpu_devices = list(filter(lambda dev: dev.device_type == "GPU", devices))
if not gpu_devices:
    print("Aucun GPU Apple détecté, utilisation du CPU.")
else:
    print("GPU détecté :", gpu_devices)

GPU détecté : [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# =========================
# 1) Parsing des fichiers
# =========================

def load_sequence_txt(path):
    """Parse a text file of numeric rows into a 2-D float32 array.

    Each non-blank line is one row. The separator is detected per line:
    ';' when the line contains one, otherwise any run of whitespace.
    Lines that cannot be converted to floats (or that contain no value at
    all) are reported on stdout and skipped.

    Args:
        path: path of the UTF-8 text file to read.

    Returns:
        np.ndarray of dtype float32 and shape (n_rows, n_values). A file
        without any usable row yields an empty array of shape (0, 0), so
        the result is always 2-D.

    Raises:
        ValueError: if a row does not have the same number of values as
            the first usable row (the message names the file and line).
    """
    feats = []
    width = None  # number of values per row, fixed by the first usable row
    with open(path, "r", encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.strip()
            if not line:
                continue
            # Automatic separator detection (split(None) splits on whitespace)
            sep = ";" if ";" in line else None
            parts = [p for p in line.split(sep) if p.strip() != ""]
            try:
                vec = [float(p) for p in parts]
            except ValueError:
                vec = None
            if not vec:
                # Non-numeric content, or a separator-only line such as ";;"
                print(f"Ligne ignorée dans {path}: {line[:50]}...")
                continue
            if width is None:
                width = len(vec)
            elif len(vec) != width:
                # Fail here with the location instead of letting np.array
                # reject the ragged list without any context.
                raise ValueError(
                    f"{path}, ligne {lineno}: {len(vec)} valeurs trouvées, {width} attendues"
                )
            feats.append(vec)
    if not feats:
        return np.empty((0, 0), dtype=np.float32)
    return np.array(feats, dtype=np.float32)

In [5]:
# =========================
# 2) Prétraitements
# =========================

def normalize_framewise(X):
    """Standardise X along axis 0 and return the result as a new array.

    The mean and standard deviation are computed over axis 0 (with
    keepdims=True), then each value is centred and divided by the standard
    deviation plus 1e-8, which keeps constant columns from dividing by zero.
    The input array is left untouched.
    """
    work = X.copy()
    col_mean = work.mean(axis=0, keepdims=True)  # (1, D) for a 2-D input
    col_std = work.std(axis=0, keepdims=True) + 1e-8
    return (work - col_mean) / col_std

def load_dataset_from_folder(seq_folder):
    """Load every ``*.txt`` file of a folder as one labelled dataset.

    Files are visited in sorted order. The file name without its extension
    is the class label, and every parsable line of the file is one example
    of that class. Class ids are assigned in order of first appearance, and
    a label only receives an id once it has contributed at least one example.

    The separator is detected per line (';' when present, whitespace
    otherwise), matching ``load_sequence_txt``. Blank lines are ignored;
    lines that are not purely numeric are skipped and counted, and the
    count is printed per file.

    Args:
        seq_folder: directory containing the ``<label>.txt`` files.

    Returns:
        (X, y, class_names) where X is a float32 array of shape
        (n_examples, n_values), y an int64 array of class ids and
        class_names the list of labels indexed by class id.

    Raises:
        ValueError: if no usable example is found in the folder, or if a
            row does not have the same number of values as the first one.
    """
    sequences = sorted(glob.glob(os.path.join(seq_folder, "*.txt")))
    X_list, y_list = [], []
    label_to_id = {}
    width = None  # number of values per example, fixed by the first row

    for path in sequences:
        label = os.path.splitext(os.path.basename(path))[0]
        skipped = 0
        with open(path, "r", encoding="utf-8") as f:
            for lineno, raw in enumerate(f, start=1):
                line = raw.strip()
                if not line:
                    continue
                # split(None) splits on any whitespace run
                sep = ";" if ";" in line else None
                parts = [p for p in line.split(sep) if p.strip() != ""]
                try:
                    vec = [float(p) for p in parts]
                except ValueError:
                    vec = None
                if not vec:
                    skipped += 1
                    continue
                if width is None:
                    width = len(vec)
                elif len(vec) != width:
                    # Report the offending file/line rather than letting
                    # np.array fail on a ragged list without context.
                    raise ValueError(
                        f"{path}, ligne {lineno}: {len(vec)} valeurs trouvées, {width} attendues"
                    )
                X_list.append(vec)
                # Register the label lazily so empty files do not get an id
                y_list.append(label_to_id.setdefault(label, len(label_to_id)))
        if skipped:
            print(f"{skipped} ligne(s) ignorée(s) dans {path}")

    if not X_list:
        # Without this guard the summary print below fails on X.shape[1]
        raise ValueError(f"Aucun exemple valide trouvé dans {seq_folder!r} (fichiers *.txt)")

    X = np.array(X_list, dtype=np.float32)
    y = np.array(y_list, dtype=np.int64)

    # dict preserves insertion order, which is exactly the id order
    class_names = list(label_to_id)

    print(f"Dataset chargé : {len(X)} exemples, {X.shape[1]} coordonnées, classes = {class_names}")
    return X, y, class_names

In [6]:
# =========================
# 4) Modèle (BatchNorm + Dropout + L2)
# =========================

def make_model(D, C):
    """Build and compile the dense classifier.

    The network takes inputs of shape (D,), applies two identical stages
    (BatchNormalization -> Dense(relu, L2 1e-4) -> Dropout(0.3)) with 256
    then 128 units, and ends with a softmax Dense layer of C units. It is
    compiled with Adam, sparse categorical cross-entropy and accuracy.
    """
    weight_decay = regularizers.l2(1e-4)

    stack = [layers.Input(shape=(D,))]
    for units in (256, 128):
        stack += [
            layers.BatchNormalization(),
            layers.Dense(units, activation='relu', kernel_regularizer=weight_decay),
            layers.Dropout(0.3),
        ]
    stack.append(layers.Dense(C, activation='softmax'))

    classifier = keras.Sequential(stack)
    classifier.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return classifier


In [7]:
# ================
# 5) Entraînement
# ================

# Folder containing the *.txt files
# NOTE(review): absolute, machine-specific path — adjust it when running elsewhere
seq_folder = "/Users/valentindaveau/Documents/UE_Vision/Coords"

# Build the dataset
X, y, class_names = load_dataset_from_folder(seq_folder)
print("Dataset:", X.shape, y.shape, class_names)

train_val = 0.8
split_seed = 42

# Simple train/val split. The permutation is seeded so that re-running the
# cell yields the same partition (and therefore the same normalisation stats).
rng = np.random.default_rng(split_seed)
idx = rng.permutation(len(X))
split = int(train_val * len(X))
tr, va = idx[:split], idx[split:]
Xtr, ytr = X[tr], y[tr]
Xva, yva = X[va], y[va]

D = X.shape[1]
C = len(class_names)

model_path = "modele_gestes_20x32_80%_16g.keras"
mu_path = "norm_mu.npy"
sigma_path = "norm_sigma.npy"

model_exists = os.path.exists(model_path)
stats_exist = os.path.exists(mu_path) and os.path.exists(sigma_path)

# Global normalisation based on the training split.
# When a saved model is reused, the statistics saved alongside it must be
# reused too: recomputing and overwriting them would leave norm_*.npy out of
# sync with what the model was trained on.
if model_exists and stats_exist:
    mu = np.load(mu_path)
    sigma = np.load(sigma_path)
    print("Normalisation globale chargée depuis les fichiers sauvegardés.")
else:
    mu = Xtr.mean(axis=0, keepdims=True)
    sigma = Xtr.std(axis=0, keepdims=True) + 1e-8
    np.save(mu_path, mu)
    np.save(sigma_path, sigma)
    print("Normalisation globale appliquée (moyenne et écart-type sauvegardés).")
Xtr = (Xtr - mu) / sigma
Xva = (Xva - mu) / sigma

if model_exists:
    print("Modèle trouvé, chargement du modèle sauvegardé...")
    model = keras.models.load_model(model_path)
    # NOTE(review): allow_pickle=True can execute pickled objects; only load
    # a class_names.npy produced by this notebook.
    class_names = np.load("class_names.npy", allow_pickle=True).tolist()
    print("Noms de classes chargés :", class_names)
else:
    print("Aucun modèle trouvé, entraînement en cours...")
    model = make_model(D, C)
    start_time = time.time()
    history = model.fit(Xtr, ytr, validation_data=(Xva, yva), epochs=20, batch_size=32)
    end_time = time.time()
    training_time = end_time - start_time
    print(f"Temps d'entraînement : {training_time:.2f} secondes ({training_time/60:.2f} min)")
    model.save(model_path)
    print(f"Modèle sauvegardé sous : {model_path}")
    np.save("class_names.npy", class_names)
    print("Noms de classes sauvegardés dans class_names.npy")



def normalize_landmarks(X20: np.ndarray) -> np.ndarray:
    """Centre the landmarks on row 0 and rescale them by the length of row 8.

    Row 0 is subtracted from every row, then the result is divided by the
    Euclidean norm of the centred row 8 (plus 1e-8 to avoid a zero divisor).
    """
    centered = X20 - X20[0]
    # Reference distance: WRIST (0) -> MIDDLE MCP (MediaPipe index 9, which became X20[8] after mapping)
    ref_len = np.linalg.norm(centered[8]) + 1e-8
    return centered / ref_len

Dataset chargé : 452428 exemples, 63 coordonnées, classes = ['call', 'dislike', 'fist', 'four', 'like', 'mute', 'ok', 'one', 'peace', 'peace_inverted', 'rock', 'stop', 'stop_inverted', 'three', 'three3', 'thumb_index2']
Dataset: (452428, 63) (452428,) ['call', 'dislike', 'fist', 'four', 'like', 'mute', 'ok', 'one', 'peace', 'peace_inverted', 'rock', 'stop', 'stop_inverted', 'three', 'three3', 'thumb_index2']
Normalisation globale appliquée (moyenne et écart-type sauvegardés).
Aucun modèle trouvé, entraînement en cours...
Metal device set to: Apple M2

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Epoch 1/20


2025-11-10 20:11:27.348216: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Temps d'entraînement : 3376.71 secondes (56.28 min)
Modèle sauvegardé sous : modele_gestes_20x32_80%_16g.keras
Noms de classes sauvegardés dans class_names.npy


In [4]:
# =========================
# 6) Chargement et informations du modèle
# =========================

# Reload the trained model together with the artefacts saved at training time
model = keras.models.load_model("modele_gestes_20x32_80%_16g.keras")
model.summary()
# NOTE(review): allow_pickle=True can execute pickled objects; only load a
# class_names.npy produced by this notebook.
class_names = np.load("class_names.npy", allow_pickle=True).tolist()
mu = np.load("norm_mu.npy")
sigma = np.load("norm_sigma.npy")

# Fail early when the artefacts do not belong together: the camera loop
# indexes class_names with the model's output index and applies mu/sigma to
# the model's input vector.
n_outputs = model.output_shape[-1]
if n_outputs != len(class_names):
    raise ValueError(
        f"Le modèle a {n_outputs} sorties mais class_names.npy contient {len(class_names)} classes"
    )
n_inputs = model.input_shape[-1]
if mu.size != n_inputs or sigma.size != n_inputs:
    raise ValueError(
        f"Le modèle attend {n_inputs} entrées mais mu/sigma ont {mu.size}/{sigma.size} valeurs"
    )

print("Classes connues par le modèle :", class_names)
print("Nombre total de classes :", len(class_names))

Metal device set to: Apple M2

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization (BatchN  (None, 63)               252       
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 256)               16384     
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 batch_normalization_1 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                              

In [5]:
# =========================
# 7) Test à la caméra
# =========================

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Open the camera: try index 0 then 1 and keep the first one that opens
cap = None
for cam_index in [0, 1]:
    candidate = cv2.VideoCapture(cam_index, cv2.CAP_AVFOUNDATION)
    if candidate.isOpened():
        cap = candidate
        print(f"Caméra ouverte à l'index {cam_index}")
        break
    candidate.release()  # do not keep a capture that failed to open
if cap is None:
    hands.close()
    raise RuntimeError("Aucune caméra utilisable détectée.")

window_name = "Reconnaissance de gestes"

pred_interval = 0.3                 # minimum delay between two predictions, in seconds
last_pred_time = float("-inf")      # guarantees a prediction on the first detected hand
last_preds = None
last_top3_idx = None

# Loop-invariant: flatten the normalisation statistics once
mu_flat = mu.flatten()
sigma_flat = sigma.flatten()

# try/finally so the camera, the windows and the MediaPipe graph are released
# even if the loop raises or the kernel is interrupted.
try:
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    cap.set(cv2.CAP_PROP_FPS, 30)
    time.sleep(1.0)

    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(window_name, 900, 700)

    print("Caméra prête. Appuie sur ESC pour quitter.")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("cap.read() a échoué.")
            break

        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        out = frame.copy()
        # Monotonic clock: interval measurement is unaffected by wall-clock changes
        now = time.monotonic()

        # --- Detection and prediction ---
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(out, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # New prediction at most once every pred_interval seconds;
                # the feature vector is only built when it is actually used.
                if now - last_pred_time > pred_interval:
                    coords = np.array(
                        [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark],
                        dtype=np.float32,
                    ).flatten()
                    coords = (coords - mu_flat) / sigma_flat

                    preds = model.predict(np.expand_dims(coords, axis=0), verbose=0)[0]
                    last_preds = preds
                    last_top3_idx = preds.argsort()[-3:][::-1]
                    last_pred_time = now

        # --- Draw the last known result (on every frame) ---
        if last_preds is not None and last_top3_idx is not None:
            for rank, cls_idx in enumerate(last_top3_idx):
                text = f"{class_names[cls_idx]} : {last_preds[cls_idx]*100:.1f}%"
                y_pos = 40 + rank * 35
                color = (0, 255, 0) if rank == 0 else (255, 255, 255)
                cv2.putText(out, text, (20, y_pos),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2, cv2.LINE_AA)

        # --- Camera display ---
        cv2.imshow(window_name, out)
        if cv2.waitKey(5) & 0xFF == 27:  # 27 = ESC
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

I0000 00:00:1762806266.913417 10660802 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1762806266.990963 10661283 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1762806267.006066 10661283 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Caméra ouverte à l'index 0
Caméra prête. Appuie sur ESC pour quitter.


W0000 00:00:1762806270.094705 10661286 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
2025-11-10 21:24:30.240357: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
