# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Masking
from tensorflow.keras.utils import to_categorical


# 1. Load the CSV data. header=None makes pandas read the file's first line
#    as an ordinary data row, so the seven column names are assigned by hand.
df = pd.read_csv("Standardize.csv", header=None)
df.columns = ['user', 'gesture', 'repetition', 'x', 'y', 'z', 't']

# 2. Group rows by (user, gesture, repetition); each group presumably holds
#    the samples of one recorded repetition (TODO confirm against the data).
#    The file is not reopened here: a second line-by-line read that discards
#    every line had no effect on the data already loaded into `df`.
grouped = df.groupby(['user', 'gesture', 'repetition'])
    
# Not referenced again in the visible code; kept in case other cells use it.
sequences = {}

# gesture_groups[g] collects, in groupby iteration order, every per-repetition
# DataFrame whose gesture id equals g (ids 0..9).
gesture_groups = [[] for _ in range(10)]

# One pass over the grouped data is enough: each group is routed directly to
# its bucket instead of rescanning all groups once per gesture id.
for (user, gesture, repetition), group in grouped:
    # Skip the group whose key is the literal "gesture" — presumably the CSV
    # header line, which read_csv(header=None) keeps as a data row.
    if gesture == "gesture":
        continue
    gesture_id = int(gesture)
    # Ids outside 0..9 are ignored, as before.
    if 0 <= gesture_id < 10:
        gesture_groups[gesture_id].append(group)


# Replace, in place, every per-repetition DataFrame by a plain list of rows.
# Each row is [user, gesture, repetition, x, y, z, t], i.e. the values of the
# seven DataFrame columns in their declared order.
for repetitions in gesture_groups[:10]:  # 10 gestures
    for position, frame in enumerate(repetitions):  # one entry per repetition
        repetitions[position] = [
            list(record) for record in frame.itertuples(index=False)
        ]

# Quick sanity check: number of samples in the second repetition of gesture 1.
print(len(gesture_groups[1][1]))

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

# Step 1 — Find the global maximum sequence length
all_sequences = [seq for gesture in gesture_groups for seq in gesture]
if not all_sequences:
    # Fail with an explicit message rather than the generic error max()
    # raises on an empty iterable.
    raise ValueError("No gesture sequences found: cannot build the dataset.")
max_len = max(len(seq) for seq in all_sequences)

X = []
y = []

# Step 2 — Interpolation + extraction of the x, y, z, t features
# Every sequence is resampled onto the same normalized [0, 1] grid of max_len
# points, so the grid is built once instead of once per sequence.
target_indices = np.linspace(0, 1, max_len)

for gesture_id, gesture_sequences in enumerate(gesture_groups):
    for sequence in gesture_sequences:
        # Row layout is [user, gesture, repetition, x, y, z, t]; columns 3..6
        # are the features. Values may have been read as strings, hence the
        # explicit float conversion.
        features = np.array(sequence)[:, 3:7].astype(float)
        original_indices = np.linspace(0, 1, features.shape[0])

        # Linearly interpolate each feature column independently.
        interpolated_seq = np.stack(
            [
                np.interp(target_indices, original_indices, features[:, col])
                for col in range(features.shape[1])
            ],
            axis=1,
        )  # shape: (max_len, 4)
        X.append(interpolated_seq)
        y.append(gesture_id)  # the label is the gesture index

X = np.array(X)  # shape: (n_samples, max_len, 4)
y = np.array(y)

# Step 3 — One-hot encoding + train/test split
# The split is done BEFORE standardization so that the scaler's mean and
# variance are estimated from training samples only; fitting it on the full
# dataset would leak test-set statistics into the preprocessing.
y = to_categorical(y, num_classes=10)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4 — Standardization of x, y, z, t (each feature column individually)
# Flatten (n_samples, max_len, n_features) to (n_samples * max_len, n_features)
# so each feature is scaled across all timesteps, then restore the 3-D shape.
n_features = X.shape[-1]
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
# The test set is transformed with the statistics learned on the training set.
X_test = scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

# Model definition: a Masking layer, three stacked LSTM layers (512 -> 256 ->
# 128 units) each followed by 20% dropout, then a dense head ending in a
# 10-way softmax (one output per gesture class).
# NOTE(review): the inputs are resampled to max_len and standardized upstream,
# so the mask on 0. presumably rarely, if ever, triggers — confirm.
model = Sequential([
    Masking(mask_value=0., input_shape=(max_len, 4)),
    LSTM(512, return_sequences=True),  # very large LSTM, emits the full sequence
    Dropout(0.2),
    LSTM(256, return_sequences=True),  # second large LSTM, emits the full sequence
    Dropout(0.2),
    LSTM(128),                         # last LSTM, no return_sequences
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Training without an EarlyStopping callback: all 100 epochs are run.
fit_options = {
    "epochs": 100,
    "batch_size": 16,
    "validation_split": 0.1,  # fraction of the training data used for validation
    "verbose": 1,
}
history = model.fit(X_train, y_train, **fit_options)

# Predictions on the held-out test set
y_pred = model.predict(X_test)

# Turn per-class scores / one-hot rows back into integer class indices.
y_true_classes = np.argmax(y_test, axis=1)
y_pred_classes = np.argmax(y_pred, axis=1)

# Confusion matrix
# Passing the full label list keeps the matrix square over every class even
# when a class is absent from the test split, so row/column i always refers
# to class i (the display uses 0..n-1 tick labels by default).
n_classes = y_test.shape[1]
counts = confusion_matrix(
    y_true_classes, y_pred_classes, labels=list(range(n_classes))
)
totals = counts.sum(axis=1)

# Row-wise percentages, computed into a new float array: writing them back
# into the integer count matrix would truncate the decimals. A row with no
# true samples contains only zeros, so dividing it by 1 instead of 0 yields
# 0% everywhere and avoids a division by zero.
cm = counts * 100.0 / np.maximum(totals, 1)[:, None]

disp = ConfusionMatrixDisplay(confusion_matrix=cm)
# One decimal place; the default float format would render 100.0 as "1e+02".
disp.plot(xticks_rotation=45, cmap="Blues", values_format=".1f")
plt.xlabel("Prédit")
plt.ylabel("Réel")
plt.title("Matrice de confusion en pourcentages (%) — modèle très puissant")
plt.show()

# Learning curves: training vs. validation accuracy (left panel) and loss
# (right panel), one point per epoch, read from the Keras History object.
plt.figure(figsize=(12, 5))

panels = (
    ("accuracy", "Accuracy"),
    ("loss", "Loss"),
)
for position, (metric, label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f"Train {label}")
    plt.plot(history.history[f"val_{metric}"], label=f"Validation {label}")
    plt.title(f"{label} au cours des epochs")
    plt.xlabel("Epochs")
    plt.ylabel(label)
    plt.legend()

plt.tight_layout()
plt.show()









