# Exercice : Langage des signes

## Création des datasets d'apprentissage et de validation

In [None]:
import numpy as np
import pandas as pd
from utils.data_processing import load_dataframe_from_file, store_data

def flatten_row(row):
    """Return one flat list holding a row's inputs followed by its label.

    ``row['inputs']`` is flattened to one dimension and converted to a
    Python list; ``row['label']`` is converted to a list and concatenated
    after it.
    """
    return row['inputs'].flatten().tolist() + row['label'].tolist()

def _drop_first_input(x):
    """Return ``x`` without its first element when it is an ndarray, else ``x`` unchanged."""
    # NOTE(review): the dropped element is presumably a bias term prepended by
    # load_dataframe_from_file — confirm against utils.data_processing.
    return x[1:] if isinstance(x, np.ndarray) else x


# Base DataFrame.
# NOTE(review): the second argument is presumably the number of label columns
# in the CSV (5 here, 0 for the label-less validation file) — confirm.
df = load_dataframe_from_file('../datasets/LangageDesSignes/data_formatted.csv', 5)

# Class index of each row: position of the largest entry of its label vector.
df['class'] = df['label'].apply(lambda x: int(np.argmax(x)))

# Draw the stratified sample ONCE (50 rows per class, every class sampled with
# the same seed) and derive both datasets from it, so the learning/validation
# split cannot drift apart. Sampling each group explicitly and concatenating
# keeps the original row index and avoids relying on groupby.apply passing the
# grouping column to the callback (deprecated in pandas 2.2).
# sample() raises ValueError if a class has fewer than 50 rows.
df_sampled = pd.concat(
    [group.sample(n=50, random_state=42) for _, group in df.groupby('class')]
)
selected_indices = df_sampled.index

# Learning dataset: sampled rows, flattened to "inputs..., label..." per line.
df_learning = df_sampled.drop(columns=['class']).reset_index(drop=True)
df_learning['inputs'] = df_learning['inputs'].apply(_drop_first_input)

df_learning_good = pd.DataFrame(df_learning.apply(flatten_row, axis=1).tolist())
df_learning_good.to_csv('../datasets/LangageDesSignes/learning_dataset.csv', index=False, sep=",", header=False)


# Validation dataset: every row NOT in the learning sample, without the label
# column.
df_remaining = df.drop(index=selected_indices)

df_validation = df_remaining[['inputs']].reset_index(drop=True)
df_validation['inputs'] = df_validation['inputs'].apply(_drop_first_input)

df_validation_good = pd.DataFrame(df_validation['inputs'].apply(lambda x: x.flatten().tolist()).tolist())
df_validation_good.to_csv('../datasets/LangageDesSignes/validation_dataset.csv', index=False, sep=",", header=False)

# Sanity check: reload both files that were just written and display them.
test_learn = load_dataframe_from_file("../datasets/LangageDesSignes/learning_dataset.csv",5)
test_valid = load_dataframe_from_file("../datasets/LangageDesSignes/validation_dataset.csv",0)
print(test_learn)
print(test_valid)

## Apprentissage du réseau

In [10]:
import numpy as np
from multilayer.layer import Layer
from multilayer.model import Model
from utils.data_processing import load_dataframe_from_file

# Two-layer network: 42 inputs -> 10 sigmoid units -> 5 sigmoid outputs.
model = Model([
    Layer(units=10, activation="sigmoid", input_size=42),
    Layer(units=5, activation="sigmoid"),
])

training_df = load_dataframe_from_file("../datasets/LangageDesSignes/learning_dataset.csv", 5)
# Drop the first element of every inputs vector before building the matrix.
# NOTE(review): presumably a bias term added by load_dataframe_from_file —
# confirm against utils.data_processing.
x_train = np.array([x[1:] for x in training_df["inputs"]], dtype=float)
y_train = np.stack(training_df["label"].values)

# Only the first five rows are printed to keep the output short. ;-)
print(f"x_train =\n {x_train[0:5]}")
print(f"y_train =\n {y_train[0:5]}")

# NOTE(review): the positional arguments are presumably the learning rate,
# the target error and the maximum number of epochs — confirm against
# Model.fit.
history = model.fit(x_train, y_train, 0.5, 0.001, 2000)

# Compare the rounded prediction with the real label for the first five
# training samples (the label was previously a "TO DO" placeholder).
for case, expected in zip(x_train[0:5], y_train[0:5]):
    answer = model.forward(case)
    print(f"answer_predicted: {answer.round()} | answer_real: {expected}")

x_train =
 [[ 0.          0.         -0.33170732  0.17605634 -0.58536585  0.4084507
  -0.80487805  0.50704225 -1.          0.47887324 -0.38536585  0.95070423
  -0.59512195  0.94366197 -0.50731707  0.64788732 -0.4195122   0.66901408
  -0.2097561   1.         -0.43414634  0.88732394 -0.36585366  0.57042254
  -0.28780488  0.66197183 -0.04878049  0.95070423 -0.24390244  0.88732394
  -0.21463415  0.58450704 -0.13170732  0.66197183  0.09268293  0.85915493
  -0.04390244  0.83802817 -0.08292683  0.62676056 -0.0097561   0.64788732]
 [ 0.          0.         -0.33854167  0.18243243 -0.58854167  0.4527027
  -0.79166667  0.56081081 -1.          0.55405405 -0.375       0.9527027
  -0.61979167  0.95945946 -0.5625      0.66216216 -0.46875     0.64189189
  -0.203125    1.         -0.46875     0.88513514 -0.41145833  0.54054054
  -0.30729167  0.58783784 -0.03125     0.93918919 -0.28125     0.84459459
  -0.25520833  0.52702703 -0.14583333  0.57432432  0.11458333  0.82432432
  -0.07291667  0.81756757 -0.