## Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import sklearn

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

from pathlib import Path
from IPython.display import Image, display, Video, HTML
from ipywidgets import interact, widgets

from signlens.params import *
from signlens.preprocessing import data, preprocess
from utils import plot_landmarks, model_utils

# reload automatically python functions outside notebook
%load_ext autoreload
%autoreload 2

2024-03-21 17:27:12.046145: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Fetch data

In [2]:
# variables loaded from .env
print(f"DATA_FRAC : {DATA_FRAC}")
print(f"NUM_CLASSES : {NUM_CLASSES}")
print(f"MAX_SEQ_LEN : {MAX_SEQ_LEN}")

DATA_FRAC : 0.5
NUM_CLASSES : 10
MAX_SEQ_LEN : 100


In [3]:
NUM_CLASSES = 10

In [4]:
sequences_data = data.load_data_subset_csv(noface=True, balanced=True, n_classes=NUM_CLASSES)

✅ File with frames already exists, loaded matching 'sequence_id' rows.
✅ Filtered on n_frames = 100. Size reduced from 94477 to 86168 (91.2%)
✅ Filtered on n_classes = 10. Size reduced from 86168 to 3491 (4.1%)
✅ Balanced data, with average of 174.5 elements per class. Size reduced from 3491 to 1745 (50.0%)
✅ Loaded 3491 rows (1.8% of the original 94477 rows) from the dataset.


In [5]:
sequences_data.sign.value_counts()

sign
go        175
chair     175
before    175
book      175
drink     175
who       174
no        174
fine      174
all       174
yes       174
Name: count, dtype: int64

In [6]:
from sklearn.model_selection import train_test_split

X_files = sequences_data.file_path
y = preprocess.label_dictionnary(sequences_data)

# Train test split
X_train_files, X_test_files, y_train, y_test = train_test_split(X_files, y, test_size=0.2, stratify=y)

# Train split into train and val
X_train_files, X_val_files, y_train, y_val = train_test_split(X_train_files, y_train, test_size=0.2, stratify=y_train)

In [7]:
X_train = preprocess.group_pad_sequences(X_train_files)
X_val = preprocess.group_pad_sequences(X_val_files)
X_test = preprocess.group_pad_sequences(X_test_files)


In [8]:
X_train=np.array([matrix.toarray().reshape(100, 75, 3) for matrix in X_train])
X_val=np.array([matrix.toarray().reshape(100, 75, 3) for matrix in X_val])
X_test=np.array([matrix.toarray().reshape(100, 75, 3) for matrix in X_test])

In [9]:
X_train.shape

(1116, 100, 75, 3)

In [10]:
y_train.shape

(1116, 10)

## Model

In [21]:
from tensorflow.keras import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import models, layers, regularizers

from tensorflow.keras.utils import to_categorical

In [32]:
normalizer = layers.Normalization(input_shape=(MAX_SEQ_LEN, N_LANDMARKS_NO_FACE, 3))
normalizer.adapt(X_train)


  super().__init__(**kwargs)


In [33]:
# 1- RNN Architecture
model = Sequential()
model.add(normalizer)
model.add(layers.Reshape((MAX_SEQ_LEN, N_LANDMARKS_NO_FACE*3), input_shape=(MAX_SEQ_LEN, N_LANDMARKS_NO_FACE, 3)))
model.add(layers.Masking(mask_value=0.0))

model.add(layers.SimpleRNN(units=40, activation='tanh', input_shape=(MAX_SEQ_LEN, N_LANDMARKS_NO_FACE * 3)))
model.add(layers.Dense(20, activation="relu"))
model.add(layers.Dense(20, activation="relu"))
model.add(layers.Dense(NUM_CLASSES, activation='softmax'))


y_cat = to_categorical(y)

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.summary()

In [40]:
# 1- RNN Architecture
model = Sequential()
model.add(layers.Reshape((MAX_SEQ_LEN, N_LANDMARKS_NO_FACE*3), input_shape=(MAX_SEQ_LEN, N_LANDMARKS_NO_FACE, 3)))
model.add(layers.Masking(mask_value=0.0))

# model.add(layers.SimpleRNN(units=40, activation='tanh', return_sequences=True))
# model.add(layers.GRU(units=20, activation='tanh', return_sequences=False))
# model.add(layers.Dense(10, activation='relu'))
# model.add(layers.Dropout(0.2))
# model.add(layers.Dense(10, activation='relu'))
# model.add(layers.Dropout(0.2))
# model.add(layers.Dense(NUM_CLASSES, activation='relu'))

model.add(layers.SimpleRNN(units=128, return_sequences=True))
model.add(layers.Dropout(0.3))
model.add(layers.LSTM(units=64))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(NUM_CLASSES, activation='relu'))



model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.summary()

In [None]:
# Fit model

es = EarlyStopping(patience=10, restore_best_weights=True)

history = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=100,
                    batch_size=32, 
                    verbose=1, 
                    callbacks = [es] # This will call the Early Stopping Criterion for each epoch
                   )


Epoch 1/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 374ms/step - accuracy: 0.0879 - loss: 1.4733 - val_accuracy: 0.1036 - val_loss: 1.5483
Epoch 2/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 260ms/step - accuracy: 0.1071 - loss: 1.4202 - val_accuracy: 0.0893 - val_loss: 1.5475
Epoch 3/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 302ms/step - accuracy: 0.0879 - loss: 1.3677 - val_accuracy: 0.1000 - val_loss: 1.3636
Epoch 4/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 522ms/step - accuracy: 0.1151 - loss: 1.2049 - val_accuracy: 0.1000 - val_loss: 1.1286
Epoch 5/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 706ms/step - accuracy: 0.1157 - loss: 1.0713 - val_accuracy: 0.1000 - val_loss: 0.7327
Epoch 6/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 491ms/step - accuracy: 0.1119 - loss: 0.8693 - val_accuracy: 0.0964 - val_loss: 0.6569
Epoch 7/100
[1m3

In [None]:
model_utils.plot_history_interactive(history)