## Isolated Sign Language Recognition with DNN

In this notebook, I will create an Isolated Sign Language Recognition model for the [Google - Isolated Sign Language Recognition Competition](https://www.kaggle.com/competitions/asl-signs) using a DNN. The training records in this dataset have different numbers of frames; to make it easy to get started and to train faster, I will calculate the mean frame for each training record. So this model will have input shape (n, 543, 3) and output shape (n, 250). Inference is trickier: I will create a TFLite model that accepts a single test file with input shape (None, 543, 3) and output shape (250,). 

This model gets about 0.24 CV and 0.23 LB. In the beginning, the LB score was 0. I guess that was because of missing values. I added code like the following to the neural network to replace missing values with 0; luckily, it works.

```python
x = tf.where(tf.math.is_nan(inputs), tf.zeros_like(inputs), inputs)
```

## Configuration

In [None]:
class CFG:
    """Notebook-wide settings read by the cells below."""

    # Folder holding the competition files; relative paths are appended to it.
    data_path = "../input/asl-signs/"

    # Run switches.
    is_training = True               # fit a new model instead of loading a saved one
    quick_experiment = False         # stop building the dataset after 5000 samples
    use_aggregation_dataset = True   # load precomputed X/y arrays instead of rebuilding them

    # Dataset constants.
    rows_per_frame = 543             # rows of the parquet file that make up one frame
    num_classes = 250                # number of sign classes

## Import Library

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
import json
import os
import gc
from sklearn.model_selection import train_test_split

## Utilities

In [None]:
def load_relevant_data_subset_with_imputation(pq_path):
    """Load the x/y/z columns of one parquet file with missing values set to 0.

    Args:
        pq_path: Path of the parquet file to read.

    Returns:
        A float32 array of shape (n_frames, CFG.rows_per_frame, 3).
    """
    coord_columns = ['x', 'y', 'z']
    table = pd.read_parquet(pq_path, columns=coord_columns)
    # Impute missing coordinates with 0 before reshaping.
    table = table.fillna(0)
    frame_count = len(table) // CFG.rows_per_frame
    frames = table.to_numpy().reshape(
        frame_count, CFG.rows_per_frame, len(coord_columns)
    )
    return frames.astype(np.float32)

def load_relevant_data_subset(pq_path):
    """Load the x/y/z columns of one parquet file, leaving missing values as NaN.

    Args:
        pq_path: Path of the parquet file to read.

    Returns:
        A float32 array of shape (n_frames, CFG.rows_per_frame, 3).
    """
    coord_columns = ['x', 'y', 'z']
    table = pd.read_parquet(pq_path, columns=coord_columns)
    frame_count = len(table) // CFG.rows_per_frame
    frames = table.to_numpy().reshape(
        frame_count, CFG.rows_per_frame, len(coord_columns)
    )
    return frames.astype(np.float32)

def read_dict(file_path):
    """Read a JSON file and return its parsed content.

    Args:
        file_path: Path of the JSON file; a leading ``~`` is expanded to the
            user's home directory.

    Returns:
        The object decoded by ``json.load``.
    """
    path = os.path.expanduser(file_path)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

## Load data

In [None]:
# Load train.csv from the data folder and preview its first rows.
train_csv_path = f"{CFG.data_path}train.csv"
train = pd.read_csv(train_csv_path)
train.head()

There are 21 participants. Each of them created about 3000 to 5000 training records.

In [None]:
# Number of distinct participants in the training index.
train["participant_id"].nunique()

In [None]:
# Bar chart of how many training records each participant contributed.
train["participant_id"].value_counts().plot(kind="bar")

There are 94477 training samples in total.

In [None]:
# Total number of rows in the training index.
train.shape[0]

There are 250 different signs that we need to predict. Each sign has about 300 to 400 samples.

In [None]:
# Build the sign -> class index lookup, its inverse, and an integer label column.
label_index = read_dict(f"{CFG.data_path}sign_to_prediction_index_map.json")
index_label = {class_id: sign_name for sign_name, class_id in label_index.items()}
print(label_index)
train["label"] = [label_index[sign_name] for sign_name in train["sign"]]
train.head()

In [None]:
# Number of training samples available for each sign.
train.sign.value_counts()

## Modeling
I am still exploring how to handle this dataset. To make it easy to get started and to train faster, I use the mean frame of each sample as the training input.

In [None]:
# Build (or load) the training arrays: X holds one mean frame per sample and
# y holds the integer class label of that sample.
if CFG.is_training:
    if not CFG.use_aggregation_dataset:
        # Quick experiments only process the first 5000 samples.
        n_samples = min(len(train), 5000) if CFG.quick_experiment else len(train)
        xs = []
        ys = []
        # NaN (rather than 0) marks samples that were never loaded, so the
        # statistics printed below describe only the processed samples.
        num_frames = np.full(len(train), np.nan)
        sample_iter = zip(
            train["path"].iloc[:n_samples], train["label"].iloc[:n_samples]
        )
        for i, (rel_path, label) in enumerate(tqdm(sample_iter, total=n_samples)):
            data = load_relevant_data_subset_with_imputation(
                f"{CFG.data_path}{rel_path}"
            )
            # Mean aggregation: average over the frame axis so every sample
            # ends up with the same shape regardless of its frame count.
            xs.append(np.mean(data, axis=0))
            ys.append(label)
            num_frames[i] = data.shape[0]
        # Save number of frames of each training sample for data analysis
        train["num_frames"] = num_frames
        X = np.array(xs)
        y = np.array(ys)
        print(train["num_frames"].describe())
        train.to_csv("train.csv", index=False)
    else:
        # Reuse the precomputed aggregated arrays.
        X = np.load("/kaggle/input/isolated-sign-language-aggregation-dataset/X.npy")
        y = np.load("/kaggle/input/isolated-sign-language-aggregation-dataset/y.npy")
    print(X.shape, y.shape)

In [None]:
def get_model(rows_per_frame=None, num_classes=None):
    """Build and compile the classifier that is trained on mean frames.

    Each hidden Dense layer is applied along the last axis of the
    (rows_per_frame, 3) input; the result is then flattened and fed to a
    softmax layer with one unit per class.

    Args:
        rows_per_frame: Number of rows in one frame. Defaults to
            ``CFG.rows_per_frame``.
        num_classes: Number of output classes. Defaults to
            ``CFG.num_classes``.

    Returns:
        A compiled ``tf.keras.Model`` that takes inputs of shape
        (batch, rows_per_frame, 3) and outputs (batch, num_classes)
        class probabilities.
    """
    if rows_per_frame is None:
        rows_per_frame = CFG.rows_per_frame
    if num_classes is None:
        num_classes = CFG.num_classes

    inputs = tf.keras.Input((rows_per_frame, 3), dtype=tf.float32)
    vector = inputs
    for units in (128, 64, 32, 16):
        vector = tf.keras.layers.Dense(units, activation="relu")(vector)
    vector = tf.keras.layers.Flatten()(vector)
    output = tf.keras.layers.Dense(num_classes, activation="softmax")(vector)
    model = tf.keras.Model(inputs=inputs, outputs=output)
    # Labels are integer class indices, hence the sparse loss and metrics.
    # No optimizer is passed, so Keras uses its default optimizer.
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[
            "accuracy",
            tf.keras.metrics.SparseTopKCategoricalAccuracy(k=10),
        ],
    )
    return model

In [None]:
# Either load a previously trained model or fit a new one on an 80/20
# stratified split of the aggregated data.
if not CFG.is_training:
    model = tf.keras.models.load_model("/kaggle/input/sign-language-prediction-model/model.h5")
else:
    X_train, X_val, y_train, y_val = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
        stratify=y,
    )
    print(X_train.shape, y_train.shape, X_val.shape, y_val.shape)
    # Drop the unsplit arrays before training to release memory.
    del X, y
    gc.collect()
    model = get_model()
    # Writes the model to model.h5 during training.
    callbacks = [tf.keras.callbacks.ModelCheckpoint("model.h5")]
    model.fit(
        x=X_train,
        y=y_train,
        batch_size=128,
        epochs=30,
        validation_data=(X_val, y_val),
        callbacks=callbacks,
    )
model.summary()

## Create Model for inference

In [None]:
def get_inference_model(model):
    """Wrap a trained model so it accepts all frames of a single sample.

    The wrapper treats the leading (batch) axis of its input as the frame
    axis. It replaces NaN values with 0 and averages over that axis, which
    mirrors the zero imputation and mean aggregation used to build the
    training data. It then runs the trained layers on the resulting single
    mean frame.

    Args:
        model: The trained Keras model returned by ``get_model`` (or loaded
            from disk). Its first layer is assumed to be the input layer.

    Returns:
        A compiled ``tf.keras.Model`` whose input tensor is named "inputs"
        and whose output tensor is named "outputs".
    """
    # Take the per-frame shape from the trained model instead of hard-coding
    # it, so the wrapper cannot drift from the model it wraps.
    frame_shape = tuple(model.input_shape[1:])
    inputs = tf.keras.Input(frame_shape, dtype=tf.float32, name="inputs")
    x = tf.where(tf.math.is_nan(inputs), tf.zeros_like(inputs), inputs)
    # keepdims=True leaves a leading axis of size 1 for the trained layers.
    x = tf.reduce_mean(x, axis=0, keepdims=True)
    # Skip layer 0 (the original input layer) and reuse the trained layers.
    for layer in model.layers[1:]:
        x = layer(x)
    # Identity activation, used only to give the output tensor a fixed name.
    output = tf.keras.layers.Activation(activation="linear", name="outputs")(x)
    inference_model = tf.keras.Model(inputs=inputs, outputs=output)
    inference_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=["accuracy"])
    return inference_model

In [None]:
# Wrap the trained model for single-sample inference and show its layers.
inference_model = get_inference_model(model)
inference_model.summary()

## Create submission file

In [None]:
# Convert the Keras inference model to TFLite and write it to disk.
model_path = "model.tflite"
converter = tf.lite.TFLiteConverter.from_keras_model(inference_model)
tflite_model = converter.convert()
with open(model_path, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# Package the saved TFLite model into submission.zip.
!zip submission.zip $model_path

## Making Prediction

In [None]:
# Install the standalone TFLite interpreter imported by the prediction cell.
!pip install tflite-runtime

The performance is not optimal so far. However, it sometimes makes correct predictions.

In [None]:
# Sanity-check the exported TFLite model on the first training samples.
import tflite_runtime.interpreter as tflite

interpreter = tflite.Interpreter(model_path)
# Not used below; kept so the available signatures can be inspected.
found_signatures = list(interpreter.get_signature_list().keys())
prediction_fn = interpreter.get_signature_runner("serving_default")
# head(100) copes with a training index shorter than 100 rows.
preview = train.head(100)
for rel_path, actual_sign in zip(preview["path"], preview["sign"]):
    # Load without imputation: the inference model replaces NaN values itself.
    frames = load_relevant_data_subset(f"{CFG.data_path}{rel_path}")
    output = prediction_fn(inputs=frames)
    sign = np.argmax(output["outputs"])
    print(f"Predicted label: {index_label[sign]}, Actual Label: {actual_sign}")