# Introduction

This notebook was created for [Kaggle's Sign Language Classifier Competition](https://www.kaggle.com/competitions/asl-signs/code).

This notebook was inspired by some other notebooks:

- We used the preprocessed TensorFlow dataset from [tfdataset-of-google-isl-recognition-data](https://www.kaggle.com/datasets/aapokossi/saved-tfdataset-of-google-isl-recognition-data).

- We trained our model following the notebooks [Submission for variable length time-series model](https://www.kaggle.com/code/aapokossi/submission-for-variable-length-time-series-model) and [LSTM Baseline for Starters - Sign Language eeff0f](https://www.kaggle.com/code/stanptown/lstm-baseline-for-starters-sign-language-eeff0f). We did more careful feature engineering and tried tuning the layers, other hyperparameters, and/or the number of epochs to increase the accuracy.


<a id="section-one"></a>
# Import Libraries and Set File Directories

In [1]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, optimizers

In [2]:
# Kaggle input locations for the asl-signs competition data
ASL_SIGNS_DIR = "/kaggle/input/asl-signs"
LANDMARK_FILES_DIR = ASL_SIGNS_DIR + "/train_landmark_files"
TRAIN_FILE = ASL_SIGNS_DIR + "/train.csv"

<a id="section-two"></a>
# Visualize data
A more detailed visualization, including creating a GIF of a sign video with landmarks, is in another notebook.

In [3]:
# Load one landmark file to inspect the raw format: one row per (frame, landmark)
# with its type, landmark index and x/y/z coordinates.
# The path is built from LANDMARK_FILES_DIR so the data location is defined in one place.
sample = pd.read_parquet(f"{LANDMARK_FILES_DIR}/16069/100015657.parquet")
sample.head()

Unnamed: 0,frame,row_id,type,landmark_index,x,y,z
0,103,103-face-0,face,0,0.437886,0.437599,-0.051134
1,103,103-face-1,face,1,0.443258,0.392901,-0.067054
2,103,103-face-2,face,2,0.443997,0.409998,-0.04299
3,103,103-face-3,face,3,0.435256,0.362771,-0.039492
4,103,103-face-4,face,4,0.44378,0.381762,-0.068013


<a id="section-three"></a>
# Load Data

In [5]:
# Constants for the dataset and the landmark rows used as model features.
# Every list below holds row indices into a single frame of ROWS_PER_FRAME landmarks.

# Landmarks, first try (kept for reference):
# LANDMARK_IDX = [0,9,11,13,14,17,117,118,119,199,346,347,348] + list(range(468,543))

# Better landmarks as features, grouped by body part
NOSE = [1, 2, 98, 327]
LNOSE = [98]
RNOSE = [327]

LIP = [
    0,
    61, 185, 40, 39, 37, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]
LLIP = [84, 181, 91, 146, 61, 185, 40, 39, 37, 87, 178, 88, 95, 78, 191, 80, 81, 82]
RLIP = [314, 405, 321, 375, 291, 409, 270, 269, 267, 317, 402, 318, 324, 308, 415, 310, 311, 312]

POSE = [500, 502, 504, 501, 503, 505, 512, 513]
LPOSE = [513, 505, 503, 501]
RPOSE = [512, 504, 502, 500]

REYE = [
    33, 7, 163, 144, 145, 153, 154, 155, 133,
    246, 161, 160, 159, 158, 157, 173,
]
LEYE = [
    263, 249, 390, 373, 374, 380, 381, 382, 362,
    466, 388, 387, 386, 385, 384, 398,
]

# Hand landmarks occupy two contiguous index ranges of 21 rows each
LHAND = list(range(468, 489))
RHAND = list(range(522, 543))

# Final feature selection; POSE is defined above but currently left out
LANDMARK_IDX = LIP + LHAND + RHAND + NOSE + REYE + LEYE

# Location and layout of the preprocessed dataset
DATA_PATH = "/kaggle/input/saved-tfdataset-of-google-isl-recognition-data/GoogleISLDatasetBatched"
DS_CARDINALITY = 185  # total number of batches in the saved dataset
VAL_SIZE = 20         # number of batches held out for validation
N_SIGNS = 250         # number of sign classes
ROWS_PER_FRAME = 543  # landmark rows per frame in the raw data

print(len(LANDMARK_IDX))
print(LANDMARK_IDX)

118
[0, 61, 185, 40, 39, 37, 267, 269, 270, 409, 291, 146, 91, 181, 84, 17, 314, 405, 321, 375, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 1, 2, 98, 327, 33, 7, 163, 144, 145, 153, 154, 155, 133, 246, 161, 160, 159, 158, 157, 173, 263, 249, 390, 373, 374, 380, 381, 382, 362, 466, 388, 387, 386, 385, 384, 398]


In [6]:
# Size of the first-try landmark subset, for comparison with the current LANDMARK_IDX
len([0,9,11,13,14,17,117,118,119,199,346,347,348] + list(range(468,543)))

88

To keep it simple, we will use the preprocessed [tf.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) from [tfdataset-of-google-isl-recognition-data](https://www.kaggle.com/datasets/aapokossi/saved-tfdataset-of-google-isl-recognition-data).

In [7]:
def preprocess(ragged_batch, labels):
    """Select the feature landmarks, zero out NaNs and flatten the coordinates.

    Args:
        ragged_batch: Batch of landmark sequences; axis 2 indexes the landmark
            rows of a frame and the last axis holds 3 coordinates
            (presumably x, y, z -- confirm against the saved dataset).
        labels: Labels for the batch, passed through unchanged.

    Returns:
        A tuple ``(features, labels)`` where the last axis of ``features`` has
        ``3 * len(LANDMARK_IDX)`` entries: the first coordinate of every
        selected landmark, then the second, then the third.
    """
    # keep only the landmarks chosen as features
    selected = tf.gather(ragged_batch, LANDMARK_IDX, axis=2)

    # missing landmarks are stored as NaN; replace them with zeros
    nan_mask = tf.math.is_nan(selected)
    cleaned = tf.where(nan_mask, tf.zeros_like(selected), selected)

    # concatenate the three coordinate planes along the feature axis
    coordinate_planes = [cleaned[..., axis] for axis in range(3)]
    return tf.concat(coordinate_planes, -1), labels

# Load the pre-batched dataset and apply the landmark selection / NaN cleanup
dataset = tf.data.Dataset.load(DATA_PATH)
dataset = dataset.map(preprocess)

# The first VAL_SIZE batches are held out for validation
val_ds = dataset.take(VAL_SIZE).cache().prefetch(tf.data.AUTOTUNE)

# The remaining batches are used for training. The shuffle buffer spans every
# training batch so the batch order is fully reshuffled each epoch; a buffer
# smaller than the dataset only shuffles locally.
train_ds = (
    dataset.skip(VAL_SIZE)
    .cache()
    .shuffle(DS_CARDINALITY - VAL_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

<a id="section-four"></a>
# Train Model

In [8]:
# Show the element spec of the preprocessed dataset: (ragged features, labels)
print(dataset)

<MapDataset element_spec=(RaggedTensorSpec(TensorShape([None, None, 354]), tf.float32, 1, tf.int64), TensorSpec(shape=(None, 1), dtype=tf.int64, name=None))>


Now let us get to the fun part, training the model!

In [9]:
# early stopping and learning-rate reduction, both monitoring validation accuracy
def get_callbacks():
    """Build the list of Keras callbacks used during training.

    Returns:
        A list with two callbacks, in this order:
        an ``EarlyStopping`` that monitors ``val_accuracy`` with a patience of
        10 epochs and restores the best weights, and a ``ReduceLROnPlateau``
        that monitors ``val_accuracy`` and halves the learning rate after a
        patience of 3 epochs.
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        patience=10,
        restore_best_weights=True,
    )
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_accuracy",
        factor=0.5,
        patience=3,
    )
    return [early_stopping, reduce_lr]

# # Custom callback to update weight decay with learning rate
# class WeightDecayCallback(tf.keras.callbacks.Callback):
#     def __init__(self, wd_ratio=0.05):
#         self.step_counter = 0
#         self.wd_ratio = wd_ratio
    
#     def on_epoch_begin(self, epoch, logs=None):
#         model.optimizer.weight_decay = model.optimizer.learning_rate * self.wd_ratio
#         print(f'learning rate: {model.optimizer.learning_rate.numpy():.2e}, weight decay: {model.optimizer.weight_decay.numpy():.2e}')

# def get_callbacks():
#     return [
# #             tf.keras.callbacks.EarlyStopping(
# #             monitor="val_accuracy",
# #             patience=10,
# #             restore_best_weights=True
# #         ),
#         WeightDecayCallback(0.05),
# #         tf.keras.callbacks.ReduceLROnPlateau(
# #             monitor = "val_accuracy",
# #             factor = 0.5,
# #             patience = 3
# #         ),
#     ]

# a single dense block followed by layer normalization and a GELU activation
def dense_block(units, name):
    """Create a Dense -> LayerNormalization -> GELU stack.

    The three layers are created once, when this function is called, and are
    reused every time the returned callable is applied.

    Args:
        units: Number of output units of the Dense layer.
        name: Label supplied by the caller; it is accepted but not currently
            applied to the created layers.

    Returns:
        A callable mapping a tensor ``x`` to ``gelu(layer_norm(dense(x)))``.
    """
    dense = layers.Dense(units)
    layer_norm = layers.LayerNormalization()
    gelu = layers.Activation("gelu")

    def apply(x):
        return gelu(layer_norm(dense(x)))

    return apply
def dense_b1(units, name):
    """Create a Dense layer with a GELU activation.

    Args:
        units: Number of output units of the Dense layer.
        name: Label supplied by the caller; it is accepted but not currently
            applied to the created layer.

    Returns:
        A callable that applies the (single, shared) Dense layer to a tensor.
    """
    gelu_dense = layers.Dense(units, activation="gelu")

    def apply(x):
        return gelu_dense(x)

    return apply
def dense_b(units, name):
    """Create a Dense layer with a softmax activation.

    Args:
        units: Number of output units of the Dense layer.
        name: Label supplied by the caller; it is accepted but not currently
            applied to the created layer.

    Returns:
        A callable that applies the (single, shared) Dense layer to a tensor.
    """
    softmax_dense = layers.Dense(units, activation="softmax")

    def apply(x):
        return softmax_dense(x)

    return apply
def classifier1(lstm_units):
    """Create an LSTM layer that returns its full output sequence.

    Because ``return_sequences=True`` is set, the output keeps the time axis
    and can be fed into another recurrent layer.

    Args:
        lstm_units: Number of units of the LSTM layer.

    Returns:
        A callable that applies the (single, shared) LSTM layer to a tensor.
    """
    sequence_lstm = layers.LSTM(lstm_units, return_sequences=True)

    def apply(x):
        return sequence_lstm(x)

    return apply
# the final lstm block; it contains no dense layer, only the LSTM itself
def classifier(lstm_units):
    """Create an LSTM layer with the default (non-sequence) output.

    Args:
        lstm_units: Number of units of the LSTM layer.

    Returns:
        A callable that applies the (single, shared) LSTM layer to a tensor.
    """
    final_lstm = layers.LSTM(lstm_units)

    def apply(x):
        return final_lstm(x)

    return apply

In [10]:
# choose the number of nodes per layer
encoder_units = [512, 256]  # tune this
lstm_units = 256  # tune this
head_units = 256  # width of the dense layer in front of the softmax output
dropout_rate = 0.4  # shared by every dropout layer below

# define the inputs (ragged batches of time series of landmark coordinates)
inputs = tf.keras.Input(shape=(None, 3 * len(LANDMARK_IDX)), ragged=True)

# dense encoder model
x = inputs
for i, n in enumerate(encoder_units):
    print(n)
    x = dense_block(n, f"encoder_{i}")(x)
x = layers.Dropout(dropout_rate)(x)

# recurrent part: the first LSTM returns the whole sequence so that the
# second LSTM can consume it
x = classifier1(lstm_units)(x)
x = layers.Dropout(dropout_rate)(x)
x = classifier(lstm_units)(x)
x = layers.Dropout(dropout_rate)(x)

# classification head: the output width comes from N_SIGNS rather than a
# hard-coded literal, so the model stays in sync with the dataset constants
x = dense_b1(head_units, "head_hidden")(x)
out = dense_b(N_SIGNS, "head_softmax")(x)

model = tf.keras.Model(inputs=inputs, outputs=out)
model.summary()

512
256
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 354)]       0         
                                                                 
 dense (Dense)               (None, None, 512)         181760    
                                                                 
 layer_normalization (LayerN  (None, None, 512)        1024      
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, None, 512)         0         
                                                                 
 dense_1 (Dense)             (None, None, 256)         131328    
                                                                 
 layer_normalization_1 (Laye  (None, None, 256)        512       
 rNormalization)                                     

In [11]:
# Decreasing, piecewise-constant learning-rate schedule to help convergence.
# Boundaries are expressed in optimizer steps, so each epoch threshold is
# multiplied by the number of training batches per epoch.
# NOTE(review): get_callbacks() also installs ReduceLROnPlateau; confirm how
# that callback interacts with a schedule-driven learning rate.
steps_per_epoch = DS_CARDINALITY - VAL_SIZE
lr_drop_epochs = (30, 50, 70)
boundaries = [epoch * steps_per_epoch for epoch in lr_drop_epochs]
values = [1e-3, 1e-4, 1e-5, 1e-6]  # one more value than there are boundaries

lr_sched = optimizers.schedules.PiecewiseConstantDecay(boundaries, values)
optimizer = optimizers.Adam(lr_sched)

model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy", "sparse_top_k_categorical_accuracy"],
)

In [12]:
# fit the model for at most 150 epochs; val_ds is used for validation and the
# callbacks from get_callbacks() (which include early stopping) are applied
model.fit(train_ds,
          validation_data = val_ds,
          callbacks = get_callbacks(),
          epochs = 150)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150


<keras.callbacks.History at 0x7d639c29e990>

Epoch 67/150
165/165 [==============================] - 61s 372ms/step - loss: 0.7207 - accuracy: 0.8041 - sparse_top_k_categorical_accuracy: 0.9363 - val_loss: 1.1879 - val_accuracy: 0.7181 - val_sparse_top_k_categorical_accuracy: 0.8932 - lr: 1.0000e-05

In [13]:
# save the trained Keras model to an .h5 file
model.save('lstm.h5')

<a id="section-five"></a>
# Submit Model

Now it is time to submit. In this competition, we should submit the model itself.

In [14]:
# print the layer-by-layer summary of the trained model, expanding nested models
model.summary(expand_nested=True)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 354)]       0         
                                                                 
 dense (Dense)               (None, None, 512)         181760    
                                                                 
 layer_normalization (LayerN  (None, None, 512)        1024      
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, None, 512)         0         
                                                                 
 dense_1 (Dense)             (None, None, 256)         131328    
                                                                 
 layer_normalization_1 (Laye  (None, None, 256)        512       
 rNormalization)                                             

In [15]:
def get_inference_model(model):
    """Wrap the trained model so that it accepts raw landmark frames.

    The wrapper takes an input named "inputs" with ``ROWS_PER_FRAME`` landmark
    rows of 3 coordinates per entry, repeats the landmark selection, NaN
    filling and coordinate flattening done at training time, adds a leading
    axis of size 1 and calls ``model``. The result is passed through an
    identity layer named "outputs".

    Args:
        model: The trained Keras model to wrap.

    Returns:
        A compiled ``tf.keras.Model`` mapping "inputs" to "outputs".
    """
    raw_frames = tf.keras.Input(shape=(ROWS_PER_FRAME, 3), name="inputs")

    # keep only the selected landmarks (drops most of the face mesh)
    selected = tf.gather(raw_frames, LANDMARK_IDX, axis=1)

    # replace NaN coordinates with zeros
    nan_mask = tf.math.is_nan(selected)
    cleaned = tf.where(nan_mask, tf.zeros_like(selected), selected)

    # flatten the landmark coordinates: one plane per coordinate, concatenated
    flattened = tf.concat([cleaned[..., axis] for axis in range(3)], -1)

    # add a leading axis of size 1 so the trained model sees a single sequence
    single_sequence = tf.expand_dims(flattened, 0)

    # call the trained model
    predictions = model(single_sequence)

    # explicitly name the final (identity) layer for the submission format
    named_output = layers.Activation("linear", name="outputs")(predictions)

    inference_model = tf.keras.Model(inputs=raw_frames, outputs=named_output)
    inference_model.compile(
        loss="sparse_categorical_crossentropy",
        metrics="accuracy",
    )
    return inference_model

In [16]:
# build the inference wrapper around the trained model and inspect its layers
inference_model = get_inference_model(model)
inference_model.summary(expand_nested=True)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inputs (InputLayer)            [(None, 543, 3)]     0           []                               
                                                                                                  
 tf.compat.v1.gather (TFOpLambd  (None, 118, 3)      0           ['inputs[0][0]']                 
 a)                                                                                               
                                                                                                  
 tf.math.is_nan (TFOpLambda)    (None, 118, 3)       0           ['tf.compat.v1.gather[0][0]']    
                                                                                                  
 tf.zeros_like (TFOpLambda)     (None, 118, 3)       0           ['tf.compat.v1.gather[0][0]

In [17]:
# convert the inference model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(inference_model)
tflite_model = converter.convert()
model_path = "model.tflite"

# write the converted model to disk and zip it into submission.zip
with open(model_path, 'wb') as f:
    f.write(tflite_model)
!zip submission.zip $model_path

  adding: model.tflite (deflated 8%)
