In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd


# Prints Shape and Dtype For List Of Variables
def print_shape_dtype(l, names):
    for e, n in zip(l, names):
        print(f'{n} shape: {e.shape}, dtype: {e.dtype}')

# Data loading

In [2]:
# model Training bools
USE_VAL = False
pretrained_wei_load = True
check_evalSet = False
testing_mode = False
save_weights = True

ROOT_DIR = '/kaggle/input/gislr-dataset-public'
    
# Load Data
if USE_VAL:
    # Load Train
    X_train = np.load(f'{ROOT_DIR}/X_train.npy')
    y_train = np.load(f'{ROOT_DIR}/y_train.npy')
    non_empty_frame_idxs_train = np.load(f'{ROOT_DIR}/NON_EMPTY_FRAME_IDXS_TRAIN.npy')
    # Load Val
    X_val = np.load(f'{ROOT_DIR}/X_val.npy')
    y_val = np.load(f'{ROOT_DIR}/y_val.npy')
    non_empty_frame_idxs_val = np.load(f'{ROOT_DIR}/NON_EMPTY_FRAME_IDXS_VAL.npy')
    # Define validation Data
    validation_data = ({ 'frames': X_val, 'non_empty_frame_idxs': non_empty_frame_idxs_val }, y_val)
else:
    X_train = np.load(f'{ROOT_DIR}/X.npy')
    y_train = np.load(f'{ROOT_DIR}/y.npy')
    non_empty_frame_idxs_train = np.load(f'{ROOT_DIR}/NON_EMPTY_FRAME_IDXS.npy')
    validation_data = None

# Dropping last dimension 
X_train = X_train[:,:,:,:2]

# Train 
print_shape_dtype([X_train, y_train, non_empty_frame_idxs_train], ['X_train', 'y_train', 'non_empty_frame_idxs_train'])
# Val
if USE_VAL:
    X_val = X_val[:,:,:,:2]
    print_shape_dtype([X_val, y_val, non_empty_frame_idxs_val], ['X_val', 'y_val', 'non_empty_frame_idxs_val'])
# Sanity Check
print(f'# NaN Values X_train: {np.isnan(X_train).sum()}')

X_train shape: (94477, 64, 66, 2), dtype: float32
y_train shape: (94477,), dtype: int32
non_empty_frame_idxs_train shape: (94477, 64), dtype: float32
# NaN Values X_train: 0


In [3]:
D = 2
V = 250
USE_TYPES = ['left_hand', 'pose', 'right_hand']
START_IDX = 468
LIPS_IDXS0 = np.array([
        61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
        291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
        78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
        95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
    ])
# Landmark indices in original data
LEFT_HAND_IDXS0 = np.arange(468,489)
RIGHT_HAND_IDXS0 = np.arange(522,543)
LEFT_POSE_IDXS0 = np.array([502, 504, 506, 508, 510])
RIGHT_POSE_IDXS0 = np.array([503, 505, 507, 509, 511])

# Columns which are kept 
LANDMARK_IDXS_LEFT_DOMINANT0 = np.concatenate((LIPS_IDXS0, LEFT_HAND_IDXS0, LEFT_POSE_IDXS0))
LANDMARK_IDXS_RIGHT_DOMINANT0 = np.concatenate((LIPS_IDXS0, RIGHT_HAND_IDXS0, RIGHT_POSE_IDXS0))
HAND_IDXS0 = np.concatenate((LEFT_HAND_IDXS0, RIGHT_HAND_IDXS0), axis=0)
N_COLS = LANDMARK_IDXS_LEFT_DOMINANT0.size
# Landmark indices in processed data
LIPS_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LIPS_IDXS0)).squeeze()
LEFT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_HAND_IDXS0)).squeeze()
RIGHT_HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, RIGHT_HAND_IDXS0)).squeeze()
HAND_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, HAND_IDXS0)).squeeze()
POSE_IDXS = np.argwhere(np.isin(LANDMARK_IDXS_LEFT_DOMINANT0, LEFT_POSE_IDXS0)).squeeze()

# tensors to be used as input for the model (might it be causing a conversion error because of the dtype?)
l_hand_idxs = tf.constant(HAND_IDXS, dtype=tf.int16)
r_hand_idxs = tf.constant(HAND_IDXS, dtype=tf.int16)
pose_idxs = tf.constant(POSE_IDXS, dtype=tf.int16)
lips_idxs = tf.constant(LIPS_IDXS, dtype=tf.int16)


print(f'# HAND_IDXS: {len(HAND_IDXS)}, N_COLS: {N_COLS}')

# HAND_IDXS: 21, N_COLS: 66


In [4]:
LIPS_START = 0
LEFT_HAND_START = LIPS_IDXS.size
RIGHT_HAND_START = LEFT_HAND_START + LEFT_HAND_IDXS.size
POSE_START = RIGHT_HAND_START + RIGHT_HAND_IDXS.size

print(f'LIPS_START: {LIPS_START}, LEFT_HAND_START: {LEFT_HAND_START}, RIGHT_HAND_START: {RIGHT_HAND_START}, POSE_START: {POSE_START}')

LIPS_START: 0, LEFT_HAND_START: 40, RIGHT_HAND_START: 61, POSE_START: 61


# Feature statistics

In [5]:
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

plot_metrics = False

def get_left_right_hand_mean_std():
    # LEFT HAND
    LEFT_HANDS_MEAN_X = np.zeros([LEFT_HAND_IDXS.size], dtype=np.float32)
    LEFT_HANDS_MEAN_Y = np.zeros([LEFT_HAND_IDXS.size], dtype=np.float32)
    LEFT_HANDS_STD_X = np.zeros([LEFT_HAND_IDXS.size], dtype=np.float32)
    LEFT_HANDS_STD_Y = np.zeros([LEFT_HAND_IDXS.size], dtype=np.float32)

    if plot_metrics: 
        fig, axes = plt.subplots(3, 1, figsize=(15, D*6))

    for col, ll in enumerate(tqdm( np.transpose(X_train[:,:,LEFT_HAND_IDXS], [2,3,0,1]).reshape([LEFT_HAND_IDXS.size, D, -1]) )):
        for dim, l in enumerate(ll):
            v = l[np.nonzero(l)]
            if dim == 0: # X
                LEFT_HANDS_MEAN_X[col] = v.mean()
                LEFT_HANDS_STD_X[col] = v.std()
            if dim == 1: # Y
                LEFT_HANDS_MEAN_Y[col] = v.mean()
                LEFT_HANDS_STD_Y[col] = v.std()
            # Plot
            if plot_metrics: 
                axes[dim].boxplot(v, notch=False, showfliers=False, positions=[col], whis=[5,95])

    if plot_metrics:
        for ax, dim_name in zip(axes, ["x","y"]):
            ax.set_title(f'Hands {dim_name.upper()} Dimension', size=24)
            ax.tick_params(axis='x', labelsize=8)
            ax.grid(axis='y')

        plt.subplots_adjust(hspace=0.50)
        plt.show()

    LEFT_HANDS_MEAN = np.array([LEFT_HANDS_MEAN_X, LEFT_HANDS_MEAN_Y]).T
    LEFT_HANDS_STD = np.array([LEFT_HANDS_STD_X, LEFT_HANDS_STD_Y]).T
    
    return LEFT_HANDS_MEAN, LEFT_HANDS_STD


def get_pose_mean_std():
    # POSE
    POSE_MEAN_X = np.zeros([POSE_IDXS.size], dtype=np.float32)
    POSE_MEAN_Y = np.zeros([POSE_IDXS.size], dtype=np.float32)
    POSE_STD_X = np.zeros([POSE_IDXS.size], dtype=np.float32)
    POSE_STD_Y = np.zeros([POSE_IDXS.size], dtype=np.float32)

    if plot_metrics:
        fig, axes = plt.subplots(3, 1, figsize=(15, D*6))

    for col, ll in enumerate(tqdm( np.transpose(X_train[:,:,POSE_IDXS], [2,3,0,1]).reshape([POSE_IDXS.size, D, -1]) )):
        for dim, l in enumerate(ll):
            v = l[np.nonzero(l)]
            if dim == 0: # X
                POSE_MEAN_X[col] = v.mean()
                POSE_STD_X[col] = v.std()
            if dim == 1: # Y
                POSE_MEAN_Y[col] = v.mean()
                POSE_STD_Y[col] = v.std()

            if plot_metrics:
                axes[dim].boxplot(v, notch=False, showfliers=False, positions=[col], whis=[5,95])

    if plot_metrics:
        for ax, dim_name in zip(axes, ["x,y"]):
            ax.set_title(f'Pose {dim_name.upper()} Dimension', size=24)
            ax.tick_params(axis='x', labelsize=8)
            ax.grid(axis='y')

        plt.subplots_adjust(hspace=0.50)
        plt.show()

    POSE_MEAN = np.array([POSE_MEAN_X, POSE_MEAN_Y]).T
    POSE_STD = np.array([POSE_STD_X, POSE_STD_Y]).T
    
    return POSE_MEAN, POSE_STD

def get_lips_mean_std():
    # LIPS
    LIPS_MEAN_X = np.zeros([LIPS_IDXS.size], dtype=np.float32)
    LIPS_MEAN_Y = np.zeros([LIPS_IDXS.size], dtype=np.float32)
    LIPS_STD_X = np.zeros([LIPS_IDXS.size], dtype=np.float32)
    LIPS_STD_Y = np.zeros([LIPS_IDXS.size], dtype=np.float32)

    if plot_metrics:
        fig, axes = plt.subplots(3, 1, figsize=(15, D*6))

    for col, ll in enumerate(tqdm( np.transpose(X_train[:,:,LIPS_IDXS], [2,3,0,1]).reshape([LIPS_IDXS.size, D, -1]) )):
        for dim, l in enumerate(ll):
            v = l[np.nonzero(l)]
            if dim == 0: # X
                LIPS_MEAN_X[col] = v.mean()
                LIPS_STD_X[col] = v.std()
            if dim == 1: # Y
                LIPS_MEAN_Y[col] = v.mean()
                LIPS_STD_Y[col] = v.std()

            if plot_metrics:
                axes[dim].boxplot(v, notch=False, showfliers=False, positions=[col], whis=[5,95])

    if plot_metrics:
        for ax, dim_name in zip(axes, ["x", "y"]):
            ax.set_title(f'Lips {dim_name.upper()} Dimension', size=24)
            ax.tick_params(axis='x', labelsize=8)
            ax.grid(axis='y')

        plt.subplots_adjust(hspace=0.50)
        plt.show()

    LIPS_MEAN = np.array([LIPS_MEAN_X, LIPS_MEAN_Y]).T
    LIPS_STD = np.array([LIPS_STD_X, LIPS_STD_Y]).T
    
    return LIPS_MEAN, LIPS_STD

LIPS_MEAN, LIPS_STD = get_lips_mean_std()
POSE_MEAN, POSE_STD = get_pose_mean_std()
LEFT_HANDS_MEAN, LEFT_HANDS_STD = get_left_right_hand_mean_std()

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]


# Batching function for training

In [6]:
import numpy as np
import tensorflow as tf

num_instances_per_sign = 4
V = 250
N, T_hat, C, D = X_train.shape
params = dict(n=4, V=250, N=N, T_hat=T_hat, C=C, D=D)

def get_train_batch_all_signs(X, y, **kwargs):
    V = kwargs.get('V', 250)
    n = kwargs.get('n', 4)
    T_hat = kwargs.get('T_hat', 64)
    C = kwargs.get('C', 66)
    D = kwargs.get('D', 2)
    rtrn_idxs = kwargs.get('rtn_idxs', True)
    non_empty_frame_idxs = kwargs.get('non_empty_frame_idxs', None)

    B = V * n
    X_batch = np.zeros([B, T_hat, C, D], dtype=np.float32)
    y_batch = np.repeat(np.arange(V, dtype=np.int64), n)
    non_empty_frame_idxs_batch = np.zeros([B, T_hat], dtype=np.float32)

    signID_to_idxs = {}
    for i in range(V):
        signID_to_idxs[i] = np.argwhere(y == i).squeeze().astype(np.int32)

    while True:
        for i in range(V):
            idxs = np.random.choice(signID_to_idxs[i], n)
            X_batch[i * n:(i + 1) * n] = X[idxs]
            non_empty_frame_idxs_batch[i * n:(i + 1) * n] = non_empty_frame_idxs[idxs]

        # Look for the idxs of dummy padded frames (filled with -1).
        mask = non_empty_frame_idxs_batch < -0.5

        # Broadcast y_batch to the shape of (B, T_hat) and apply the mask
        y_batch_expanded = np.ones((B,T_hat)) * y_batch[:,np.newaxis]
        y_batch_expanded[mask] = -1

        x_tf = tf.convert_to_tensor(X_batch)
        y_tf = tf.convert_to_tensor(y_batch_expanded.astype(np.int32))

        if rtrn_idxs:
            yield {'x': x_tf, 'non_empty_frame_idxs': non_empty_frame_idxs_batch}, y_tf
        else:
            yield x_tf, y_tf

class data_sequence(tf.keras.utils.Sequence):
    def __init__(self, X, y, batch_size, iterations_per_epoch, get_batch_function, **kwargs):
        self.X = X
        self.y = y
        self.n = batch_size//V+1
        self.iterations_per_epoch = iterations_per_epoch
        kwargs["n"] = self.n
        self.batch_iterator = get_batch_function(X, y, **kwargs)

    def __len__(self):
        return self.iterations_per_epoch

    def __getitem__(self, idx):
        x_tf, y_tf = next(self.batch_iterator)

        return x_tf, y_tf


# Model training

## Dataloaders, training options and model definition

In [7]:
import sys
import os 



# Data loaders definition
num_instances_per_sign = 4
V = 250
N, T_hat, C, D = X_train.shape

params_train = dict(n=num_instances_per_sign, V=250, N=N, T_hat=T_hat, C=C, D=D, non_empty_frame_idxs = non_empty_frame_idxs_train)
dloader_train = data_sequence(
                            X=X_train, 
                            y=y_train, 
                            batch_size=256, 
                            iterations_per_epoch=100,
                            get_batch_function=get_train_batch_all_signs, 
                            **params_train)

if USE_VAL: 
    params_val = dict(n=num_instances_per_sign, V=250, N=N, T_hat=T_hat, C=C, D=D, non_empty_frame_idxs = non_empty_frame_idxs_val)
    dloader_val = data_sequence(
                                X=X_val,
                                y=y_val,
                                batch_size=128,
                                iterations_per_epoch=100,
                                get_batch_function=get_train_batch_all_signs,
                                **params_val)


# Model reimport     
model_path = "/kaggle/input/model-files"

if model_path not in sys.path: sys.path.insert(0, model_path)

###

import gpt_tf
import importlib

importlib.reload(gpt_tf)
from gpt_tf import clsr_tsfrm as clsr_tsfrm_tf, GPTConfig

#params
model_params = dict(dims=2, block_size=64, landm_size=66, vocab_size=250, n_layer=5, n_head=4, n_embd=300,bias=True,
                    lips_mean=LIPS_MEAN, lips_std=LIPS_STD, pose_mean=POSE_MEAN, pose_std=POSE_STD, left_hands_mean=LEFT_HANDS_MEAN, left_hands_std=LEFT_HANDS_STD,
                    label_smoothing = 0.25, rotate_hand=False, rotate_module=3, embd_dropout=0.0,attn_dropout=0.3, resid_dropout=0.3, mlp_dropout=0.6
                    )

# Define the model
cfg = GPTConfig(**model_params)
tf_tfr = clsr_tsfrm_tf(cfg)
# best_model = clsr_tsfrm_tf(cfg)

In [9]:
# Load pretrained weights
tf_tfr.load_weights("/kaggle/input/model-files/weis_0429_00/weis_0429_00")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x719c7d8f6d50>

## Training loop

In [10]:
from tqdm.notebook import tqdm
import tensorflow_addons as tfa
import gc

print("Training Loop started")

weight_decay=1e-4

#lr scheduler 
scheduler_type = "cosine"
num_epochs = 50
phase_epoch = np.linspace(0, np.pi/2, num_epochs)

lr_max = 1e-4
lr_scheduler = (np.cos(phase_epoch))**2*lr_max

# Define the optimizer
optimizer = tfa.optimizers.AdamW(weight_decay=weight_decay,
                                 learning_rate=lr_max,
                                 beta_1=0.9,
                                 beta_2=0.999
                                 )

eval_every = 1



ls_tn_epchs = []
loss_evl_epchs = []


# if pretrained_wei_load: testing_mode = True

if testing_mode:
    num_epochs = 1
    dloader_train = data_sequence(
                            X=X_train, 
                            y=y_train, 
                            batch_size=256, 
                            iterations_per_epoch=1,
                            get_batch_function=get_train_batch_all_signs, 
                            **params_train)
    check_evalSet = False
    save_weights = False
    

# early stopping
patience_break = 15
patience_lr = 5
patience_cnt = 0
lr_decay = 0.75

def get_weighted_loss(block_size): 

    loss_wei = tf.linspace(1.0, block_size, block_size)
    loss_wei = tf.reshape(loss_wei, (1, block_size))
    loss_wei = loss_wei/tf.norm(loss_wei)

    return loss_wei

loss_wei = get_weighted_loss(model_params["block_size"])

# best model and evaluation
best_val = 1
best_loss = 1e10
best_epoch = 0

for epoch in range(num_epochs):
    
    loss_list = []
    progress_train = tqdm(
        iterable=enumerate(dloader_train),
        desc=f"Epoch {epoch+1} progress",
        total=len(dloader_train),
        colour="green"
    )

    # assign lr to optimizer
    optimizer.learning_rate.assign(lr_scheduler[epoch])

    for i, (X_dic, y) in progress_train:
        # {'x': x_tf, 'non_empty_frame_idxs': non_empty_frame_idxs_batch}, y_tf
        X = X_dic["x"]
        non_empty_frame_idxs = X_dic["non_empty_frame_idxs"]
        t_real = tf.reduce_sum(tf.where((tf.reduce_sum(X, axis=[-1,-2])>0),1.0,0.0),axis=1) #(B,)
        
        with tf.GradientTape() as tape:
            logits, loss = tf_tfr(X, non_empty_frame_idxs, y, training=True)
            weighted_loss = loss * loss_wei
            avg_wei_loss = cfg.block_size/t_real * tf.reduce_mean(weighted_loss, axis=-1) #(B,T)->(B,)
            loss_reduced = tf.reduce_mean(avg_wei_loss)# (B,)->(1)

        gradients = tape.gradient(loss_reduced, tf_tfr.trainable_variables)
        optimizer.apply_gradients(zip(gradients, tf_tfr.trainable_variables))
        loss_list.append(loss_reduced.numpy()) #(B,)->(1)
        progress_train.set_postfix({"loss": f"{loss_reduced.numpy():.7f}"}, refresh=True)
        
    loss_train = np.mean(loss_list)
    ls_tn_epchs.append(loss_train)
     
    if epoch == 0:
        # Check if the models is smaller than 40 MB (competition limit)
        # float32 bits * byte/bit * 1KB/byte * 1MB/1000KB * num_params = approx. Space occupied by the model in (MB)
        memory_approx = 32 * 1/8 * 1/1024 * 1e-3 * tf_tfr.get_num_params(False)
        print(f"the model weights approximately {memory_approx: .4f} MB.")
        if memory_approx > 39: break

    if not check_evalSet:
        print(f"Training loss: {loss_train: e}, Epoch: {epoch + 1}/{num_epochs} lr: {lr_scheduler[epoch]: e}")
        
        if loss_reduced.numpy() < best_loss:
            best_loss = loss_reduced.numpy()            
            best_epoch = epoch
            best_weights = tf_tfr.get_weights()

    if check_evalSet and USE_VAL:
        loss_eval = 0
        num_eval_batches = 0
        
        for X_dic, y in dloader_val:             
            X = X_dic["x"]       
            non_empty_frame_idxs = X_dic["non_empty_frame_idxs"]
            t_real = tf.reduce_sum(tf.where((tf.reduce_sum(X, axis=[-1,-2])>0),1.0,0.0),axis=1) #(B,)
            
            logits, loss_val = tf_tfr(X, non_empty_frame_idxs, y, training=False)
            weighted_loss = loss_val * loss_wei
            avg_wei_loss = cfg.block_size/t_real * tf.reduce_mean(weighted_loss, axis=-1) #(B,T)->(B,)
            loss_reduced = tf.reduce_mean(avg_wei_loss)

            loss_eval += loss_reduced.numpy()
            num_eval_batches += 1

        loss_eval /= num_eval_batches
        loss_evl_epchs.append(loss_eval)

        if loss_eval < best_loss:
            best_loss = loss_eval
            patience_cnt = 0
            
            best_epoch = epoch
            best_weights = tf_tfr.get_weights()
        else:
            patience_cnt += 1

        # early stopping
        if patience_cnt >= patience_break:
            print(f"Training loss: {loss_train: .4f}, Eval loss: {loss_eval: .4f}, Epoch: {epoch + 1}/{num_epochs}")
            print(f"Early stopping at epoch {epoch+1}, best weights stored from epoch {best_epoch+1}.")
            break
            
        # Reduce learning rate
        if scheduler_type != "cosine":             
            if patience_cnt+1 % patience_lr == 0:
                lr *= lr_decay
                optimizer.learning_rate.assign(lr)
                print(f"Learning rate decayed to {lr:e} at epoch {epoch+1}")
            
            
        print(f"Training loss: {loss_train: .4f}, Eval loss: {loss_eval: .4f}, lr: {lr_scheduler[epoch]: e}, Epoch: {epoch + 1}/{num_epochs}")
        
    # collect our garbage after each epoch
    gc.collect()
        
print("Training Loop ended")


Training Loop started


Epoch 1 progress:   0%|          | 0/100 [00:00<?, ?it/s]

the model weights approximately  23.4574 MB.
Training loss:  7.770290e-02, Epoch: 1/50 lr:  1.000000e-04


Epoch 2 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.719968e-02, Epoch: 2/50 lr:  9.989727e-05


Epoch 3 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.662855e-02, Epoch: 3/50 lr:  9.958950e-05


Epoch 4 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.678960e-02, Epoch: 4/50 lr:  9.907796e-05


Epoch 5 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.707248e-02, Epoch: 5/50 lr:  9.836474e-05


Epoch 6 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.688721e-02, Epoch: 6/50 lr:  9.745279e-05


Epoch 7 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.680350e-02, Epoch: 7/50 lr:  9.634584e-05


Epoch 8 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.704013e-02, Epoch: 8/50 lr:  9.504844e-05


Epoch 9 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.701119e-02, Epoch: 9/50 lr:  9.356594e-05


Epoch 10 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.641918e-02, Epoch: 10/50 lr:  9.190441e-05


Epoch 11 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.707128e-02, Epoch: 11/50 lr:  9.007068e-05


Epoch 12 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.673718e-02, Epoch: 12/50 lr:  8.807230e-05


Epoch 13 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.686186e-02, Epoch: 13/50 lr:  8.591747e-05


Epoch 14 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.672644e-02, Epoch: 14/50 lr:  8.361504e-05


Epoch 15 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.620149e-02, Epoch: 15/50 lr:  8.117449e-05


Epoch 16 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.650318e-02, Epoch: 16/50 lr:  7.860583e-05


Epoch 17 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.658645e-02, Epoch: 17/50 lr:  7.591963e-05


Epoch 18 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.668719e-02, Epoch: 18/50 lr:  7.312691e-05


Epoch 19 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.717307e-02, Epoch: 19/50 lr:  7.023917e-05


Epoch 20 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.670397e-02, Epoch: 20/50 lr:  6.726825e-05


Epoch 21 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.642733e-02, Epoch: 21/50 lr:  6.422638e-05


Epoch 22 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.607165e-02, Epoch: 22/50 lr:  6.112605e-05


Epoch 23 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.640204e-02, Epoch: 23/50 lr:  5.797999e-05


Epoch 24 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.650453e-02, Epoch: 24/50 lr:  5.480115e-05


Epoch 25 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.620479e-02, Epoch: 25/50 lr:  5.160258e-05


Epoch 26 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.653518e-02, Epoch: 26/50 lr:  4.839742e-05


Epoch 27 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.606986e-02, Epoch: 27/50 lr:  4.519885e-05


Epoch 28 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.650191e-02, Epoch: 28/50 lr:  4.202001e-05


Epoch 29 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.633797e-02, Epoch: 29/50 lr:  3.887395e-05


Epoch 30 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.655505e-02, Epoch: 30/50 lr:  3.577362e-05


Epoch 31 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.626029e-02, Epoch: 31/50 lr:  3.273175e-05


Epoch 32 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.645408e-02, Epoch: 32/50 lr:  2.976083e-05


Epoch 33 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.692412e-02, Epoch: 33/50 lr:  2.687309e-05


Epoch 34 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.653656e-02, Epoch: 34/50 lr:  2.408037e-05


Epoch 35 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.670911e-02, Epoch: 35/50 lr:  2.139417e-05


Epoch 36 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.594867e-02, Epoch: 36/50 lr:  1.882551e-05


Epoch 37 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.600551e-02, Epoch: 37/50 lr:  1.638496e-05


Epoch 38 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.706607e-02, Epoch: 38/50 lr:  1.408253e-05


Epoch 39 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.705035e-02, Epoch: 39/50 lr:  1.192770e-05


Epoch 40 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.712547e-02, Epoch: 40/50 lr:  9.929319e-06


Epoch 41 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.758174e-02, Epoch: 41/50 lr:  8.095595e-06


Epoch 42 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.743031e-02, Epoch: 42/50 lr:  6.434065e-06


Epoch 43 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.787552e-02, Epoch: 43/50 lr:  4.951557e-06


Epoch 44 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.871027e-02, Epoch: 44/50 lr:  3.654162e-06


Epoch 45 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  7.906575e-02, Epoch: 45/50 lr:  2.547213e-06


Epoch 46 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  8.037605e-02, Epoch: 46/50 lr:  1.635257e-06


Epoch 47 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  8.029335e-02, Epoch: 47/50 lr:  9.220422e-07


Epoch 48 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  8.202145e-02, Epoch: 48/50 lr:  4.104993e-07


Epoch 49 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  8.307467e-02, Epoch: 49/50 lr:  1.027304e-07


Epoch 50 progress:   0%|          | 0/100 [00:00<?, ?it/s]

Training loss:  8.509335e-02, Epoch: 50/50 lr:  3.749399e-37
Training Loop ended


In [11]:
import json

save_name = f"weis_0429_00"

def save_best_weights(model, best_weights, save_weights, checkpoint_path):
    if best_weights is not None:
        model.set_weights(best_weights)

        if save_weights:
            checkpoint_path = os.path.join(checkpoint_path, save_name)
            model.save_weights(checkpoint_path)
            print(f"Model weights saved to: {checkpoint_path} best weights stored from epoch {best_epoch+1}.")
            
        else:
            print("No best_weights provided. Model weights were not updated or saved.")

# Usage
save_best_weights(tf_tfr, best_weights, save_weights, "check_points")


Model weights saved to: check_points/weis_0429_00 best weights stored from epoch 7.


In [12]:
# export weights 
import shutil
if save_weights:
    folder_to_zip = 'check_points'  # Replace with the name of the folder you want to zip
    output_zip_file = f'weights.zip'  # Name of the output zip file

    shutil.make_archive(output_zip_file[:-4], 'zip', folder_to_zip)


In [13]:
import sys

# Replace this condition with a condition specific to your use case
stop_execution = False

if stop_execution:
    sys.exit("Stopping execution at this point")

# Data preprocessing (inference layer)

In [14]:
"""
    Tensorflow layer to process data in TFLite
    Data needs to be processed in the model itself, so we can not use Python
""" 
C = 543
N_DIMS = 3
class PreprocessLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(PreprocessLayer, self).__init__()
        normalisation_correction = tf.constant([
                    # Add 0.50 to left hand (original right hand) and substract 0.50 of right hand (original left hand)
                    [0] * len(LIPS_IDXS) + [0.50] * len(LEFT_HAND_IDXS) + [0.50] * len(POSE_IDXS),
                    # Y coordinates stay intact
                    [0] * len(LANDMARK_IDXS_LEFT_DOMINANT0),
                    # Z coordinates stay intact
                    [0] * len(LANDMARK_IDXS_LEFT_DOMINANT0),
                ],
                dtype=tf.float32,
            )
        self.normalisation_correction = tf.transpose(normalisation_correction, [1,0])
        
    def pad_edge(self, T, repeats, side):
        if side == 'LEFT':
            return tf.concat((tf.repeat(T[:1], repeats=repeats, axis=0), T), axis=0)
        elif side == 'RIGHT':
            return tf.concat((T, tf.repeat(T[-1:], repeats=repeats, axis=0)), axis=0)
    
    @tf.function(
        input_signature=(tf.TensorSpec(shape=[None,C,N_DIMS], dtype=tf.float32),),
    )
    def call(self, data0):
        # Number of Frames in Video

        T0 = tf.shape(data0)[0]
        
        # Find dominant hand by comparing summed absolute coordinates
        left_hand_sum = tf.math.reduce_sum(tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1))
        right_hand_sum = tf.math.reduce_sum(tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1))
        left_dominant = left_hand_sum >= right_hand_sum
        
        # Count non NaN Hand values in each frame for the dominant hand
        if left_dominant:
            frames_dhands_non_nan_sum = tf.math.reduce_sum(
                    tf.where(tf.math.is_nan(tf.gather(data0, LEFT_HAND_IDXS0, axis=1)), 0, 1),
                    axis=[1, 2],
                )
        else:
            frames_dhands_non_nan_sum = tf.math.reduce_sum(
                    tf.where(tf.math.is_nan(tf.gather(data0, RIGHT_HAND_IDXS0, axis=1)), 0, 1),
                    axis=[1, 2],
                )
        
        # Find frames indices with coordinates of dominant hand
        non_empty_frames_idxs = tf.where(frames_dhands_non_nan_sum > 0)
        non_empty_frames_idxs = tf.squeeze(non_empty_frames_idxs, axis=1)
        
        # Filter frames of the dominant hand
        data = tf.gather(data0, non_empty_frames_idxs, axis=0)
        
        # Cast Indices in float32 to be compatible with Tensorflow Lite
        non_empty_frames_idxs = tf.cast(non_empty_frames_idxs, tf.float32)
        # Normalize to start with 0
        non_empty_frames_idxs -= tf.reduce_min(non_empty_frames_idxs)
        
        # Number of Frames in Filtered Video
        T = tf.shape(data)[0]
        

        # Gather Relevant Landmark Columns (T,C,3) -> (T,C_,3) [superior hand only landmarks]
        if left_dominant:
            data = tf.gather(data, LANDMARK_IDXS_LEFT_DOMINANT0, axis=1)
        else:
            # understand that it is partly flipping the video for right hand dominant
            # [0.5, 0.5]
            data = tf.gather(data, LANDMARK_IDXS_RIGHT_DOMINANT0, axis=1)
            data = (
                    self.normalisation_correction + (
                        (data - self.normalisation_correction) * tf.where(self.normalisation_correction != 0, -1.0, 1.0)) #flipps the image for right handed people
                )
        
        # Video fits in T_hat (T)
        # video lenght < T
        if T < T_hat:
            # Pad With -1 if the video is smaller than T_hat (T)
            non_empty_frames_idxs = tf.pad(tensor=non_empty_frames_idxs, paddings=[[0, T_hat-T]], constant_values=-1)
            # Pad Data With Zeros (T,C,3) dimensions, ony padding the first dimension
            data = tf.pad(data, [[0, T_hat-T], [0,0], [0,0]], constant_values=0)
            # Fill NaN Values With 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)


        # Video needs to be downsampled to T_hat
        else:
            # T < video lenght < T**2
            if T < T_hat**2:
                repeats = tf.math.floordiv(T_hat * T_hat, T0)
                data = tf.repeat(data, repeats=repeats, axis=0)
                non_empty_frames_idxs = tf.repeat(non_empty_frames_idxs, repeats=repeats, axis=0)

            # Pad To Multiple Of Input Size
            pool_size = tf.math.floordiv(len(data), T_hat)
            if tf.math.mod(T, T_hat) > 0:
                pool_size += 1

            if pool_size == 1:
                # pad_size = (pool_size * T_hat) - len(data)
                """ < 0 """
                pad_size = T_hat - len(data)
            else:
                pad_size = (pool_size * T_hat) % len(data)

            # Pad Start/End with Start/End value
            pad_left = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(T_hat, 2)
            pad_right = tf.math.floordiv(pad_size, 2) + tf.math.floordiv(T_hat, 2)
            if tf.math.mod(pad_size, 2) > 0:
                pad_right += 1

            # Pad By Concatenating Left/Right Edge Values
            data = self.pad_edge(data, pad_left, 'LEFT')
            data = self.pad_edge(data, pad_right, 'RIGHT')

            # Pad Non Empty Frame Indices
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_left, 'LEFT')
            non_empty_frames_idxs = self.pad_edge(non_empty_frames_idxs, pad_right, 'RIGHT')

            # Reshape to Mean Pool
            data = tf.reshape(data, [T_hat, -1, N_COLS, N_DIMS])
            non_empty_frames_idxs = tf.reshape(non_empty_frames_idxs, [T_hat, -1])

            # Mean Pool
            data = tf.experimental.numpy.nanmean(data, axis=1)
            non_empty_frames_idxs = tf.experimental.numpy.nanmean(non_empty_frames_idxs, axis=1)

            # Fill NaN Values With 0
            data = tf.where(tf.math.is_nan(data), 0.0, data)
        
        data_drop_z = data[:,:,:2] 
        return tf.expand_dims(data_drop_z, axis=0), tf.expand_dims(non_empty_frames_idxs,axis=0)
    
preprocess_layer = PreprocessLayer()

dummy_input = np.ones(shape=(43, 543, 3))

x_mod, _ = preprocess_layer(dummy_input)
print(x_mod.shape)

(1, 64, 66, 2)


In [15]:
def load_relevant_data_subset(pq_path):
    ROWS_PER_FRAME = 543
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

pq_path = "/kaggle/input/asl-signs/train_landmark_files/22343/1000638205.parquet"
pq_tmp = load_relevant_data_subset(pq_path)

# Testing preprocess layer with np.array input
out, _ = preprocess_layer(pq_tmp)
print(out.shape)

(1, 64, 66, 2)


In [16]:
def shape_printer(arr):
    print(f"tensor has shape: {arr.shape} and type {arr.dtype}.")

In [17]:

class inference_model(tf.keras.Model):
    """model adapted to the input and output comepetition format."""
    def __init__(self, data_tfr_model, trained_model):
        super().__init__()
        self.model = trained_model
        self.data_tfr_model = data_tfr_model      
        self.flatten = tf.keras.layers.Flatten()
    
    # Build the computational graph for the model call one we call it (model(X))
    @tf.function(input_signature=[
      tf.TensorSpec(shape=[None, 543, 3], dtype=tf.float32, name='inputs')
    ])    
    def call(self, x):
        x, non_empty_frames_idxs = self.data_tfr_model(x)
        x, _ = self.model(x, non_empty_frames_idxs)
        x = tf.nn.softmax(logits=x, axis=-1)  
        return {'outputs': tf.reshape(x, [-1])}
    
inf_model = inference_model(preprocess_layer, tf_tfr)

input_model = load_relevant_data_subset(pq_path)
print(f"in shape is: {input_model.shape}")

out_dic = inf_model(input_model)
print(f"out shape is: {out_dic['outputs'].shape}")


in shape is: (19, 543, 3)
out shape is: (250,)


# Save and submit the model

In [18]:
!pip install tflite-runtime
!pip install tensorflow-addons

Collecting tflite-runtime
  Downloading tflite_runtime-2.11.0-cp37-cp37m-manylinux2014_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m37.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tflite-runtime
Successfully installed tflite-runtime-2.11.0
[0m

In [19]:
# Load the TFLiteConverter
converter = tf.lite.TFLiteConverter.from_keras_model(inf_model)

In [20]:
# Convert the model
tflite_model = converter.convert()

tflite_path = "my_model.tflite"
# Save the TFLite model to a file
with open(tflite_path, 'wb') as f:
    f.write(tflite_model)

In [21]:
# Checking model Size is < 40mb 

size = os.path.getsize("/kaggle/working/my_model.tflite")

assert size < 40e6, "model size is bigger than expected (40MB)"

In [22]:
import tflite_runtime.interpreter as tflite
interpreter = tflite.Interpreter("/kaggle/working/my_model.tflite")
interpreter.allocate_tensors()
found_signatures = list(interpreter.get_signature_list().keys())


prediction_fn = interpreter.get_signature_runner("serving_default")

frames = load_relevant_data_subset(pq_path)
print(frames.shape)
output = prediction_fn(inputs=frames)

sign = np.argmax(output["outputs"])

# output example
print(f"predicted '{sign}' from probs with shape {output['outputs'].shape}")

(19, 543, 3)
predicted '180' from probs with shape (250,)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [23]:
# checking inference time constrains by running the model on multiple input instances

import time

examples = 100 

f1 = os.listdir("/kaggle/input/asl-signs/train_landmark_files")
part = np.random.choice(f1, size=4)

times = []

for p_ in part:
    tmp = os.path.join("/kaggle/input/asl-signs/train_landmark_files",p_)
    dirs =  os.listdir(tmp)
    rnd_select = np.random.choice(dirs, size=10)
    
    for file in rnd_select:
        pq_path = os.path.join(tmp, file)
       
    start_time = time.time()
    prediction_fn(inputs=load_relevant_data_subset(pq_path))

    elapsed_time = (time.time() - start_time) * 1000  # Convert to milliseconds
        
    times += [elapsed_time]
    
avg_time = sum(times)/len(times)

print(f"avg inference time is {avg_time} compared to the required inference time  smaller than < 100ms")
    
assert  avg_time < 100, "Inference time needs to be less than a 100 ms." 
    

avg inference time is 62.40040063858032 compared to the required inference time  smaller than < 100ms


In [24]:
!zip submission.zip $tflite_path

  adding: my_model.tflite (deflated 8%)
