In [None]:
from tensorflow.keras import layers
from tensorflow import keras
import tensorflow as tf
from tqdm import tqdm
import pandas as pd
import numpy as np
import json
import os

import warnings
warnings.filterwarnings("ignore")

In [None]:
class CONFIG:
    """Filesystem locations used by this notebook."""

    # Root of the competition input data; every input path below lives under it.
    ROOT_DIR = '/kaggle/input/asl-fingerspelling'

    # Training metadata table and the directory of per-file landmark parquets.
    TRAIN_DATA = f'{ROOT_DIR}/train.csv'
    TRAIN_DIR = f'{ROOT_DIR}/train_landmarks'

    # JSON file mapping each character to its prediction index.
    CHAR_PREDICTION_INDEX_MAP = f'{ROOT_DIR}/character_to_prediction_index.json'

    # Supplemental metadata table and landmark directory.
    SUP_METADATA = f'{ROOT_DIR}/supplemental_metadata.csv'
    SUP_LANDMARK_DIR = f'{ROOT_DIR}/supplemental_landmarks'

    # Name of the output directory for preprocessed data.
    OUTPUT_PREPROCESSING = 'tfds'

In [None]:
# Character -> prediction index, read from the JSON map referenced in CONFIG.
with open(CONFIG.CHAR_PREDICTION_INDEX_MAP, "r") as f:
    char_to_ord = json.load(f)

# Inverse mapping: prediction index -> character.
ord_to_char = {index: char for char, index in char_to_ord.items()}

In [None]:
# Memory saving function credit to https://www.kaggle.com/gemartin/load-data-reduce-memory-usage
def reduce_mem_usage(df):
    """Downcast numeric columns of ``df`` to the narrowest dtype that fits them.

    The frame is modified in place and also returned.

    * Integer columns (signed or unsigned NumPy dtypes) become the smallest of
      int8/int16/int32/int64 whose range contains the column's min and max.
      Bounds are inclusive, so a column whose max is exactly 127 still fits
      int8.
    * Float columns become the smallest of float16/float32/float64 whose
      finite range contains the column's min and max. Only the range is
      checked: narrowing to float16/float32 can lose precision.
    * Every other column (object, bool, datetime, categorical, pandas
      extension dtypes, ...) is left untouched.
    * Columns whose min/max are not comparable to any candidate range (empty,
      all-NaN, containing +/-inf, or too large for int64) keep their dtype.

    Args:
        df: pandas DataFrame to shrink.

    Returns:
        The same DataFrame object, with downcast columns.
    """
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32, np.float64)

    for col in df.columns:
        col_type = df[col].dtype

        # Extension dtypes (nullable ints, categoricals, strings, ...) are not
        # NumPy dtypes; casting them to plain NumPy types could fail or drop
        # missing-value information, so they are skipped.
        if not isinstance(col_type, np.dtype):
            continue

        if col_type.kind in "iu":
            candidates, limits_of = int_candidates, np.iinfo
        elif col_type.kind == "f":
            candidates, limits_of = float_candidates, np.finfo
        else:
            # bool, object, datetime, timedelta, ...: nothing to downcast.
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        for candidate in candidates:
            limits = limits_of(candidate)
            # NaN min/max compare False against every bound, which leaves the
            # column unchanged.
            if c_min >= limits.min and c_max <= limits.max:
                df[col] = df[col].astype(candidate)
                break

    return df

In [None]:
# Load the training metadata, add a UTF-8 encoded copy of each phrase
# (bytes are what gets written into the TFRecords), then shrink the dtypes.
train_df = pd.read_csv(CONFIG.TRAIN_DATA)
phrase_bytes = train_df["phrase"].map(lambda text: text.encode("utf-8"))
train_df["phrase_bytes"] = phrase_bytes
train_df = reduce_mem_usage(train_df)
train_df.head()

In [None]:
# Landmark selection. More landmark indices can be added to these lists.
LPOSE = [13, 15, 17, 19, 21]
RPOSE = [14, 16, 18, 20, 22]
POSE = LPOSE + RPOSE
# POSE = list(range(0, 32))

FACE = [0, 9, 11, 13, 14, 17, 117, 118, 119, 199, 346, 347, 348]

# Column labels per landmark group, axis-major: all x columns first, then all
# y columns, then all z columns.
RHAND_LBLS = [f'{axis}_right_hand_{i}' for axis in 'xyz' for i in range(21)]
LHAND_LBLS = [f'{axis}_left_hand_{i}' for axis in 'xyz' for i in range(21)]
POSE_LBLS = [f'{axis}_pose_{i}' for axis in 'xyz' for i in POSE]
FACE_LBLS = [f'{axis}_face_{i}' for axis in 'xyz' for i in FACE]

In [None]:
# Column names per coordinate axis. Within each axis the group order is fixed:
# right hand, left hand, pose, face.
X, Y, Z = (
    [f'{axis}_right_hand_{i}' for i in range(21)]
    + [f'{axis}_left_hand_{i}' for i in range(21)]
    + [f'{axis}_pose_{i}' for i in POSE]
    + [f'{axis}_face_{i}' for i in FACE]
    for axis in 'xyz'
)

In [None]:
# Every selected landmark column, axis-major (all x, then all y, then all z).
SEL_COLS = X + Y + Z
# Number of frames each sequence is padded / resized to by resize_pad.
FRAME_LEN = 128

# Positions inside SEL_COLS of the columns belonging to each coordinate axis.
X_IDX = [i for i, col in enumerate(SEL_COLS) if col.startswith("x_")]
Y_IDX = [i for i, col in enumerate(SEL_COLS) if col.startswith("y_")]
Z_IDX = [i for i, col in enumerate(SEL_COLS) if col.startswith("z_")]

# Positions inside SEL_COLS of the columns belonging to each landmark group.
RHAND_IDX = [i for i, col in enumerate(SEL_COLS) if "right_hand" in col]
LHAND_IDX = [i for i, col in enumerate(SEL_COLS) if "left_hand" in col]
# The landmark number is everything after the last underscore. Slicing the
# last two characters (col[-2:]) only works for two-digit numbers and raises
# ValueError for names such as 'x_pose_5'.
RPOSE_IDX = [i for i, col in enumerate(SEL_COLS) if "pose" in col and int(col.rsplit("_", 1)[-1]) in RPOSE]
LPOSE_IDX = [i for i, col in enumerate(SEL_COLS) if "pose" in col and int(col.rsplit("_", 1)[-1]) in LPOSE]
FACE_IDX = [i for i, col in enumerate(SEL_COLS) if "face" in col]

In [None]:
# Preprocessing the landmarks
def resize_pad(x):
    """Bring a rank-3 tensor to exactly FRAME_LEN entries along axis 0.

    Inputs with fewer than FRAME_LEN entries are zero-padded at the end of
    axis 0. All other inputs are resampled with ``tf.image.resize`` to
    FRAME_LEN along axis 0 while axis 1 keeps its size.
    """
    n_frames = tf.shape(x)[0]
    if n_frames >= FRAME_LEN:
        x = tf.image.resize(x, (FRAME_LEN, tf.shape(x)[1]))
    else:
        missing = FRAME_LEN - n_frames
        x = tf.pad(x, ([[0, missing], [0, 0], [0, 0]]))
    return x


def preprocess_landmark(x):
    """Convert one sequence of landmark columns into a fixed-size feature matrix.

    ``x`` is indexed as (row, column) with columns laid out like SEL_COLS.

    Steps:
      1. Count, for each hand, the rows in which any of its columns is NaN.
         If the right hand has more such rows than the left, the left hand
         and left pose are used and their x coordinates are replaced by
         ``1 - x``; otherwise the right hand and right pose are used as is.
      2. Hand, pose and face are each rearranged to (row, landmark, xyz) and
         standardized per row and per axis over their landmarks.
      3. The three groups are concatenated along the landmark axis, passed
         through ``resize_pad``, NaNs are replaced by zeros, and the result is
         flattened per row.

    Returns:
        Tensor of shape
        ``(FRAME_LEN, len(LHAND_IDX) + len(LPOSE_IDX) + len(FACE_IDX))``.
    """

    def split_axes(block, width):
        # Columns are axis-major: `width` x columns, then y columns, then z.
        return (
            block[:, 0 * width:1 * width],
            block[:, 1 * width:2 * width],
            block[:, 2 * width:3 * width],
        )

    def flip_x(block, width):
        # Replace x by 1 - x, keeping the axis-major column layout.
        bx, by, bz = split_axes(block, width)
        return tf.concat([1 - bx, by, bz], axis=1)

    def standardize(block, width):
        # (row, 3 * width) -> (row, width, 3), then z-score over the landmarks.
        bx, by, bz = split_axes(block, width)
        points = tf.concat([bx[..., tf.newaxis], by[..., tf.newaxis], bz[..., tf.newaxis]], axis=-1)
        mean = tf.math.reduce_mean(points, axis=1)[:, tf.newaxis, :]
        std = tf.math.reduce_std(points, axis=1)[:, tf.newaxis, :]
        return (points - mean) / std

    # Number of landmarks per group (each group stores x, y and z columns).
    hand_width = len(LHAND_IDX) // 3
    pose_width = len(LPOSE_IDX) // 3
    face_width = len(FACE_IDX) // 3

    rhand = tf.gather(x, RHAND_IDX, axis=1)
    lhand = tf.gather(x, LHAND_IDX, axis=1)
    rpose = tf.gather(x, RPOSE_IDX, axis=1)
    lpose = tf.gather(x, LPOSE_IDX, axis=1)
    face = tf.gather(x, FACE_IDX, axis=1)

    # Rows in which at least one coordinate of the hand is missing.
    right_missing = tf.math.count_nonzero(tf.reduce_any(tf.math.is_nan(rhand), axis=1))
    left_missing = tf.math.count_nonzero(tf.reduce_any(tf.math.is_nan(lhand), axis=1))

    # The hand with fewer missing rows is treated as the dominant hand.
    if right_missing > left_missing:
        hand = flip_x(lhand, hand_width)
        pose = flip_x(lpose, pose_width)
    else:
        hand = rhand
        pose = rpose

    hand = standardize(hand, hand_width)
    pose = standardize(pose, pose_width)
    face = standardize(face, face_width)

    x = tf.concat([hand, pose, face], axis=1)
    x = resize_pad(x)

    # NaNs (missing landmarks, or 0/0 from a zero standard deviation) become 0.
    x = tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)
    x = tf.reshape(x, (FRAME_LEN, len(LHAND_IDX) + len(LPOSE_IDX) + len(FACE_IDX)))
    return x

In [None]:
def load_relevant_data_subset(pq_path):
    """Read only the SEL_COLS columns of the parquet file at ``pq_path``."""
    landmarks = pd.read_parquet(pq_path, columns=SEL_COLS)
    return landmarks

In [None]:
# Smoke test: preprocess the first sequence of the first landmark file and
# print the resulting feature shape.
file_id = train_df.file_id.iloc[0]
pqfile = f"{CONFIG.ROOT_DIR}/train_landmarks/{file_id}.parquet"
seqs = load_relevant_data_subset(pqfile)
seq_refs = train_df.loc[train_df.file_id == file_id]

seq_id = seq_refs.sequence_id.iloc[0]
# The parquet index is compared against sequence ids to select the rows of
# one sequence.
frames = seqs.loc[seqs.index == seq_id]
phrase = str(train_df.loc[train_df.sequence_id == seq_id, "phrase"].iloc[0])

print(preprocess_landmark(frames).shape)

In [None]:
# Static character -> prediction-index lookup usable inside tf.data pipelines.
# Characters that are not in the map resolve to -1.
table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        list(char_to_ord),
        list(char_to_ord.values()),
    ),
    default_value=tf.constant(-1),
    name="class_weight",
)

# Index of '#', used as the padding value for encoded phrases.
mask_idx = char_to_ord['#']

def preprocess_fn(landmarks, phrase):
    """Build one (features, target) pair for training.

    The phrase is wrapped as ';' + phrase + '[' (presumably start/end markers;
    confirm against the model code), split into bytes, mapped to prediction
    indices through ``table`` and right-padded with ``mask_idx`` to length 64.
    A wrapped phrase longer than 64 bytes yields a negative pad size.

    Returns:
        ``(preprocess_landmark(landmarks), padded_indices)``.
    """
    wrapped = ';' + phrase + '['
    chars = tf.strings.bytes_split(wrapped)
    indices = table.lookup(chars)
    pad_len = 64 - tf.shape(indices)[0]
    indices = tf.pad(indices, paddings=[[0, pad_len]], constant_values=mask_idx)
    return preprocess_landmark(landmarks), indices

def decode_fn(record_bytes):
    """Parse one serialized example into ``(landmarks, phrase)``.

    Every SEL_COLS column is read as a variable-length float32 feature and the
    phrase as a scalar string. The per-column vectors are stacked and
    transposed so that ``landmarks`` is indexed as (row, column) with columns
    in SEL_COLS order.
    """
    feature_spec = {name: tf.io.VarLenFeature(dtype=tf.float32) for name in SEL_COLS}
    feature_spec["phrase"] = tf.io.FixedLenFeature([], dtype=tf.string)

    parsed = tf.io.parse_single_example(record_bytes, feature_spec)
    per_column = [tf.sparse.to_dense(parsed[name]) for name in SEL_COLS]

    # (column, row) -> (row, column)
    landmarks = tf.transpose(per_column)
    return landmarks, parsed["phrase"]

In [None]:
from skimage.transform import resize  # not used by this cell any more; kept in case other cells rely on it

# Convert every landmark parquet file into one TFRecord file holding one
# example per sequence: each SEL_COLS column as a float list, plus the phrase
# as UTF-8 bytes.
#
# The frames are written exactly as stored, with missing landmarks left as NaN
# and the original number of frames. preprocess_landmark relies on NaNs to
# pick the hand with fewer missing frames and zeroes them after normalization,
# and resize_pad brings each sequence to FRAME_LEN. Filling NaNs with a
# sentinel and interpolating here would hide the missing frames from that
# logic and blend the sentinel into real coordinates. decode_fn parses the
# columns as variable-length features, so no fixed length is required.

# Create the output directory once, before the loop.
if not os.path.isdir(CONFIG.OUTPUT_PREPROCESSING): os.mkdir(CONFIG.OUTPUT_PREPROCESSING)

for file_id in tqdm(train_df.file_id.unique()):
    pqfile = f"{CONFIG.TRAIN_DIR}/{file_id}.parquet"
    tffile = f"{CONFIG.OUTPUT_PREPROCESSING}/{file_id}.tfrecord"
    seq_refs = train_df.loc[train_df.file_id == file_id]
    seqs = load_relevant_data_subset(pqfile)

    with tf.io.TFRecordWriter(tffile) as file_writer:
        for seq_id, phrase in zip(seq_refs.sequence_id, seq_refs.phrase_bytes):
            # Rows of this parquet file that belong to the current sequence.
            frames = seqs.loc[seqs.index == seq_id]

            features = {COL: tf.train.Feature(float_list=tf.train.FloatList(value=frames[COL])) for COL in SEL_COLS}
            features["phrase"] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[phrase]))
            record_bytes = tf.train.Example(features=tf.train.Features(feature=features)).SerializeToString()
            file_writer.write(record_bytes)

In [None]:
# Pack the generated TFRecord directory into a gzip-compressed archive
# (asl.tar.gz in the current directory); -v lists every file as it is added.
!tar -zcvf asl.tar.gz /kaggle/working/tfds

In [None]:
# Directory the TFRecord files were written to by the conversion cell.
inpdir = "/kaggle/working/tfds"
# One TFRecord path per landmark file. The records live in `inpdir`, not under
# the read-only competition input directory, which contains no .tfrecord files.
tffiles = train_df.file_id.map(lambda x: f'{inpdir}/{x}.tfrecord').unique()

batch_size = 32
# The first 10% of the files form the validation split, the rest is training.
val_len = int(0.1 * len(tffiles))

train_dataset = (
    tf.data.TFRecordDataset(tffiles[val_len:])
    .map(decode_fn)
    .map(preprocess_fn)
    .shuffle(buffer_size=500)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
val_dataset = (
    tf.data.TFRecordDataset(tffiles[:val_len])
    .map(decode_fn)
    .map(preprocess_fn)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
# Decoded but otherwise unprocessed, unbatched examples from ALL files
# (this overlaps with both the training and the validation split).
test_dataset = (
    tf.data.TFRecordDataset(tffiles)
    .map(decode_fn)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)