Thanks for the public notebooks below:  
https://www.kaggle.com/code/hoyso48/1st-place-solution-training  
https://www.kaggle.com/code/irohith/aslfr-ctc-based-on-prev-comp-1st-place  
https://www.kaggle.com/code/markwijkhuizen/aslfr-transformer-training-inference  
This is the training code for the 3rd place solution; for details, see the solution write-up here:  
https://www.kaggle.com/competitions/asl-fingerspelling/discussion/434393  

# Install libs

In [None]:
!pip install -q icecream --no-index --find-links=file:///kaggle/input/icecream
!pip install -q pymp-pypi --no-index --find-links=file:///kaggle/input/pymp-pypi/pymp-pypi-0.4.5/dist

# Import libs

In [None]:
import sys, os
import numpy as np
import pandas as pd
import json
import re
import six
from collections import Counter, OrderedDict, defaultdict
from collections.abc import Iterable
from multiprocessing import cpu_count
from tqdm.notebook import tqdm
from icecream import ic
import pymp
import tensorflow as tf
ic(tf.__version__)

# Flags

In [None]:
class FLAGS(object):
  # Global configuration, read as class attributes (FLAGS.folds, FLAGS.root, ...).
  #
  # online == False: use the n-fold split, train on folds 1..folds-1 and validate on fold 0.
  # online == True: train on all of the training data, but still validate on fold 0.
  online = False  
  folds = 4  # number of folds the data is split into
  fold_seed = 1229  # seed passed to the fold splitter
  root = '../input/asl-fingerspelling'  # competition input directory
  working = '/kaggle/working'  # output directory for generated files
  use_z = True  # use x, y and z coordinates if True
  norm_frames = True # normalize frames as (x - mean) / std
  concat_frames = True # concatenate the original and the normalized frames
  add_pos = True # add the absolute frame position as a feature, e.g. 1/1000., 2/1000.
  sup_weight = 0.1 # sample weight assigned to the supplemental dataset

def load_json(filename):
  """Load and return the JSON document stored in `filename`.

  The file is decoded explicitly as UTF-8 (the JSON interchange encoding)
  instead of relying on the platform's default text encoding.

  Args:
    filename: path of the JSON file to read.

  Returns:
    The decoded Python object (dict, list, ...).
  """
  with open(filename, encoding='utf-8') as fh:
    return json.load(fh)

# Common configs

In [None]:
# Landmark index tables used to select input columns.
# NOTE(review): the indices look like MediaPipe pose/face landmark ids — confirm.

# Pose landmark indices, split into a left and a right group of equal size.
LPOSE = [13, 15, 17, 19, 21]
RPOSE = [14, 16, 18, 20, 22]
POSE = LPOSE + RPOSE

# Face landmark indices for the lips (40 entries).
LIP = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]
ic(len(LIP))
# Left / right lip subsets (18 entries each); whatever remains of LIP is "mid".
LLIP = [84,181,91,146,61,185,40,39,37,87,178,88,95,78,191,80,81,82]
RLIP = [314,405,321,375,291,409,270,269,267,317,402,318,324,308,415,310,311,312]
MID_LIP = [i for i in LIP if i not in LLIP + RLIP]
ic(len(LLIP), len(RLIP), len(MID_LIP))

# Face landmark indices for the nose, split the same way as the lips.
NOSE=[
    1,2,98,327
]
LNOSE = [98]
RNOSE = [327]
MID_NOSE = [i for i in NOSE if i not in LNOSE + RNOSE]

# Face landmark indices for the eyes (16 entries per eye).
LEYE = [
    263, 249, 390, 373, 374, 380, 381, 382, 362,
    466, 388, 387, 386, 385, 384, 398,
]
REYE = [
    33, 7, 163, 144, 145, 153, 154, 155, 133,
    246, 161, 160, 159, 158, 157, 173,
]

# Per-side point counts for each landmark group (left and right have equal sizes).
N_HAND_POINTS = 21
N_POSE_POINTS = len(LPOSE)
N_LIP_POINTS = len(LLIP)
N_EYE_POINTS = len(LEYE)
N_NOSE_POINTS = len(LNOSE)
# Points that belong to neither side (mid lip + mid nose).
N_MID_POINTS = len(MID_LIP + MID_NOSE)

# Selected parquet columns: for every landmark, its x, y and z column, grouped as
# left hand, right hand, left/right pose, left/right lip, left/right eye,
# left/right nose and finally the mid lip / mid nose points.
_LANDMARK_GROUPS = (
    ('left_hand', range(N_HAND_POINTS)),
    ('right_hand', range(N_HAND_POINTS)),
    ('pose', LPOSE),
    ('pose', RPOSE),
    ('face', LLIP),
    ('face', RLIP),
    ('face', LEYE),
    ('face', REYE),
    ('face', LNOSE),
    ('face', RNOSE),
    ('face', MID_LIP),
    ('face', MID_NOSE),
)
SEL_COLS = [
    f'{axis}_{part}_{point}'
    for part, points in _LANDMARK_GROUPS
    for point in points
    for axis in ('x', 'y', 'z')
]

# Number of raw feature columns per frame.
N_COLS = len(SEL_COLS)
ic(N_COLS)
    
# Character vocabulary: load the character -> index mapping and shift every index
# by +1 so that index 0 is free for the padding token.
CHAR2IDX = load_json(f'../input/asl-fingerspelling/character_to_prediction_index.json')
CHAR2IDX = {k: v + 1 for k, v in CHAR2IDX.items()}
N_CHARS = len(CHAR2IDX)  # number of real characters (special tokens not yet added)
ic(N_CHARS)

PAD_IDX = 0
SOS_IDX = PAD_IDX # Start Of Sentence (shares the padding index)
EOS_IDX = N_CHARS + 1 # End Of Sentence (first index after the shifted characters)
ic(PAD_IDX, SOS_IDX, EOS_IDX)

PAD_TOKEN = '<PAD>'
SOS_TOKEN = PAD_TOKEN
EOS_TOKEN = '<EOS>'

# Register the special tokens in the vocabulary.
CHAR2IDX[PAD_TOKEN] = PAD_IDX
CHAR2IDX[EOS_TOKEN] = EOS_IDX 

# Phrase-type marker tokens (defined here; not added to CHAR2IDX in this section).
ADDRESS_TOKEN = '<ADDRESS>'
URL_TOKEN = '<URL>'
PHONE_TOKEN = '<PHONE>'
SUP_TOKEN = '<SUP>'

# Vocabulary size including <PAD> and <EOS>, plus the inverse index -> token mapping.
VOCAB_SIZE = len(CHAR2IDX)
IDX2CHAR = {v: k for k, v in CHAR2IDX.items()}
ic(VOCAB_SIZE)
ic(len(IDX2CHAR))

STATS = {}
# Phrase type names; PHRASE_TYPES maps each name to its position in CLASSES.
CLASSES = [
  'address', 
  'url', 
  'phone', 
  'sup',
  ]
PHRASE_TYPES = dict(zip(CLASSES, range(len(CLASSES))))
N_TYPES = len(CLASSES)
# Encoded phrases are padded / truncated to this many tokens.
MAX_PHRASE_LEN = 32

def get_vocab_size():
  """Return the vocabulary size, i.e. the module-level VOCAB_SIZE."""
  return VOCAB_SIZE

def get_n_cols(no_motion=False, use_z=None):
  """Return the number of input feature columns per frame under the current FLAGS.

  Args:
    no_motion: accepted for interface compatibility; not used by this function.
    use_z: whether z coordinates are kept; defaults to FLAGS.use_z when None.

  Returns:
    The per-frame feature count as an int.
  """
  keep_z = FLAGS.use_z if use_z is None else use_z

  total = N_COLS
  if FLAGS.concat_frames:
    # Concatenating original and normalized frames requires normalization.
    assert FLAGS.norm_frames
    total = total + N_COLS

  if not keep_z:
    # Drop one of every three coordinates (z).
    total = total // 3 * 2

  if FLAGS.add_pos:
    # One extra column for the absolute frame position.
    total = total + 1

  return total

def get_phrase_type(phrase):
  """Classify a phrase as 'phone', 'url' or 'address'.

  A phrase made only of digits, '+' and '-' is a phone number; a phrase
  without spaces that contains 'www', '.' or '/' is a url; anything else is
  an address.
  """
  if re.match(r'^[\d+-]+$', phrase):
    return 'phone'

  has_url_marker = any(marker in phrase for marker in ('www', '.', '/'))
  if has_url_marker and ' ' not in phrase:
    return 'url'

  return 'address'

# Preprocess for tfrecords

In [None]:
def set_folds_(df, folds=5, group_key=None, stratify_key=None, seed=1024):
  """Assign a fold id to every row of `df`, in place, in a new 'fold' column.

  Args:
    df: DataFrame to annotate.
    folds: number of folds.
    group_key: optional column; rows sharing a value always share a fold.
    stratify_key: optional column to stratify on (uses scikit-learn splitters).
    seed: random seed of the split.

  Returns:
    The same DataFrame, with the 'fold' column set.
  """
  if stratify_key is not None:
    # Stratified split: delegate to scikit-learn, with or without grouping.
    if group_key is None:
      from sklearn.model_selection import StratifiedKFold
      splitter = StratifiedKFold(n_splits=folds, random_state=seed, shuffle=True)
      split_iter = splitter.split(df, df[stratify_key])
    else:
      from sklearn.model_selection import StratifiedGroupKFold
      splitter = StratifiedGroupKFold(n_splits=folds, random_state=seed, shuffle=True)
      split_iter = splitter.split(df, df[stratify_key], df[group_key])
    assignment = np.zeros(len(df), dtype=int)
    # Each row's fold is the split in which it lands in the validation part.
    for fold_id, (_, valid_rows) in enumerate(list(split_iter)):
      assignment[valid_rows] = fold_id
    df['fold'] = assignment
    return df

  # Plain random split over rows, or over distinct group values.
  rng = np.random.default_rng(seed)
  if group_key is None:
    n_items = len(df)
  else:
    group_values = df[group_key].unique()
    n_items = len(group_values)

  order = np.arange(n_items)
  rng.shuffle(order)
  assignment = np.asarray([0] * n_items)
  for fold_id, chunk in enumerate(np.array_split(order, folds)):
    assignment[chunk] = fold_id

  if group_key is None:
    df['fold'] = assignment
  else:
    df['fold'] = df[group_key].map(dict(zip(group_values, assignment)))
  return df

def init_folds_(train):
  """Add the 'fold' column to `train` in place.

  The split is grouped by participant and stratified by phrase type, using
  the fold count and seed from FLAGS.
  """
  set_folds_(
      train,
      folds=FLAGS.folds,
      group_key='participant_id',
      stratify_key='phrase_type',
      seed=FLAGS.fold_seed,
  )

In [None]:
def check_phrase_dup_(train):
  """Add a 'phrase_dup' column to `train` in place.

  'phrase_dup' is 1 for a row whose phrase also occurs in at least one other
  fold, and 0 when every occurrence of the phrase is in the row's own fold.

  Args:
    train: DataFrame with 'phrase' and 'fold' columns.
  """
  # Separate counters with tuple keys: mixing both counts in one Counter under
  # string keys lets a phrase such as '0^a' collide with the per-fold key of
  # phrase 'a' in fold 0.
  total = Counter(train['phrase'])
  per_fold = Counter(zip(train['fold'], train['phrase']))

  train['phrase_dup'] = [
      int(per_fold[(fold, phrase)] < total[phrase])
      for fold, phrase in zip(train['fold'], train['phrase'])
  ]
  
def preprocess_parquet(file_path, save=False):
  """Yield the landmark frames of every sequence stored in one parquet file.

  Args:
    file_path: path of the parquet file; only the SEL_COLS columns are read.
    save: when True, also write the selected column names to
      `<FLAGS.working>/inference_args.json` before reading.

  Yields:
    (seq_id, frame, n_frame): the sequence id taken from the frame index, the
    sequence's values flattened row by row into a list, and its row count.
  """
  if save:
    with open(f'{FLAGS.working}/inference_args.json', 'w') as f:
      json.dump({ 'selected_columns': SEL_COLS }, f)

  df = pd.read_parquet(file_path, columns=SEL_COLS)
  # Group once by the index instead of re-scanning the whole frame with a
  # boolean mask for every sequence id; sort=False keeps first-appearance
  # order, matching df.index.unique().
  groups = df.groupby(level=0, sort=False)
  for seq_id, seq_df in tqdm(groups, total=groups.ngroups, desc='per_seq'):
    frame = seq_df.values
    assert frame.ndim == 2
    assert frame.shape[-1] == N_COLS
    n_frame = frame.shape[0]
    frame = list(frame.reshape(-1))
    yield seq_id, frame, n_frame

def preprocss_(train):
  """Add 'phrase_len', 'phrase_type' and 'file_path' columns to `train` in place."""
  phrases = train['phrase']
  train['phrase_len'] = phrases.apply(len)
  train['phrase_type'] = phrases.apply(get_phrase_type)

  # Complete path of each file: the relative 'path' column under FLAGS.root.
  train['file_path'] = train['path'].apply(lambda rel: f'{FLAGS.root}/{rel}')

  
def set_idx_(train):
  """Add an 'idx' column to `train` in place: each row's running index within its fold."""
  next_idx = [0 for _ in range(FLAGS.folds)]
  positions = []
  for fold in train['fold']:
    positions.append(next_idx[fold])
    next_idx[fold] += 1
  train['idx'] = positions

def init_dfs(obj='train'):
  """Load a metadata csv from FLAGS.root and add all derived columns.

  Args:
    obj: 'train' reads train.csv; any other value reads supplemental_metadata.csv.

  Returns:
    The annotated DataFrame.
  """
  csv_name = 'supplemental_metadata' if obj != 'train' else 'train'
  frame = pd.read_csv(f'{FLAGS.root}/{csv_name}.csv')
  # Each step mutates the frame in place; later steps rely on earlier columns.
  for step in (preprocss_, init_folds_, check_phrase_dup_, set_idx_):
    step(frame)
  return frame

# Gen tfrecords

In [None]:
def int_feature(value):
  """Wrap an int, or a list/tuple of ints, in an int64 tf.train.Feature."""
  values = value if isinstance(value, (list, tuple)) else [value]
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def int64_feature(value):
  """Build an int64 tf.train.Feature; a scalar is treated as a one-element list."""
  if isinstance(value, (list, tuple)):
    items = value
  else:
    items = [value]
  int_list = tf.train.Int64List(value=items)
  return tf.train.Feature(int64_list=int_list)


def bytes_feature(value):
  """Build a bytes tf.train.Feature from a str/bytes value or a list/tuple of them.

  A scalar is treated as a one-element list. Every str element is encoded
  with str.encode(); other elements are passed through unchanged. An empty
  list yields an empty bytes list instead of raising IndexError.

  Args:
    value: a str, bytes, or a list/tuple of str/bytes.

  Returns:
    A tf.train.Feature holding a BytesList.
  """
  if not isinstance(value, (list, tuple)):
    value = [value]
  # Decide per element rather than from value[0] only, so empty and mixed
  # str/bytes inputs are handled.
  value = [x.encode() if isinstance(x, str) else x for x in value]
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def float_feature(value):
  """Build a float tf.train.Feature; a scalar is treated as a one-element list."""
  floats = value if isinstance(value, (list, tuple)) else [value]
  float_list = tf.train.FloatList(value=floats)
  return tf.train.Feature(float_list=float_list)

def gen_feature(l, dtype=None):
  """Convert one value into a tf.train.Feature, choosing the kind from its dtype.

  Args:
    l: a scalar (str, bytes, int, float) or an iterable of values.
    dtype: optional dtype overriding the inferred one. When None it is
      np.str_ for str/bytes, np.int64 for int, np.float32 for float and
      otherwise `np.asarray(l).dtype`.

  Returns:
    An int64 feature for int32/int64 data, a float feature for
    float32/float64 data, and a bytes feature for everything else.
  """
  if dtype is None:
    if isinstance(l, (str, bytes)):
      dtype = np.str_
    elif isinstance(l, int):
      dtype = np.int64
    elif isinstance(l, float):
      dtype = np.float32
    else:
      dtype = np.asarray(l).dtype

  if isinstance(l, Iterable) and dtype != np.str_ and dtype != object:
    l = list(l)

  # A string such as '(1, 2)' or '[1, 2]' is stored as a list of ints.
  if (dtype == object or dtype == np.str_) and isinstance(l, str):
    bracketed = (l.startswith('(') and l.endswith(')')
                 or l.startswith('[') and l.endswith(']'))
    if bracketed:
      try:
        parsed = [int(x.strip()) for x in l[1:-1].split(',')]
      except ValueError:
        # Not an int list: keep the original string untouched (previously it
        # was left replaced by its comma-split pieces).
        pass
      else:
        l = parsed
        dtype = np.int64

  if dtype == np.int64 or dtype == np.int32:
    return int64_feature(l)
  if dtype == np.float32 or dtype == np.float64:
    return float_feature(l)
  # Strings, objects and every other dtype are stored as bytes.
  return bytes_feature(l)

def gen_features(feature, default_value=0):
  """Convert a dict of raw values into a dict of tf.train.Feature objects.

  Args:
    feature: mapping from feature name to raw value.
    default_value: value stored in place of an empty list or tuple.

  Returns:
    A new dict with the same keys, each value converted by gen_feature.

  Raises:
    Exception: whatever gen_feature raised for a key, re-raised after the
      offending key has been printed.
  """
  feature_ = {}
  for key in feature:
    value = feature[key]
    # `list or tuple` evaluates to `list`, so empty tuples used to slip through.
    if isinstance(value, (list, tuple)) and not value:
      value = [default_value]
    try:
      feature_[key] = gen_feature(value)
    except Exception as e:
      print(e)
      print('bad key', key)
      # Propagate the failure instead of exit(0), which reported success.
      raise
  return feature_

class TfrecordsWriter(object):
  """Write tf.train.Example records to a TFRecord file, optionally buffered.

  Records are written to `<name>.TMP` while the writer is open. close()
  renames that file to `<name>.<num_records>.<format>`, or deletes it when
  no record was written. The writer can be used as a context manager.
  """

  def __init__(self, filename, format='tfrec', buffer_size=None,
               shuffle=False, seed=None, clear_first=False):
    '''
    Args:
      filename: output path; must end with '.' + format.
      format: file extension of the final output.
      buffer_size: None writes every record immediately; 0 keeps all records
        in memory and writes them on close; a positive value writes whenever
        that many records are buffered.
      shuffle: shuffle buffered records before they are written.
      seed: seed of the shuffle generator (None gives an unseeded generator).
      clear_first: delete earlier `<name>.*.<format>` outputs first.
    '''
    # Always create the generator so shuffle=True also works without a seed.
    self.rng = np.random.default_rng(seed)
    self.count = 0
    self.buffer_size = buffer_size
    self.shuffle = shuffle

    assert filename.endswith('.' + format), f'file:{filename} format:{format}'
    filename_ = filename[:-len(format)-1]
    filename = filename_ + '.TMP'
    dir_ = os.path.dirname(filename)
    if dir_:
      # os.makedirs('') raises, so skip it for bare file names.
      os.makedirs(dir_, exist_ok=True)

    if clear_first:
      import glob
      # filename_ already contains the directory; prefixing dir_ again (as the
      # old shell command did) never matched the previous outputs.
      pattern = f'{glob.escape(filename_)}.*.{format}'
      ic(pattern)
      for stale in glob.glob(pattern):
        os.remove(stale)

    self.writer = tf.io.TFRecordWriter(filename)
    # buffer_size == 0 is a valid "buffer everything" mode, so test for None.
    self.buffer = [] if buffer_size is not None else None
    self.sort_vals = []

    self.filename = filename
    self.format = format

    self.closed = False

  def __del__(self):
    self.close()

  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_value, traceback):
    self.close()

  def _flush_buffer(self):
    """Write all buffered examples (sorted or shuffled) and empty the buffer."""
    if not self.buffer:
      return
    if self.sort_vals:
      if len(self.sort_vals) != len(self.buffer):
        raise ValueError('every buffered example needs a sort value')
      # Sort on the keys only; the sort is stable and never compares examples.
      order = sorted(range(len(self.buffer)), key=self.sort_vals.__getitem__)
      self.buffer = [self.buffer[i] for i in order]
    elif self.shuffle:  # no shuffling when sort values are given
      self.rng.shuffle(self.buffer)
    for example in self.buffer:
      self.writer.write(example.SerializeToString())
    self.buffer = []
    self.sort_vals = []

  def close(self):
    """Flush pending records, close the file and move it to its final name."""
    # A missing attribute means __init__ did not finish; nothing to clean up.
    if getattr(self, 'closed', True):
      return
    self._flush_buffer()
    # Close the underlying writer so all data is on disk before the rename.
    self.writer.close()

    ifile = self.filename
    if self.num_records:
      ofile = ifile[:-len('.TMP')] + f'.{self.num_records}.{self.format}'
      os.replace(ifile, ofile)
    else:
      print(f'removing {ifile}')
      if os.path.exists(ifile):
        os.remove(ifile)
    self.closed = True
    self.count = 0

  def finalize(self):
    """Alias of close()."""
    self.close()

  def write(self, feature, sort_val=None):
    """Alias of write_feature(); `sort_val` is forwarded as its `sort_key`."""
    self.write_feature(feature, sort_val)

  def write_feature(self, feature, sort_key=None):
    """Convert a feature dict to an Example and write it.

    When `sort_key` is given, `feature[sort_key]` is used as the sort value.
    """
    fe = gen_features(feature)
    example = tf.train.Example(features=tf.train.Features(feature=fe))
    if sort_key is None:
      self.write_example(example)
    else:
      self.write_example(example, feature[sort_key])

  def write_example(self, example, sort_val=None):
    """Write (or buffer) one Example; `sort_val` is only used when buffering."""
    self.count += 1
    if self.buffer is None:
      self.writer.write(example.SerializeToString())
      return
    self.buffer.append(example)
    if sort_val is not None:
      self.sort_vals.append(sort_val)
    # buffer_size == 0 never flushes here; everything is written on close.
    if self.buffer_size and len(self.buffer) >= self.buffer_size:
      assert not self.sort_vals, 'sort all values require buffer_size==0'
      self._flush_buffer()

  def size(self):
    """Return the number of records written so far (0 again after close)."""
    return self.count

  @property
  def num_records(self):
    return self.count

In [None]:
# Module-level state filled by gen_records() and read by gen_record().
train = {}  # sequence_id -> metadata row (as a dict)
file_paths = []  # parquet file paths; gen_record() looks a path up by index
records_dir = None  # output directory of the tfrecord files, set by gen_records()

def pad(l, max_len, pad_idx=0):
  """Return `l` truncated to `max_len` items or right-padded with `pad_idx` up to it."""
  missing = max_len - len(l)
  if missing > 0:
    return l + [pad_idx] * missing
  return l[:max_len]

def gen_record(index, obj):
  """Convert one parquet file into FLAGS.folds tfrecord files, one per fold.

  Args:
    index: position of the parquet file in the module-level `file_paths`.
    obj: 'train' gives every sample weight 1.0; anything else uses FLAGS.sup_weight.
  """
  file_path = file_paths[index]
  # Every input file owns a contiguous block of FLAGS.folds output file numbers.
  first_shard = index * FLAGS.folds
  writers = [
      TfrecordsWriter(f'{records_dir}/{first_shard + fold}.tfrec',
                      buffer_size=1000, shuffle=True, seed=1024)
      for fold in range(FLAGS.folds)
  ]
  copied_keys = ('sequence_id', 'file_id', 'participant_id', 'phrase', 'fold',
                 'phrase_len', 'phrase_type', 'phrase_dup', 'idx')
  sample_weight = 1.0 if obj == 'train' else FLAGS.sup_weight

  for sequence_id, frame, n_frame in preprocess_parquet(file_path, save=(index==0)):
    row = train[sequence_id]
    fe = {key: row[key] for key in copied_keys}
    fe['frames'] = frame
    fe['n_frames'] = n_frame
    fe['frame_mean'] = np.nan_to_num(np.array(frame)).mean()
    fe['n_frames_per_char'] = n_frame / row['phrase_len']

    char_ids = [CHAR2IDX[c] for c in row['phrase']]

    # Index 0 is reserved for padding, so shift back by one.
    fe['first_char'] = char_ids[0] - 1
    fe['last_char'] = char_ids[-1] - 1
    fe['phrase'] = row['phrase']
    # Encoded phrase: characters, then EOS, padded / cut to MAX_PHRASE_LEN.
    fe['phrase_'] = pad(char_ids + [EOS_IDX], MAX_PHRASE_LEN, PAD_IDX)
    fe['phrase_type_'] = PHRASE_TYPES[row['phrase_type']]
    fe['weight'] = sample_weight

    # Multi-hot vector marking which characters occur in the phrase.
    cls_label = [0] * N_CHARS
    for char_id in char_ids:
      cls_label[char_id - 1] = 1
    fe['cls_label'] = cls_label

    writers[row['fold']].write(fe)

  for writer in writers:
    writer.close()
    
def gen_records(obj, out_dir):
  """Generate the tfrecord files of one dataset in parallel.

  Loads the metadata for `obj`, fills the module-level `train`, `file_paths`
  and `records_dir` state used by gen_record(), then spreads the parquet
  files over one worker per CPU.

  Args:
    obj: 'train' for the main dataset; any other value selects the
      supplemental metadata (see init_dfs).
    out_dir: sub-directory of `<FLAGS.working>/tfrecords` to write into.
  """
  global records_dir
  df = init_dfs(obj=obj)
  records_dir = f'{FLAGS.working}/tfrecords/{out_dir}'
  ic(records_dir)
  os.makedirs(records_dir, exist_ok=True)

  # Reset the shared state in place: without this a second call would also
  # reprocess the files and rows registered by the previous call.
  train.clear()
  file_paths.clear()

  for row in tqdm(df.itertuples(), total=len(df), desc='train'):
    row = row._asdict()
    train[row['sequence_id']] = row

  file_paths.extend(df.file_path.unique())
  num_records = len(file_paths)
  ic(num_records)
  num_workers = cpu_count()
  # One contiguous chunk of file indices per worker.
  inputs_list = np.array_split(list(range(num_records)), num_workers)
  with pymp.Parallel(num_workers) as p:
    for i in p.range(num_workers):
      for x in tqdm(inputs_list[i], desc='gen_records'):
        gen_record(x, obj)

In [None]:
# Generate the tfrecord files of the main training set under <FLAGS.working>/tfrecords/train.
gen_records(obj='train', out_dir='train')

In [None]:
## This generates the tfrecords of the supplemental dataset, which is also used for training; it is commented out here because the output size would exceed the limit.
#gen_records(obj='sup', out_dir='sup')