In [21]:
import os
import re
import json
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt

import tensorflow as tf
print(tf.__version__)
import tensorflow_io as tfio
print(tfio.__version__)
import tensorflow_addons as tfa

from tensorflow.keras import layers
from tensorflow.keras import models

import wandb
from wandb.keras import WandbMetricsLogger
from wandb.keras import WandbModelCheckpoint

2.10.0
0.27.0


In [22]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

1 Physical GPUs, 1 Logical GPUs


In [23]:
data_path = "../data/tfrecord_heatmaps"


def natural_keys(text):
    ""
    def atoi(text):
        return int(text) if text.isdigit() else text
    
    return [atoi(c) for c in re.split(r'(\d+)', text)]

tfrecords = sorted(glob(f"{data_path}/*.tfrec"), key=natural_keys)

In [24]:
import json

with open("../data/sign_to_prediction_index_map.json") as f:
    data = json.load(f)
id2label = {v:k for k, v in data.items()}
id2label

{0: 'TV',
 1: 'after',
 2: 'airplane',
 3: 'all',
 4: 'alligator',
 5: 'animal',
 6: 'another',
 7: 'any',
 8: 'apple',
 9: 'arm',
 10: 'aunt',
 11: 'awake',
 12: 'backyard',
 13: 'bad',
 14: 'balloon',
 15: 'bath',
 16: 'because',
 17: 'bed',
 18: 'bedroom',
 19: 'bee',
 20: 'before',
 21: 'beside',
 22: 'better',
 23: 'bird',
 24: 'black',
 25: 'blow',
 26: 'blue',
 27: 'boat',
 28: 'book',
 29: 'boy',
 30: 'brother',
 31: 'brown',
 32: 'bug',
 33: 'bye',
 34: 'callonphone',
 35: 'can',
 36: 'car',
 37: 'carrot',
 38: 'cat',
 39: 'cereal',
 40: 'chair',
 41: 'cheek',
 42: 'child',
 43: 'chin',
 44: 'chocolate',
 45: 'clean',
 46: 'close',
 47: 'closet',
 48: 'cloud',
 49: 'clown',
 50: 'cow',
 51: 'cowboy',
 52: 'cry',
 53: 'cut',
 54: 'cute',
 55: 'dad',
 56: 'dance',
 57: 'dirty',
 58: 'dog',
 59: 'doll',
 60: 'donkey',
 61: 'down',
 62: 'drawer',
 63: 'drink',
 64: 'drop',
 65: 'dry',
 66: 'dryer',
 67: 'duck',
 68: 'ear',
 69: 'elephant',
 70: 'empty',
 71: 'every',
 72: 'eye',
 

In [25]:
from argparse import Namespace

configs = Namespace(
    batch_size = 128,
    epochs = 30,
    learning_rate = 1e-3,
    label_smoothing=0.3,
    num_steps=0.8,
)

In [26]:
train_tfrecords, valid_tfrecords = tfrecords[:20], tfrecords[20:]
print(len(train_tfrecords), len(valid_tfrecords))

20 4


In [27]:
def parse_sequence(serialized_sequence):
    return tf.io.parse_tensor(
        serialized_sequence,
        out_type=tf.float16,
    )


def parse_tfrecord_fn(example):
    feature_description = {
        "n_frames": tf.io.FixedLenFeature([], tf.float32),
        "frames": tf.io.FixedLenFeature([], tf.string),
        "label": tf.io.FixedLenFeature([], tf.int64),
    }

    return tf.io.parse_single_example(example, feature_description)


def preprocess_frames(frames):
    """This is where different preprocessing logics will be experimented."""
#     frames = (frames - tf.reduce_min(frames))/(tf.reduce_max(frames)-tf.reduce_min(frames))
    frames = tf.cast(frames, dtype=tf.float32)
    frames = tf.transpose(frames, (0,3,2,1))

    return frames


def parse_data(example):
    # Parse Frames
    n_frames = example["n_frames"]
    frames = tf.reshape(parse_sequence(example["frames"]), shape=(example["n_frames"], 61, 32, 32))
    frames = preprocess_frames(frames)
    
    # Parse Labels
    label = tf.one_hot(example["label"], depth=250)

    return frames, label

In [28]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = tf.data.TFRecordDataset(train_tfrecords)
valid_ds = tf.data.TFRecordDataset(valid_tfrecords)

trainloader = (
    train_ds
    .shuffle(configs.batch_size*4)
    .map(parse_tfrecord_fn, num_parallel_calls=AUTOTUNE)
    .map(parse_data, num_parallel_calls=AUTOTUNE)
    .batch(configs.batch_size)
    .prefetch(AUTOTUNE)
)

validloader = (
    valid_ds
    .map(parse_tfrecord_fn, num_parallel_calls=AUTOTUNE)
    .map(parse_data, num_parallel_calls=AUTOTUNE)
    .batch(configs.batch_size)
    .prefetch(AUTOTUNE)
)

In [29]:
sample, label = next(iter(trainloader))
sample.shape

TensorShape([128, 28, 32, 32, 61])

In [37]:
from tensorflow.keras.regularizers import L2


def slowonly():
    inputs = layers.Input(shape=(28,32,32,61))
    # Stem
    x = layers.Conv3D(64, (1,7,7), 1, activation='relu')(inputs)
    # First Block
    x = layers.Conv3D(64, (1,7,7), 1, padding="same", kernel_regularizer=L2(1e-4))(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.ReLU()(x)
    
    x = layers.Conv3D(64, (1,7,7), 1, kernel_regularizer=L2(1e-4))(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.ReLU()(x)
    
    # Second Block
    x = layers.Conv3D(128, (1,7,7), 1, padding="same", kernel_regularizer=L2(1e-4))(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.ReLU()(x)
    
    x = layers.Conv3D(128, (1,7,7), 1, kernel_regularizer=L2(1e-4))(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.ReLU()(x)
    
    x = layers.Conv3D(128, (1,7,7), 1, padding="same", kernel_regularizer=L2(1e-4))(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.ReLU()(x)
    
    x = layers.Conv3D(128, (1,7,7), 1, kernel_regularizer=L2(1e-4))(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.ReLU()(x)
    
    # Third Block
    x = layers.Conv3D(256, (1,7,7), 1, padding="same", kernel_regularizer=L2(1e-4))(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.ReLU()(x)
    
    x = layers.Conv3D(256, (1,7,7), 1, kernel_regularizer=L2(1e-4))(x)
    x = layers.BatchNormalization(axis=-1)(x)
    x = layers.ReLU()(x)
    
    x = layers.AveragePooling3D((1,2,2))(x)
    x = layers.GlobalAveragePooling3D()(x)
    
    x = layers.Dropout(0.3)(x)
    
    outputs = layers.Dense(250, activation="softmax", kernel_regularizer=L2(1e-2))(x)
    return models.Model(inputs, outputs)

In [38]:
tf.keras.backend.clear_session()

model = slowonly()
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 32, 32, 61)]  0         
                                                                 
 conv3d (Conv3D)             (None, 28, 26, 26, 64)    191360    
                                                                 
 conv3d_1 (Conv3D)           (None, 28, 26, 26, 64)    200768    
                                                                 
 batch_normalization (BatchN  (None, 28, 26, 26, 64)   256       
 ormalization)                                                   
                                                                 
 re_lu (ReLU)                (None, 28, 26, 26, 64)    0         
                                                                 
 conv3d_2 (Conv3D)           (None, 28, 20, 20, 64)    200768    
                                                             

In [32]:
total_steps = 616*configs.epochs
decay_steps = total_steps*configs.num_steps

cosine_decay_scheduler = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate = configs.learning_rate,
    decay_steps = decay_steps,
    alpha=0.1
)

In [33]:
model.compile(
    tfa.optimizers.AdamW(learning_rate=cosine_decay_scheduler, weight_decay=0.001),
    tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.2),
    metrics=["acc"]
)

In [27]:
run = wandb.init(
    project="kaggle-asl",
    job_type="train_poseconv3d",
    config=configs,
)

[34m[1mwandb[0m: Currently logged in as: [33mayush-thakur[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [34]:
earlystopper = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=8,
    verbose=0,
    mode="auto",
    restore_best_weights=True,
)

callbacks = [
    earlystopper,
    WandbMetricsLogger(log_freq=2),
    WandbModelCheckpoint(
        filepath=f"model",
        save_best_only=True,
    ),
]

model.fit(
    trainloader,
    epochs=configs.epochs,
    validation_data=validloader,
    callbacks=callbacks
)

Epoch 1/30


2023-04-28 00:40:45.939941: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8200


    159/Unknown - 224s 1s/step - loss: 5.4840 - acc: 0.0069

KeyboardInterrupt: 

In [33]:
eval_loss, eval_acc = model.evaluate(validloader)
wandb.log({"eval_loss": eval_loss, "eval_acc": eval_acc})



In [35]:
run.finish()

VBox(children=(Label(value='869.765 MB of 869.765 MB uploaded (0.823 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
batch/acc,▁▁▃▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████████████
batch/batch_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
batch/learning_rate,█████▇▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
batch/loss,█▇▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/acc,▁▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████████
epoch/epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
epoch/learning_rate,████▇▇▇▆▆▅▅▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/val_acc,▁▃▅▆▆▆▇▇▇▇▇▇▇█████████████████
epoch/val_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
batch/acc,0.79102
batch/batch_step,18478.0
batch/learning_rate,0.0001
batch/loss,2.7087
epoch/acc,0.791
epoch/epoch,29.0
epoch/learning_rate,0.0001
epoch/loss,2.70877
epoch/val_acc,0.6518
epoch/val_loss,3.08889
