## FROM: End-to-End Pytorch Training & Submission
(Kaggle Notebook) https://www.kaggle.com/code/mayukh18/end-to-end-pytorch-training-submission/notebook

In [None]:
# Mount Google Drive, but only when running inside Colab. The google.colab
# package does not exist on Jupyter/Kaggle, so an unconditional import would
# stop "Run All" on those platforms. The same COLAB_RELEASE_TAG check is used
# by the other Colab-only cells in this notebook.
import os

if os.getenv("COLAB_RELEASE_TAG"):
    from google.colab import drive
    drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Environment selection: keep exactly one BASE_DIR / WORKING_DIR pair uncommented.
#   BASE_DIR    - root of the "asl-signs" dataset (train.csv and train_landmark_files are read from it)
#   WORKING_DIR - where the label map, feature arrays and exported models are read/written

# Jupyter on macOS
# BASE_DIR = "/Users/johnhanratty/Library/CloudStorage/OneDrive-Personal/IRMA_GIT/Kaggle_SignLanguage/asl-signs"
# WORKING_DIR = BASE_DIR
# !pip install nb_black --quiet
# %load_ext lab_black

# Colab (active configuration)
BASE_DIR = "/content/asl-signs"   # alternative location on Drive: "/content/drive/MyDrive/GaggleSignLang/asl-signs"
WORKING_DIR = "/content/asl-work"
# !pip install nb_black --quiet
# print('-----ok')
# %load_ext nb_black

# Kaggle
# BASE_DIR = "/kaggle/input/asl-signs"
# WORKING_DIR = "/kaggle/working/"
# !pip install nb_black --quiet --root-user-action=ignore
# %load_ext lab_black

import os
import gc
import shutil

import json
from tqdm import tqdm
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore')

LANDMARK_FILES_DIR = f'{BASE_DIR}/train_landmark_files'
TRAIN_FILE = f"{BASE_DIR}/train.csv"

# Number of frames every clip is reduced / padded to.
FRAMES_OUT = 12 #16 # 16
# Landmarks kept per frame by FeatureGen: 40 lips + 21 left hand + 33 pose + 21 right hand.
PTS_IN_FRAME = 115
# Coordinates kept per landmark. FeatureGen flattens x, y and z (3 values per
# landmark -> 345 features per frame), so this must be 3 for
# FRAMES_OUT * PTS_IN_FRAME * DIMS to equal the width of the saved feature rows.
DIMS = 3

print('done')


done


In [None]:
# COLAB ONLY - COPY / EXTRACT PARQUET
# Copies asl-signs.zip from the mounted Google Drive to the Colab instance,
# extracts it to /content/asl-signs, then makes sure WORKING_DIR exists and
# holds the sign -> index label map.
# NOTE(review): an earlier version of this comment required "/content/kaggle.json"
# (a Kaggle token); nothing in this cell reads that file.
# https://towardsdatascience.com/7-ways-to-load-external-data-into-google-colab-7ba73e7d5fc7 

def _print_timestamp():
    """Print a separator line followed by the current local date and time."""
    from datetime import datetime
    print('*******************************')
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))


if os.getenv("COLAB_RELEASE_TAG") and not os.path.exists('/content/asl-signs'):
    print("Running in Colab and need asl-signs data files")
    from zipfile import ZipFile

    _print_timestamp()
    shutil.copy("/content/drive/MyDrive/GaggleSignLang/asl-signs.zip", "/content")

    _print_timestamp()
    # Extract into a scratch directory and rename it only once extraction has
    # finished. If the cell is interrupted part-way, /content/asl-signs does
    # not exist yet, so the check above triggers a clean retry instead of
    # silently accepting a half-extracted dataset.
    partial_dir = '/content/asl-signs.partial'
    shutil.rmtree(partial_dir, ignore_errors=True)
    with ZipFile('/content/asl-signs.zip', 'r') as archive:
        archive.extractall(partial_dir)
    os.rename(partial_dir, '/content/asl-signs')

    _print_timestamp()

else:
    print("ok")

if os.getenv("COLAB_RELEASE_TAG"):
    # Create the directory named by WORKING_DIR (rather than a hard-coded
    # path) so changing the configuration cell is enough.
    os.makedirs(WORKING_DIR, exist_ok=True)
    shutil.copy("/content/drive/MyDrive/GaggleSignLang/sign_to_prediction_index_map.json", WORKING_DIR)




Running in Colab and need asl-signs data files
*******************************
2023-03-29 13:07:38


KeyboardInterrupt: ignored

In [None]:
ROWS_PER_FRAME = 543  # combined face, lefth, pose, righth

# FILTER FEATURES IN EACH FRAME  - LIPS (subset of face), POSE & HANDS
class FeatureGen(nn.Module):
    """Pick lips, hand and pose landmarks and flatten them to one row per frame.

    The module has no parameters; it only indexes, reshapes and pads.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Turn landmarks into per-frame feature rows.

        Args:
            x: torch tensor indexed as (frame, landmark, coordinate). The
                reshapes below assume 3 coordinates per landmark and 21
                landmarks from index 522 to the end.

        Returns:
            Tensor with FRAMES_OUT rows and (40 + 21 + 33 + 21) * 3 columns.
            Rows beyond the frames present in ``x`` are filled with NaN.
        """
        lips_idx = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308, 95, 88, 178, 87, 14, 317, 402, 318, 324, 146, 91, 181, 84, 17, 314, 405, 321, 375]

        # (landmarks of one body part, how many landmarks it has), in output order:
        # lips, left hand, pose, right hand. The full face (first 468 rows) is not used.
        body_parts = (
            (x[:, lips_idx, :], len(lips_idx)),
            (x[:, 468:489, :], 21),
            (x[:, 489:522, :], 33),
            (x[:, 522:, :], 21),
        )

        # one flat (x, y, z, x, y, z, ...) row per frame for every body part
        flat_parts = [points.contiguous().view(-1, count * 3) for points, count in body_parts]
        xfeat = torch.cat(flat_parts, axis=1)

        # pad at the bottom with NaN rows up to FRAMES_OUT
        # (a negative amount trims rows instead - constant-mode F.pad behaviour)
        rows_to_add = FRAMES_OUT - xfeat.shape[0]
        return F.pad(xfeat, pad=(0, 0, 0, rows_to_add), value=float('nan'))

def load_relevant_data_subset(pq_path):
    """Load one landmark parquet file and keep at most FRAMES_OUT frames.

    Clips longer than FRAMES_OUT frames are subsampled at evenly spaced
    positions from the first to the last frame, so exactly FRAMES_OUT frames
    are returned. Shorter clips are returned unchanged (padding is done later
    by FeatureGen).

    Args:
        pq_path: path of a parquet file with ``frame``, ``x``, ``y`` and ``z``
            columns and ROWS_PER_FRAME rows per frame.

    Returns:
        float32 NumPy array of shape (n_frames, ROWS_PER_FRAME, 3) with
        n_frames <= FRAMES_OUT.
    """
    data_columns = ["x", "y", "z"]
    # Only the frame id and the coordinates are used, so skip the other columns.
    data = pd.read_parquet(pq_path, columns=["frame"] + data_columns)
    frame_ids = data['frame'].unique()
    n_frames = len(frame_ids)

    if n_frames > FRAMES_OUT:
        # Evenly spaced positions over the whole clip. Consecutive positions are
        # more than 1 apart whenever n_frames > FRAMES_OUT, so rounding never
        # produces duplicates and never leaves the valid range.
        keep = np.linspace(0, n_frames - 1, num=FRAMES_OUT).round().astype(int)
        frame_ids = frame_ids[keep]
        n_frames = len(frame_ids)
        data = data.loc[data['frame'].isin(frame_ids)]

    xyz = data[data_columns].values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return xyz.astype(np.float32)

## PROCESS EACH ROW (ONE PARQUET PER ROW)
def convert_row(row):
    """Convert one ``df.iterrows()`` item into (feature array, label).

    ``row`` is an (index, record) pair; the record's ``path`` is resolved
    against BASE_DIR, loaded, run through ``feature_converter`` and returned
    as a NumPy array together with the record's ``label``.
    """
    record = row[1]
    landmarks = load_relevant_data_subset(os.path.join(BASE_DIR, record.path))
    features = feature_converter(torch.tensor(landmarks))
    return features.cpu().numpy(), record.label

## LOOP THROUGH PARQUET FILES LISTED IN TRAIN FILE
##  SAVE RESULTS 
def convert_and_save_data():
    """Convert every clip listed in TRAIN_FILE to features and save them.

    Reads the sign -> index map from WORKING_DIR, converts each row of the
    train CSV with ``convert_row`` and writes
    ``feature_data{FRAMES_OUT}.npy`` and ``feature_labels.npy`` to
    WORKING_DIR. On Colab both files are also copied to Google Drive.

    Returns:
        The feature array, shaped (number of rows,) + shape of one converted
        clip.
    """
    # `with` closes the file handle as soon as the map is parsed.
    with open(f"{WORKING_DIR}/sign_to_prediction_index_map.json", "r") as map_file:
        label_map = json.load(map_file)
    df = pd.read_csv(TRAIN_FILE)
    df['label'] = df['sign'].map(label_map)
    n_rows = df.shape[0]
    nplabels = np.zeros(n_rows)

    # The output buffer is sized from the first converted clip, so it always
    # matches what the feature converter actually produces.
    npdata = None
    results = map(convert_row, df.iterrows())
    for i, (x, y) in tqdm(enumerate(results), total=n_rows):
        if npdata is None:
            npdata = np.zeros((n_rows,) + tuple(x.shape))
        npdata[i, :] = x
        nplabels[i] = y
    if npdata is None:
        # empty train file: still save a well-formed (empty) array
        npdata = np.zeros((0, FRAMES_OUT, PTS_IN_FRAME))

    np.save(f"{WORKING_DIR}/feature_data{FRAMES_OUT}.npy", npdata)
    np.save(f"{WORKING_DIR}/feature_labels.npy", nplabels)
    if os.getenv("COLAB_RELEASE_TAG"):
        shutil.copy(f"{WORKING_DIR}/feature_data{FRAMES_OUT}.npy", "/content/drive/MyDrive/GaggleSignLang")
        shutil.copy(f"{WORKING_DIR}/feature_labels.npy", "/content/drive/MyDrive/GaggleSignLang")

    return npdata
 
# Run the (slow) conversion only when there is somewhere to save the results.
if not os.path.exists(WORKING_DIR):
  # f-string so the path is printed as plain text rather than as a set literal
  print(f'---- WORKING DIRECTORY DOES NOT EXIST---- {WORKING_DIR}')
else:
  feature_converter = FeatureGen()
  datax = convert_and_save_data()



  1%|          | 1004/94477 [00:14<22:38, 68.82it/s]


KeyboardInterrupt: ignored

# RUN MODEL

In [None]:
# COLAB ONLY - COPY FEATURE FILES FROM GOOGLE DRIVE TO WORKING DIRECTORY
# 
import os
import gc
import shutil

if os.getenv("COLAB_RELEASE_TAG"):
    # Create the directory named by WORKING_DIR (rather than a hard-coded
    # path) so changing the configuration cell is enough.
    os.makedirs(WORKING_DIR, exist_ok=True)

    drive_dir = "/content/drive/MyDrive/GaggleSignLang"
    for file_name in (
        f"feature_data{FRAMES_OUT}.npy",
        "feature_labels.npy",
        "sign_to_prediction_index_map.json",
    ):
        shutil.copy(f"{drive_dir}/{file_name}", WORKING_DIR)



In [None]:
class ASLData(Dataset):
    """Dataset pairing row ``i`` of ``datax`` with entry ``i`` of ``datay``."""

    def __init__(self, datax, datay):
        self.datax, self.datay = datax, datay

    def __len__(self):
        # the number of samples is defined by the labels
        return len(self.datay)

    def __getitem__(self, index):
        features = self.datax[index, :]
        label = self.datay[index]
        return features, label

class ASLModel(nn.Module):
    """Fully connected classifier over the flattened per-clip features.

    Args:
        p: dropout probability used by both dropout layers.

    The first layer expects FRAMES_OUT * PTS_IN_FRAME * DIMS inputs and the
    last layer produces 250 scores (one per class).
    """

    def __init__(self, p):
        super(ASLModel, self).__init__()
        self.flatten = nn.Flatten()
        self.dropout0 = nn.Dropout(p)
        self.layer0 = nn.Linear(FRAMES_OUT * PTS_IN_FRAME * DIMS, 8192)
        # Not applied in forward(); the attribute is kept so the module
        # structure stays the same.
        self.dropout1 = nn.Dropout(p)

        self.layer0a = nn.Linear(8192, 2024)
        self.layer1 = nn.Linear(2024, 512)
        self.layer2 = nn.Linear(512, 250)

    def forward(self, x):
        """Return unnormalised class scores, one row per sample in ``x``."""
        # No printing here: forward() runs once per batch and again while
        # tracing for ONNX export, so debug output would flood the notebook.
        x = self.flatten(x)
        x = self.layer0(x)
        x = self.dropout0(x)
        x = self.layer0a(x)
        # NOTE(review): there is no activation between the linear layers;
        # confirm that this is intended.
        x = self.layer1(x)
        x = self.layer2(x)
        return x


## Training (doesn't work on Jupyter/macOS  \
Needs a new MacBook with an NVIDIA GPU)

In [None]:
###############
# Train ASLModel on the saved feature arrays and save the result to WORKING_DIR.
# CUDA is used when available; otherwise the loop runs on the CPU.
# NOTE(review): the author reports that training does not run on macOS.
if torch.cuda.is_available():
  device = torch.device("cuda")
  print("++++using GPU++++")
else:
  device = torch.device("cpu")
  print("++++using CPU++++")

EPOCHS = 40
BATCH_SIZE = 64

# Seed torch so weight initialisation, dropout and shuffling repeat between runs
# (the train/validation split below is already seeded).
torch.manual_seed(42)

datax = np.load(f"{WORKING_DIR}/feature_data{FRAMES_OUT}.npy")
datay = np.load(f"{WORKING_DIR}/feature_labels.npy") 

# Replace NaNs (missing landmarks / padded frames) with 0. Converting to
# float32 first halves the memory footprint; the model runs in float32 anyway.
datax = np.nan_to_num(datax.astype(np.float32), copy=False)

trainx, testx, trainy, testy = train_test_split(datax, datay, test_size=0.15, random_state=42)

train_data = ASLData(trainx, trainy)
valid_data = ASLData(testx, testy)

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, num_workers=4, shuffle=True)
val_loader = DataLoader(valid_data, batch_size=BATCH_SIZE, num_workers=4, shuffle=False)

model = ASLModel(0.2).to(device)

# Fail early with a readable message if the feature file and the model disagree.
features_per_clip = int(np.prod(datax.shape[1:]))
assert features_per_clip == model.layer0.in_features, (
    f"feature file provides {features_per_clip} values per clip but the model "
    f"expects {model.layer0.in_features}"
)

opt = torch.optim.Adam(model.parameters(), lr=0.005)
criterion = nn.CrossEntropyLoss()
# The scheduler is stepped once per batch, so step_size counts batches.
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=300, gamma=0.95)

for i in range(EPOCHS):
    model.train()
    
    train_loss_sum = 0.
    train_correct = 0
    train_total = 0  # number of batches, used to average the loss
    for x, y in train_loader:
        # the DataLoader already yields tensors; only dtype and device change
        x = x.float().to(device)
        y = y.long().to(device)

        opt.zero_grad()
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        opt.step()
        
        train_loss_sum += loss.item()
        train_correct += (y_pred.argmax(dim=1) == y).sum().item()
        train_total += 1
        sched.step()
        
    val_loss_sum = 0.
    val_correct = 0
    val_total = 0  # number of batches, used to average the loss
    model.eval()
    with torch.no_grad():
        for x, y in val_loader:
            x = x.float().to(device)
            y = y.long().to(device)

            y_pred = model(x)
            loss = criterion(y_pred, y)
            val_loss_sum += loss.item()
            val_correct += (y_pred.argmax(dim=1) == y).sum().item()
            val_total += 1
                              
    print(f"Epoch:{i} > Train Loss: {(train_loss_sum/train_total):.04f}, Train Acc: {train_correct/len(train_data):0.04f}")
    print(f"Epoch:{i} > Val Loss: {(val_loss_sum/val_total):.04f}, Val Acc: {val_correct/len(valid_data):0.04f}")
    print("="*50)

# Save the pytorch model (the whole module object, which is what the
# conversion section loads back).
py_model_path = f"{WORKING_DIR}/py_model.pt"
torch.save(model, py_model_path)

DATAX (94477, 12, 345)


IndexError: ignored

In [None]:
16 Frames 


32 FRAMES_OUT 3xlinear
==================================================
Epoch:39 > Train Loss: 2.2568, Train Acc: 0.5093
Epoch:39 > Val Loss: 2.8198, Val Acc: 0.4007
==================================================

16 FRAMES_OUT 3xlinear
==================================================
Epoch:39 > Train Loss: 2.2368, Train Acc: 0.5112
Epoch:39 > Val Loss: 2.6392, Val Acc: 0.4321
==================================================

12 FRAMES_OUT 3xlinear
==================================================
Epoch:39 > Train Loss: 2.3254, Train Acc: 0.4921
Epoch:39 > Val Loss: 2.6272, Val Acc: 0.4351
==================================================

8 FRAMES_OUT 3xlinear
==================================================
Epoch:39 > Train Loss: 2.4085, Train Acc: 0.4752
Epoch:39 > Val Loss: 2.6766, Val Acc: 0.4238
==================================================

## TensorFlow Conversion

In [None]:
# Not needed for Kaggle
!pip install onnx-tf --quiet --root-user-action=ignore
!pip install tflite-runtime  --quiet --root-user-action=ignore


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: --root-user-action

Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: --root-user-action


In [None]:
# Export the feature generator (feature_converter) to ONNX.
onnx_feat_gen_path = f"{WORKING_DIR}/feature_gen.onnx"
sample_input = torch.rand((50, 543, 3))  # example input used to trace the module

feature_converter.eval()

feat_gen_export_options = dict(
    opset_version=12,                 # operator set version
    input_names=['input'],            # input tensor name (arbitrary)
    output_names=['output'],          # output tensor name (arbitrary)
    operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK,  # added jh
    dynamic_axes={'input': {0: 'input'}},  # first input axis is exported as dynamic
)

torch.onnx.export(
    feature_converter,      # PyTorch module
    sample_input,           # example input tensor
    onnx_feat_gen_path,     # output file
    **feat_gen_export_options,
)

In [None]:
# load model (trained on colab)
#model = ASLModel(0.2)
py_model_path = f"{WORKING_DIR}/py_model.pt"
# The file holds the whole pickled module (it was written with
# torch.save(model, ...)), so:
#  - map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
#  - weights_only=False is needed on PyTorch releases where torch.load defaults
#    to weights-only loading. Full unpickling can execute arbitrary code, so
#    only load files you created yourself.
load_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load(py_model_path, map_location=load_device, weights_only=False)
model.eval()

ASLModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dropout): Dropout(p=0.2, inplace=False)
  (layer0): Linear(in_features=4140, out_features=1024, bias=True)
  (layer1): Linear(in_features=1024, out_features=512, bias=True)
  (layer2): Linear(in_features=512, out_features=250, bias=True)
)

In [None]:
# Export the trained classifier to ONNX.
onnx_model_path = f"{WORKING_DIR}/asl_model.onnx"

model.eval()

# Build the example input from the model itself so it always matches the
# first layer's width and the device the weights live on. The
# (1, FRAMES_OUT, features-per-frame) layout is the same layout the
# TensorFlow inference wrapper feeds to the model.
model_device = next(model.parameters()).device
flat_width = model.layer0.in_features
if flat_width % FRAMES_OUT == 0:
    sample_shape = (1, FRAMES_OUT, flat_width // FRAMES_OUT)
else:
    # model was trained with a different FRAMES_OUT; fall back to a flat input
    sample_shape = (1, flat_width)
sample_input = torch.rand(sample_shape).to(model_device)

torch.onnx.export(
    model,                        # PyTorch Model
    sample_input,                 # Input tensor
    onnx_model_path,              # Output file (eg. 'output_model.onnx')
    opset_version=12,             # Operator support version
    input_names=['input'],        # Input tensor name (arbitrary)
    output_names=['output'],      # Output tensor name (arbitrary)
    operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK,  # added jh
    dynamic_axes={
        'input' : {0: 'input'}
    }
)

RuntimeError: ignored

In [None]:
import onnx
from onnx_tf.backend import prepare

# Convert both ONNX graphs (feature generator and classifier) to TensorFlow
# SavedModel directories under WORKING_DIR.
tf_feat_gen_path = f'{WORKING_DIR}/tf_feat_gen'
tf_model_path = f'{WORKING_DIR}/tf_model'

onnx_feat_gen = onnx.load(onnx_feat_gen_path)
onnx_model = onnx.load(onnx_model_path)

for onnx_graph, export_dir in (
    (onnx_feat_gen, tf_feat_gen_path),
    (onnx_model, tf_model_path),
):
    tf_rep = prepare(onnx_graph)
    tf_rep.export_graph(export_dir)

<IPython.core.display.Javascript object>

## Final Inference Model in Tensorflow
Both of the converted models will be used here one after another.

In [None]:
import tensorflow as tf

class ASLInferModel(tf.Module):
    """Inference wrapper: converted feature generator followed by the classifier."""

    def __init__(self):
        super(ASLInferModel, self).__init__()
        self.feature_gen = tf.saved_model.load(tf_feat_gen_path)
        self.model = tf.saved_model.load(tf_model_path)
        self.feature_gen.trainable = False
        self.model.trainable = False
    
    @tf.function(input_signature=[
      tf.TensorSpec(shape=[None, 543, 3], dtype=tf.float32, name='inputs')
    ])
    def call(self, input):
        """Classify one clip.

        Args:
            input: float32 tensor of shape (frames, 543, 3).

        Returns:
            Dict with key ``'outputs'`` holding the class scores of the clip.
        """
        output_tensors = {}
        features = self.feature_gen(**{'input': input})['output']
        # The feature generator pads short clips with NaN rows, and the
        # training data had every NaN replaced by 0 before it reached the
        # classifier. Do the same here so the classifier sees the kind of
        # input it was trained on and never produces NaN scores.
        features = tf.where(tf.math.is_nan(features), tf.zeros_like(features), features)
        # add a batch axis for the classifier, then drop it from the result
        output_tensors['outputs'] = self.model(**{'input': tf.expand_dims(features, 0)})['output'][0,:]
        return output_tensors
    
    
mytfmodel = ASLInferModel()
tf.saved_model.save(mytfmodel, f'{WORKING_DIR}/tf_infer_model', signatures={'serving_default': mytfmodel.call})

<IPython.core.display.Javascript object>

## Convert the Model

In [None]:
# Convert the combined SavedModel to TFLite and write it next to the notebook.

tf_infer_model_path = f'{WORKING_DIR}/tf_infer_model'
tflite_model_path = 'model.tflite'

converter = tf.lite.TFLiteConverter.from_saved_model(tf_infer_model_path)
tflite_model = converter.convert()  # serialized model as bytes

# Save the model
with open(tflite_model_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)

<IPython.core.display.Javascript object>

In [None]:
import tflite_runtime.interpreter as tflite

# Smoke test: run the TFLite model on one training clip and show the predicted class.
ROWS_PER_FRAME = 543  # number of landmarks per frame
pq_path = f"{BASE_DIR}/train_landmark_files/53618/1001379621.parquet"

interpreter = tflite.Interpreter(tflite_model_path)
interpreter.allocate_tensors()

found_signatures = list(interpreter.get_signature_list())

# Optional signature check, disabled because REQUIRED_SIGNATURE is not defined here:
# if REQUIRED_SIGNATURE not in found_signatures:
#     raise KernelEvalException('Required input signature not found.')

prediction_fn = interpreter.get_signature_runner("serving_default")
clip_landmarks = load_relevant_data_subset(pq_path)
output = prediction_fn(inputs=clip_landmarks)
sign = np.argmax(output["outputs"])  # index of the highest-scoring class

print(sign, output["outputs"].shape)

30 (250,)


<IPython.core.display.Javascript object>

In [None]:
# FINAL ZIP
# Package the TFLite model as submission.zip; IPython expands $tflite_model_path
# to the value of the Python variable before running the shell command.
!zip submission.zip $tflite_model_path