# ASL gestures recognizer


**Acknowledgements**

Adapted from https://www.kaggle.com/code/benedar/mediapipe-model-for-asl/notebook


In [None]:
!pip install --upgrade pip
!pip install mediapipe-model-maker

Collecting pip
  Downloading pip-23.3.2-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-23.3.2
Collecting mediapipe-model-maker
  Downloading mediapipe_model_maker-0.2.1.3-py3-none-any.whl.metadata (1.6 kB)
Collecting mediapipe>=0.10.0 (from mediapipe-model-maker)
  Downloading mediapipe-0.10.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting tensorflow-addons (from mediapipe-model-maker)
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting tf-models-official>=2.13.1 (from mediapipe-model-maker)
  Downloading tf_models_official-2.15.0-py2.py3-none-any.whl.metadata (1.4 kB)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import shutil
import tensorflow as tf
import pathlib
import pandas as pd
import mediapipe as mp
import cv2

from mediapipe_model_maker import gesture_recognizer
from tqdm import tqdm


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



## Build the dataset

### Download the dataset
> Training data set contains 87,000 images which are 200x200 pixels. There are 29 classes, of which 26 are for the letters A-Z and 3 classes for SPACE, DELETE and NOTHING.

The data has to be organized into train, validation, and test splits (the original validation set is very small).

Set up access to the Kaggle API (you will be prompted to upload your `kaggle.json` token):

In [None]:
# https://www.kaggle.com/discussions/general/74235

from google.colab import files
files.upload()

!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download -d grassknoted/asl-alphabet

Saving kaggle.json to kaggle.json
rm: cannot remove '/root/.kaggle': No such file or directory
Downloading asl-alphabet.zip to /content
 99% 1.02G/1.03G [00:10<00:00, 177MB/s]
100% 1.03G/1.03G [00:10<00:00, 101MB/s]


In [None]:
! rm -rf asl_alphabet_train/
! rm -rf asl_alphabet_/
! unzip -o asl-alphabet.zip > /dev/null

In [None]:
# Rename the nested test folder to asl_alphabet_test_2.
# NOTE(review): if asl_alphabet_test_2/ already exists (e.g. on a re-run without
# a fresh extraction) `mv` moves the folder *inside* it instead of renaming.
! mv asl_alphabet_test/asl_alphabet_test/ asl_alphabet_test/asl_alphabet_test_2/
# After a successful rename the first path no longer exists, so this is a no-op
# safeguard; -f keeps both removals silent when the folders are absent.
# NOTE(review): a later cell still defines test_dir as
# /content/asl_alphabet_test/asl_alphabet_test/ — confirm which path is intended.
! rm -rf asl_alphabet_test/asl_alphabet_test/
! rm -rf asl_alphabet_test/asl_alphabet_validation/

In [None]:
# Short aliases for the MediaPipe solution modules used in this notebook.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Running total of images in which no hand was detected.
count = 0

# Hand detector configured for independent still images (not a video stream).
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5)


In [None]:
# Locations of the extracted dataset inside the Colab runtime.
# NOTE(review): valid_dir and test_dir are not used by any visible cell, and an
# earlier cell renames asl_alphabet_test/asl_alphabet_test/ — confirm they are
# still needed / still point at existing folders.
train_dir = "/content/asl_alphabet_train/asl_alphabet_train/"
valid_dir = "/content/asl_alphabet_train/asl_alphabet_validation/"
test_dir =  "/content/asl_alphabet_test/asl_alphabet_test/"

SEED = 123
random.seed(SEED)

# Every training image in which MediaPipe Hands finds no hand is moved into the
# "nothing" class folder, so the remaining classes only contain usable samples.
skip_labels = ["nothing"]
none_path = os.path.join(train_dir, "nothing")
# Without an existing target directory shutil.move would rename the first image
# to a *file* called "nothing" instead of moving it into a folder.
os.makedirs(none_path, exist_ok=True)

# Reset here (not only in an earlier cell) so re-running this cell does not
# accumulate on top of a previous pass.
count = 0
unreadable_count = 0

for label in sorted(os.listdir(train_dir)):
    src = os.path.join(train_dir, label)
    # Skip the target class itself and any stray non-directory entry.
    if label in skip_labels or not os.path.isdir(src):
        continue
    # `filename` (not `files`) so the google.colab `files` module is not clobbered.
    for filename in tqdm(os.listdir(src), desc=label):
        img_path = os.path.join(src, filename)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # instead of raising; leave such files in place and report them.
            unreadable_count += 1
            continue

        # convert the BGR image to RGB before processing.
        results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        if not results.multi_hand_landmarks:
            count += 1
            shutil.move(img_path, none_path)

if unreadable_count:
    print(f"skipped {unreadable_count} unreadable image files")


J
Y
H
C
P
D
E
R
M
space
A
T
nothing
N
U
V
G
del
B
L
F
O
S
W
K
Z
X
Q
I


In [None]:
# Report how many training images were moved because no hand was detected.
print(f"no hand detected on {count} images")

no hand detected on 20172 images


In [None]:
# Rename the "nothing" class folder (which by now also holds the images in which
# no hand was detected) to "none".
# NOTE(review): presumably because Model Maker's gesture recognizer expects a
# label literally named "none" — confirm against the MediaPipe documentation.
# NOTE(review): if a "none" folder already exists, `mv` nests "nothing" inside it.
! mv asl_alphabet_train/asl_alphabet_train/nothing asl_alphabet_train/asl_alphabet_train/none

Now that we have built the dataset, let's list the labels and view a few example images per label from the training set:

In [None]:
# Each sub-directory of train_dir is one class label; plain files are ignored.
print(train_dir)
labels = [
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
]

# Displayed alphabetically; `labels` itself keeps the directory-listing order.
print(sorted(labels))

/content/asl_alphabet_train/asl_alphabet_train/
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'none', 'space']


In [None]:
# Number of sample images displayed per label.
NUM_EXAMPLES = 3

# Preview a handful of labels, one figure (row of images) per label.
for label in labels[:5]:
  label_dir = os.path.join(train_dir, label)
  # Sorted so the same files are shown on every run; a class may hold fewer
  # than NUM_EXAMPLES images after the no-hand filtering.
  example_filenames = sorted(os.listdir(label_dir))[:NUM_EXAMPLES]
  # squeeze=False always returns a 2-D axes array, so indexing also works
  # when NUM_EXAMPLES == 1.
  fig, axs = plt.subplots(1, NUM_EXAMPLES, figsize=(10,2), squeeze=False)
  for ax, filename in zip(axs[0], example_filenames):
    ax.imshow(plt.imread(os.path.join(label_dir, filename)))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
  # Hide panels for which the class had no image left.
  for ax in axs[0][len(example_filenames):]:
    ax.set_visible(False)
  fig.suptitle(f'Showing {NUM_EXAMPLES} examples for {label}')

plt.show()

In [None]:
# Build the Model Maker dataset from the class sub-folders of train_dir, using
# the default hand-preprocessing parameters.
data = gesture_recognizer.Dataset.from_folder(
    dirname=train_dir, hparams=gesture_recognizer.HandDataPreprocessingParams()
)

# First cut: 80% for training. The remaining 20% is then halved into the
# validation and test splits.
train_data, rest_data = data.split(0.8)
validation_data, test_data = rest_data.split(0.5)

Downloading https://storage.googleapis.com/mediapipe-assets/palm_detection_full.tflite to /tmp/model_maker/gesture_recognizer/palm_detection_full.tflite
Downloading https://storage.googleapis.com/mediapipe-assets/hand_landmark_full.tflite to /tmp/model_maker/gesture_recognizer/hand_landmark_full.tflite
Downloading https://storage.googleapis.com/mediapipe-assets/gesture_embedder.tar.gz to /tmp/model_maker/gesture_recognizer/gesture_embedder


In [None]:
# Training configuration: 15 epochs, with artifacts written under
# "exported_model".
hparams = gesture_recognizer.HParams(export_dir="exported_model", epochs=15)
options = gesture_recognizer.GestureRecognizerOptions(hparams=hparams)

# Fit the gesture classifier on the training split, monitoring the validation split.
model = gesture_recognizer.GestureRecognizer.create(
    train_data=train_data, validation_data=validation_data, options=options
)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 hand_embedding (InputLayer  [(None, 128)]             0         
 )                                                               
                                                                 
 batch_normalization (Batch  (None, 128)               512       
 Normalization)                                                  
                                                                 
 re_lu (ReLU)                (None, 128)               0         
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 custom_gesture_recognizer_  (None, 29)                3741      
 out (Dense)                                                     
                                                             

In [None]:
# Score the trained recognizer on the held-out test split, one sample per batch.
test_metrics = model.evaluate(test_data, batch_size=1)
loss, acc = test_metrics
print(f"Test loss:{loss}, Test accuracy:{acc}")

Test loss:0.2473103255033493, Test accuracy:0.9055434465408325


In [None]:
# Export the trained recognizer; with the HParams used for training the output
# lands under "exported_model" (the archive listing of that folder includes
# gesture_recognizer.task).
model.export_model()

Downloading https://storage.googleapis.com/mediapipe-assets/gesture_embedder.tflite to /tmp/model_maker/gesture_recognizer/gesture_embedder.tflite
Using existing files at /tmp/model_maker/gesture_recognizer/palm_detection_full.tflite
Using existing files at /tmp/model_maker/gesture_recognizer/hand_landmark_full.tflite
Downloading https://storage.googleapis.com/mediapipe-assets/canned_gesture_classifier.tflite to /tmp/model_maker/gesture_recognizer/canned_gesture_classifier.tflite


In [None]:
!zip -r models_gesture_reco.zip exported_model

updating: exported_model/ (stored 0%)
updating: exported_model/checkpoint (deflated 43%)
updating: exported_model/best_model_weights.index (deflated 56%)
updating: exported_model/best_model_weights.data-00000-of-00001 (deflated 9%)
updating: exported_model/gesture_recognizer.task (deflated 24%)
updating: exported_model/logs/ (stored 0%)
updating: exported_model/logs/validation/ (stored 0%)
updating: exported_model/logs/validation/events.out.tfevents.1704744706.cf7bbf4e349f.173.1.v2 (deflated 75%)
updating: exported_model/logs/train/ (stored 0%)
updating: exported_model/logs/train/events.out.tfevents.1704744605.cf7bbf4e349f.173.0.v2 (deflated 85%)
updating: exported_model/epoch_models/ (stored 0%)
updating: exported_model/epoch_models/model-0013.index (deflated 56%)
updating: exported_model/epoch_models/model-0002.index (deflated 56%)
updating: exported_model/epoch_models/model-0005.index (deflated 56%)
updating: exported_model/epoch_models/model-0010.data-00000-of-00001 (deflated 9%)
u

# Export datasets

In [None]:
# Persist each split as a tf.data dataset in a directory named after the split.
for split_name, split in (
    ("train_data", train_data),
    ("validation_data", validation_data),
    ("test_data", test_data),
):
    split.gen_tf_dataset().save(split_name)

In [None]:
!zip -r datasets_postproc.zip train_data validation_data test_data

  adding: train_data/ (stored 0%)
  adding: train_data/11448688883308010032/ (stored 0%)
  adding: train_data/11448688883308010032/00000000.shard/ (stored 0%)
  adding: train_data/11448688883308010032/00000000.shard/00000000.snapshot (deflated 30%)
  adding: train_data/dataset_spec.pb (deflated 30%)
  adding: train_data/snapshot.metadata (stored 0%)
  adding: validation_data/ (stored 0%)
  adding: validation_data/dataset_spec.pb (deflated 30%)
  adding: validation_data/10973640621223305708/ (stored 0%)
  adding: validation_data/10973640621223305708/00000000.shard/ (stored 0%)
  adding: validation_data/10973640621223305708/00000000.shard/00000000.snapshot (deflated 30%)
  adding: validation_data/snapshot.metadata (stored 0%)
  adding: test_data/ (stored 0%)
  adding: test_data/dataset_spec.pb (deflated 30%)
  adding: test_data/7295529981080186486/ (stored 0%)
  adding: test_data/7295529981080186486/00000000.shard/ (stored 0%)
  adding: test_data/7295529981080186486/00000000.shard/000000