# Dependencies

In [1]:
%%capture
!pip install pandas
!pip install pyarrow
!pip install tensorflow
!pip install protobuf==3.20.*
!pip install mediapipe==0.9.0.1

In [2]:
import os
import json
import shutil
import random
import matplotlib
import numpy as np
from copy import deepcopy
from itertools import chain
from collections import deque
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from matplotlib import animation, rc
from IPython.display import display, Image

import cv2
import pandas as pd
import mediapipe as mp
import tensorflow as tf
from tensorflow import keras
import pyarrow.parquet as pq
from tensorflow.keras import layers
from mediapipe.framework.formats import landmark_pb2

# For extraction and drawing
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils 
mp_drawing_styles = mp.solutions.drawing_styles

In [3]:
!python --version
print("TensorFlow v" + tf.__version__)
print("Mediapipe v" + mp.__version__)

Python 3.9.13
TensorFlow v2.14.0
Mediapipe v0.9.0.1


In [4]:
seed = 42
random.seed(seed)
np.random.seed(seed)
cv2.setRNGSeed(seed)
tf. random.set_seed(seed)

# Dataset preparation

In [18]:
# Pose coordinates for hand movement.
LPOSE = [13, 15, 17, 19, 21]
RPOSE = [14, 16, 18, 20, 22]
POSE = LPOSE + RPOSE

def extract_from_result(res):
    # Extract specific pose landmarks if available
    px, py, pz = [[]]*3
    if res.pose_landmarks:
        for i in POSE:
            lm = res.pose_landmarks.landmark[i]
            px.append(lm.x)
            py.append(lm.y)
            pz.append(lm.z)
    else:
        px, py, pz = [[0.]*len(POSE)]*3

    # Extract left hand landmarks if available
    lx, ly, lz = [[]]*3
    if res.left_hand_landmarks:
        for lm in res.left_hand_landmarks.landmark:
            lx.append(lm.x)
            ly.append(lm.y)
            lz.append(lm.z)
    else:
        lx, ly, lz = [[0.]*21]*3

    # Extract right hand landmarks if available
    rx, ry, rz = [[]]*3
    if res.right_hand_landmarks:
        for lm in res.right_hand_landmarks.landmark:
            rx.append(lm.x)
            ry.append(lm.y)
            rz.append(lm.z)
    else:
        rx, ry, rz = [[0.]*21]*3

    return list(chain(rx, lx, px, ry, ly, py, rz, lz, pz))

In [19]:
def load_data_from_video(path_to_video: str):
    data = []
    video = cv2.VideoCapture(path_to_video)
    try:
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while True:
                _, frame = video.read()
                if frame is None:
                    break

                frame.flags.writeable = False
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(frame)
                data.append(extract_from_result(results))
    finally:
        video.release()
        
    return data

In [20]:
MAX_LEN = 128

def preprocess_data(data: list):
    sliding_window = deque(maxlen=MAX_LEN)

    sequences = []
    for pose in data:
        sliding_window.append(preprocess_lms(pose))
        if len(sliding_window) == MAX_LEN:
            seq = deepcopy(list(sliding_window))
            sequences.append(seq)
        
    return  sequences

In [21]:
x = []
y = []
for i in tqdm(range(1, 6)):
    data = load_data_from_video(f"signing samples/{i}.mp4")
    d = preprocess_data(data)
    x.extend(d)
    y.extend([1]*len(d))
    
for i in tqdm(range(1, 10)):
    data = load_data_from_video(f"not signing samples/{i}.mp4")
    d = preprocess_data(data)
    x.extend(d)
    y.extend([0]*len(d))

x = np.array(x)
y = np.array(y)
perm = np.random.permutation(len(x))

x = x[perm]
y = y[perm]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

In [22]:
x.shape

(0,)

In [15]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(16, input_shape=(MAX_LEN, 156)))
model.add(Dense(2, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 16)                11072     
                                                                 
 dense_1 (Dense)             (None, 2)                 34        
                                                                 
Total params: 11106 (43.38 KB)
Trainable params: 11106 (43.38 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
model.fit(x[:10000], y[:10000], validation_split=0.3, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x175008b7dc0>

In [41]:
class SignDetectionModel(tf.Module):
    def __init__(self, model: Sequential):
        super(SignDetectionModel, self).__init__()
        self.model = model

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[tf.constant(MAX_LEN, dtype=tf.int32), tf.constant(156, dtype=tf.int32)], dtype=tf.float32),
    ])
    def predict(self, landmarks):
        # Inference
        landmarks = tf.expand_dims(landmarks, axis=0)
        logits = self.model(landmarks)

        probabilities = tf.nn.softmax(logits)[0]
        pred = tf.argmax(probabilities)
        return {"result" : pred}

In [42]:
signing_detector = SignDetectionModel(model)

In [43]:
signing_detector.predict(np.zeros((MAX_LEN, 156)))

{'result': <tf.Tensor: shape=(), dtype=int64, numpy=0>}

In [44]:
save_model_name = "saved_model"
if os.path.isdir(save_model_name):
    print(f"A model with the same name has already been saved!")
else:
    tf.saved_model.save(signing_detector, export_dir=save_model_name)

INFO:tensorflow:Assets written to: saved_model\assets


INFO:tensorflow:Assets written to: saved_model\assets
