In [12]:
import os
import tensorflow as tf
import pickle
from utils import LABELS
import random
import numpy as np

In [13]:
def preprocess_data(landmarks):
    """Normalize a set of 3-D landmark points into a unit-sized bounding box.

    Every point is translated so that the smallest x, y and z found across the
    set become 0. The x and y coordinates are then multiplied by one shared
    factor (the reciprocal of the larger of the two extents), which keeps the
    x/y aspect ratio intact, while z is scaled independently by the reciprocal
    of its own extent.

    Args:
        landmarks: An iterable of points; each point must be indexable with at
            least three numeric components (x, y, z).

    Returns:
        A list of ``[x, y, z]`` lists, one per input point, in input order.
        An empty input yields an empty list. An axis whose values are all
        identical (zero extent) is mapped to 0 instead of dividing by zero.
    """
    points = [(point[0], point[1], point[2]) for point in landmarks]
    if not points:
        return []

    xs, ys, zs = zip(*points)

    # Set the origin to 0: the per-axis minimum becomes the new zero.
    min_x, min_y, min_z = min(xs), min(ys), min(zs)

    # Use the true maxima; seeding the maximum with 0 would overstate the
    # extent whenever every value on an axis is negative.
    range_x = max(xs) - min_x
    range_y = max(ys) - min_y
    range_z = max(zs) - min_z

    # Expand it: one shared factor for x and y. Taking the reciprocal of the
    # larger extent is the same as taking the smaller of the two reciprocals.
    xy_extent = max(range_x, range_y)
    scaler = 1 / xy_extent if xy_extent > 0 else 0.0
    scaler_z = 1 / range_z if range_z > 0 else 0.0

    return [
        [(px - min_x) * scaler, (py - min_y) * scaler, (pz - min_z) * scaler_z]
        for px, py, pz in points
    ]

def split_train_test(x, y, test_size=0.3, seed=None):
    """Shuffle paired samples and split them into train and test portions.

    Samples and labels are shuffled together so each sample keeps its label,
    then the first ``int(len(x) * (1 - test_size))`` pairs become the training
    set and the remainder becomes the test set.

    Args:
        x: Iterable of samples.
        y: Iterable of labels, the same length as ``x``.
        test_size: Fraction of the data placed in the test set; must lie in
            the closed interval [0, 1].
        seed: Optional seed for a private random generator, making the split
            reproducible. When ``None`` the module-level ``random`` state is
            used, exactly as before.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)`` of lists.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length or ``test_size`` is
            outside [0, 1].
    """
    x, y = list(x), list(y)
    if len(x) != len(y):
        # zip() would silently drop the unmatched tail; fail loudly instead.
        raise ValueError(
            f"x and y must have the same length, got {len(x)} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")
    if not x:
        # zip(*[]) yields nothing to unpack, so handle the empty case up front.
        return [], [], [], []

    # shuffle data
    rng = random if seed is None else random.Random(seed)
    pairs = list(zip(x, y))
    rng.shuffle(pairs)
    x, y = (list(column) for column in zip(*pairs))

    # split data
    cutoff_index = int(len(x) * (1 - test_size))
    return x[:cutoff_index], y[:cutoff_index], x[cutoff_index:], y[cutoff_index:]


In [14]:
# Collect every recording stored as raw_data/<label>/<file>.
data_root = os.path.abspath('raw_data')
all_paths = list(tf.io.gfile.glob(data_root + r'/*/*'))
if not all_paths:
    raise ValueError('Image dataset directory is empty.')

x = []
y = []

for path in all_paths:
    # The name of the directory holding the file is the gesture label.
    label = os.path.basename(os.path.dirname(path))
    # NOTE: unpickling can execute arbitrary code; only load recordings that
    # come from a trusted source. The context manager closes the file handle
    # as soon as the recording has been read.
    with open(path, 'rb') as recording_file:
        hand_data = pickle.load(recording_file)
    for frame in hand_data:
        processed_data = preprocess_data(frame.hand)
        x.append(processed_data)
        y.append(LABELS[label])

assert len(x) == len(y)
print(f"Total data length: {len(x)}")

_x_train, _y_train, _x_test, _y_test = split_train_test(x, y)
print(f"Train data length: {len(_x_train)}")
print(f"Test data length: {len(_x_test)}")

# Convert the Python lists to arrays for Keras.
x_train = np.array(_x_train)
y_train = np.array(_y_train)
x_test = np.array(_x_test)
y_test = np.array(_y_test)

Total data length: 1600
Train data length: 1120
Test data length: 480


In [15]:
# Small fully connected classifier: flattened (21, 3) input -> 128 ReLU units
# -> dropout -> 8 raw logits.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(21, 3)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(8))

# The last layer emits logits, so the loss applies the softmax itself.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

In [16]:
# Train on the training split for five passes over the data.
model.fit(x=x_train, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x15ee1bbd0>

In [17]:
# Report loss and accuracy on the held-out test split.
model.evaluate(x=x_test, y=y_test, verbose=2)  # type: ignore

15/15 - 0s - loss: 0.1640 - accuracy: 1.0000 - 62ms/epoch - 4ms/step


[0.16401298344135284, 1.0]

In [18]:
# Wrap the trained logits model with a softmax layer so it outputs
# probabilities, then save the wrapped model to disk.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

probability_model.save('gesture_recognizer.keras')

In [19]:
# Run the first test sample through the model as a batch of one and compare
# the most probable class with the true label.
single_sample_batch = x_test[:1]
predictions = probability_model(single_sample_batch).numpy()[0]   #type: ignore
prediction = predictions.argmax()
probability = predictions[prediction]
print(prediction)
print(y_test[0])

4
4
