In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

%load_ext autoreload 
%autoreload 2

In [2]:
def build_encoder(embedding_dim=64):
    inp = keras.Input(shape=(None, 3))  # variable-length sequence of (x,y,z)

    x = layers.Masking(mask_value=0.0)(inp)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(32))(x)
    x = layers.Dense(128, activation="relu")(x) 
    out = layers.Dense(embedding_dim)(x)   # final embed

    return keras.Model(inp, out, name="gesture_encoder")

encoder = build_encoder()
encoder.summary()

In [3]:
gest1 = keras.Input(shape=(None, 3))
gest2 = keras.Input(shape=(None, 3))

enc1 = encoder(gest1)
enc2 = encoder(gest2)

# L2 distance
distance = layers.Lambda(lambda x: tf.norm(x[0] - x[1], axis=1, keepdims=True))([enc1, enc2])

siamese = keras.Model([gest1, gest2], distance, name="siamese_network")
siamese.summary()




In [4]:
def contrastive_loss(y_true, y_pred, margin=1.0):
    # y_true: 0 = same gesture, 1 = different
    # y_pred: distance between embeddings
    return tf.reduce_mean(
        (1 - y_true) * tf.square(y_pred) +
        (y_true) * tf.square(tf.maximum(margin - y_pred, 0))
    )

In [5]:
siamese.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss=contrastive_loss
)

In [6]:
import numpy as np

data = np.load('gesture_dataset.npz', allow_pickle=True)  # must allow pickle for object arrays
X = data['X']  # gestures
y = data['y']  # labels
class_to_label = data['class_to_label'].item()  # convert from 0-d object to dict


In [7]:
import random

def generate_pairs(X, y, num_pairs_per_class=50):
    """
    X: list of gestures (variable-length sequences of (x,y,z))
    y: list of integer labels
    Returns:
        gest1, gest2, labels
    """
    gest1_pairs = []
    gest2_pairs = []
    labels = []

    class_indices = {}
    for idx, label in enumerate(y):
        class_indices.setdefault(label, []).append(idx)

    all_labels = list(class_indices.keys())

    # Positive pairs
    for label, indices in class_indices.items():
        for _ in range(num_pairs_per_class):
            i1, i2 = random.sample(indices, 2)  # two different gestures of same class
            gest1_pairs.append(X[i1])
            gest2_pairs.append(X[i2])
            labels.append(0)

    # Negative pairs
    for _ in range(len(labels)):
        label1, label2 = random.sample(all_labels, 2)
        i1 = random.choice(class_indices[label1])
        i2 = random.choice(class_indices[label2])
        gest1_pairs.append(X[i1])
        gest2_pairs.append(X[i2])
        labels.append(1)

    return gest1_pairs, gest2_pairs, np.array(labels)


In [8]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def pad_gestures(gestures):
    """
    Pad each gesture (list of (x,y,z)) to the same length
    """
    return pad_sequences(
        gestures, 
        padding='post', 
        dtype='float32'
    )


In [9]:
gest1_pairs, gest2_pairs, pair_labels = generate_pairs(X, y)
gest1_padded = pad_gestures(gest1_pairs)
gest2_padded = pad_gestures(gest2_pairs)

In [10]:
siamese.fit(
    [gest1_padded, gest2_padded],
    pair_labels,
    batch_size=16,
    epochs=20,
    validation_split=0.2  # keep 20% for validation
)


Epoch 1/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 64ms/step - loss: 0.2112 - val_loss: 0.1521
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.1502 - val_loss: 0.1584
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.1163 - val_loss: 0.1118
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.0947 - val_loss: 0.0932
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0701 - val_loss: 0.0461
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0540 - val_loss: 0.0370
Epoch 7/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - loss: 0.0407 - val_loss: 0.0266
Epoch 8/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0362 - val_loss: 0.0200
Epoch 9/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1629a2b8260>

In [11]:
encoder.save("gesture_encoder_model.keras")