In [1]:
!pip install tensorflow==2.15 opencv-python scikit-learn matplotlib




In [14]:
import tensorflow as tf
from tensorflow.keras import layers, models

print(tf.__version__)


2.15.0


In [26]:
from pathlib import Path

RAW_ROOT = Path(r"C:\Users\Ankit Kushwaha\Desktop\leapGestRecog\archive (4)\leapGestRecog")
print("Path exists:", RAW_ROOT.exists())

subject_dirs = sorted([p for p in RAW_ROOT.iterdir() if p.is_dir()])
print("Subjects:", [s.name for s in subject_dirs])



Path exists: True
Subjects: ['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', 'leapGestRecog']


In [27]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("Installed location:", tf.__file__)
from tensorflow.keras import layers, models


TensorFlow version: 2.15.0
Installed location: C:\Users\Ankit Kushwaha\anaconda3\Lib\site-packages\tensorflow\__init__.py


In [28]:
import os, re, random, json
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt


In [29]:
IMG_SIZE = (160, 160)
BATCH_SIZE = 64
SEED = 42
random.seed(SEED)

# Label mapping
gesture_names = ["palm","l","fist","fist_moved","thumb","index","ok","palm_moved","c","palm_down"]
label_map = {name: i for i, name in enumerate(gesture_names)}
print("Label map:", label_map)


Label map: {'palm': 0, 'l': 1, 'fist': 2, 'fist_moved': 3, 'thumb': 4, 'index': 5, 'ok': 6, 'palm_moved': 7, 'c': 8, 'palm_down': 9}


In [30]:
def make_dataset(paths_labels, training=False):
    paths, labels = zip(*paths_labels)
    ds = tf.data.Dataset.from_tensor_slices((list(paths), list(labels)))

    def _load(path, label):
        img = tf.io.read_file(path)
        img = tf.io.decode_png(img, channels=1)
        img = tf.image.resize(img, IMG_SIZE)
        img = tf.image.grayscale_to_rgb(img)
        img = tf.cast(img, tf.float32) / 255.0
        return img, tf.one_hot(label, depth=len(label_map))

    ds = ds.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
    if training:
        aug = tf.keras.Sequential([
            layers.RandomFlip("horizontal"),
            layers.RandomRotation(0.05),
            layers.RandomContrast(0.1),
        ])
        ds = ds.map(lambda x, y: (aug(x, training=True), y))
        ds = ds.shuffle(1000, seed=SEED)
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


In [31]:
records = []
for subj in subject_dirs:
    for gesture in subj.iterdir():
        gname = gesture.name.split("_")[-1].lower()
        if gname not in label_map: continue
        label = label_map[gname]
        for img in gesture.glob("*.png"):
            records.append((str(img), label, subj.name))

print("Total images:", len(records))

# Split by subjects (8 train, 1 val, 1 test)
subjects = sorted(set(r[2] for r in records))
random.shuffle(subjects)
train_subj, val_subj, test_subj = subjects[:8], subjects[8:9], subjects[9:10]

train_recs = [(p,l) for p,l,s in records if s in train_subj]
val_recs   = [(p,l) for p,l,s in records if s in val_subj]
test_recs  = [(p,l) for p,l,s in records if s in test_subj]

print(f"Train: {len(train_recs)}, Val: {len(val_recs)}, Test: {len(test_recs)}")

train_ds = make_dataset(train_recs, training=True)
val_ds   = make_dataset(val_recs)
test_ds  = make_dataset(test_recs)


Total images: 14000
Train: 11200, Val: 1400, Test: 1400


In [32]:
base = tf.keras.applications.MobileNetV2(input_shape=(*IMG_SIZE,3),
                                         include_top=False,
                                         weights="imagenet")
base.trainable = False

inputs = layers.Input(shape=(*IMG_SIZE,3))
x = tf.keras.applications.mobilenet_v2.preprocess_input(inputs)
x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(len(label_map), activation="softmax")(x)

model = models.Model(inputs, outputs)
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.summary()


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 160, 160, 3)]     0         
                                                                 
 tf.math.truediv_1 (TFOpLam  (None, 160, 160, 3)       0         
 bda)                                                            
                                                                 
 tf.math.subtract_1 (TFOpLa  (None, 160, 160, 3)       0         
 mbda)                                                           
                                                                 
 mobilenetv2_1.00_160 (Func  (None, 5, 5, 1280)        2257984   
 tional)                                                         
                                                                 
 global_average_pooling2d_1  (None, 1280)              0         
  (GlobalAveragePooling2D)                                 

In [None]:
history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=15)


Epoch 1/15



In [None]:
base.trainable = True
for layer in base.layers[:-40]:
    layer.trainable = False

model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

history_ft = model.fit(train_ds,
                       validation_data=val_ds,
                       epochs=8)


In [None]:
loss, acc = model.evaluate(test_ds)
print("Test accuracy:", acc)


In [None]:
def predict_image(path):
    img = tf.io.read_file(path)
    img = tf.io.decode_png(img, channels=1)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.image.grayscale_to_rgb(img)
    img = tf.cast(img, tf.float32) / 255.0
    x = tf.expand_dims(img, 0)
    preds = model.predict(x)
    pred_id = np.argmax(preds)
    return gesture_names[pred_id], float(np.max(preds))

print(predict_image(test_recs[0][0]))


In [None]:
import cv2

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret: break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, IMG_SIZE[::-1])
    rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
    x = np.expand_dims(rgb.astype("float32")/255.0, 0)
    preds = model.predict(x, verbose=0)
    pred = gesture_names[np.argmax(preds)]
    conf = np.max(preds)
    cv2.putText(frame, f\"{pred} ({conf:.2f})\", (10,40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)
    cv2.imshow(\"Gesture\", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'): break
cap.release()
cv2.destroyAllWindows()
