In [7]:
!pip install opencv-python mediapipe pandas



# Dynamic Model

## Collect data for Dynamic model

In [1]:
import cv2
import mediapipe as mp
import numpy as np
import os

mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

DATASET_PATH = "dynamic_gesture_dataset"
if not os.path.exists(DATASET_PATH):
    os.makedirs(DATASET_PATH)

# Ask user for gesture name
gesture_name = input("Enter gesture name: ")
gesture_folder = os.path.join(DATASET_PATH, gesture_name)

if not os.path.exists(gesture_folder):
    os.makedirs(gesture_folder)

# Video capture
cap = cv2.VideoCapture(0)

sequence = []  # Stores landmark sequences
seq_length = 30  # Number of frames per gesture sample
collecting = False  # Toggle to start/stop recording
sample_count = 0

print("Press 's' to START/STOP collecting, 'n' for a new gesture, 'q' to quit")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert frame to RGB
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Extract landmark coordinates
            landmarks = []
            for lm in hand_landmarks.landmark:
                landmarks.extend([lm.x, lm.y, lm.z])  # Store x, y, z positions

            if collecting:
                sequence.append(landmarks)

            # If sequence reaches required length, save it
            if len(sequence) == seq_length:
                np.save(os.path.join(gesture_folder, f"sample_{sample_count}.npy"), np.array(sequence))
                print(f"Saved sample {sample_count} for {gesture_name}")
                sequence = []  # Reset sequence
                sample_count += 1

    # Display Status
    status_text = f"Gesture: {gesture_name} | Collecting: {'ON' if collecting else 'OFF'} | Samples: {sample_count}"
    cv2.putText(frame, status_text, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.imshow("Dynamic Gesture Capture", frame)

    # Key Handling
    key = cv2.waitKey(1) & 0xFF
    if key == ord('s'):  # Toggle data collection
        collecting = not collecting
        print(f"Data collection {'started' if collecting else 'paused'}")
    elif key == ord('n'):  # Enter new gesture name
        gesture_name = input("Enter new gesture name: ")
        gesture_folder = os.path.join(DATASET_PATH, gesture_name)
        os.makedirs(gesture_folder, exist_ok=True)
        sample_count = 0
        print(f"Switched to gesture: {gesture_name}")
    elif key == ord('q'):  # Quit
        break

cap.release()
cv2.destroyAllWindows()


Enter gesture name:  NO


Press 's' to START/STOP collecting, 'n' for a new gesture, 'q' to quit
Data collection started
Saved sample 0 for NO
Saved sample 1 for NO
Saved sample 2 for NO
Saved sample 3 for NO
Saved sample 4 for NO
Saved sample 5 for NO
Saved sample 6 for NO
Saved sample 7 for NO
Saved sample 8 for NO
Saved sample 9 for NO
Saved sample 10 for NO
Saved sample 11 for NO
Saved sample 12 for NO
Saved sample 13 for NO
Saved sample 14 for NO
Saved sample 15 for NO
Saved sample 16 for NO
Saved sample 17 for NO
Saved sample 18 for NO
Saved sample 19 for NO
Saved sample 20 for NO
Saved sample 21 for NO
Saved sample 22 for NO
Data collection paused


Enter new gesture name:  FIRE


Switched to gesture: FIRE
Data collection started
Saved sample 0 for FIRE
Saved sample 1 for FIRE
Saved sample 2 for FIRE
Saved sample 3 for FIRE
Saved sample 4 for FIRE
Saved sample 5 for FIRE
Saved sample 6 for FIRE
Saved sample 7 for FIRE
Saved sample 8 for FIRE
Saved sample 9 for FIRE
Saved sample 10 for FIRE
Saved sample 11 for FIRE
Saved sample 12 for FIRE
Saved sample 13 for FIRE
Saved sample 14 for FIRE
Saved sample 15 for FIRE
Saved sample 16 for FIRE
Saved sample 17 for FIRE
Saved sample 18 for FIRE
Saved sample 19 for FIRE
Saved sample 20 for FIRE
Saved sample 21 for FIRE
Saved sample 22 for FIRE
Saved sample 23 for FIRE
Saved sample 24 for FIRE
Saved sample 25 for FIRE
Saved sample 26 for FIRE
Saved sample 27 for FIRE
Saved sample 28 for FIRE
Saved sample 29 for FIRE
Data collection paused


Enter new gesture name:  YOYO


Switched to gesture: YOYO
Data collection started
Saved sample 0 for YOYO
Saved sample 1 for YOYO
Saved sample 2 for YOYO
Saved sample 3 for YOYO
Saved sample 4 for YOYO
Saved sample 5 for YOYO
Saved sample 6 for YOYO
Saved sample 7 for YOYO
Saved sample 8 for YOYO
Saved sample 9 for YOYO
Saved sample 10 for YOYO
Saved sample 11 for YOYO
Saved sample 12 for YOYO
Saved sample 13 for YOYO
Saved sample 14 for YOYO
Saved sample 15 for YOYO
Saved sample 16 for YOYO
Saved sample 17 for YOYO
Saved sample 18 for YOYO
Saved sample 19 for YOYO
Saved sample 20 for YOYO
Saved sample 21 for YOYO
Saved sample 22 for YOYO
Saved sample 23 for YOYO
Saved sample 24 for YOYO
Saved sample 25 for YOYO
Data collection paused


In [20]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Downloading tensorflow_intel-2.18.0-cp312-cp312-win_amd64.whl.metadata (4.9 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting termcolor>=1.1.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading termcolor-2.5.0-py3-none-any.whl.metadata (6.1 kB)
Collecting grpcio<2.0,>=1.24.3 (from tenso

## Prepares Data for training Dynamic Model

In [25]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Path to dataset
DATASET_PATH = "dynamic_gesture_dataset"

# Get gesture labels (folder names)
gesture_labels = sorted(os.listdir(DATASET_PATH))
num_classes = len(gesture_labels)
print("Classes:", gesture_labels)

# Encode labels using LabelEncoder
label_encoder = LabelEncoder()
y_labels = label_encoder.fit_transform(gesture_labels)  # Encode gesture labels

# Save label encoder for future inference
np.save("dynamic_label_encoder.npy", label_encoder.classes_)
print("Label encoder saved as 'dynamic_label_encoder.npy'")

# Load sequences and labels
X, y = [], []

for label, gesture in zip(y_labels, gesture_labels):
    gesture_path = os.path.join(DATASET_PATH, gesture)
    for file in os.listdir(gesture_path):
        if file.endswith(".npy"):
            data = np.load(os.path.join(gesture_path, file))  # Load numpy file
            X.append(data)  # Store sequence
            y.append(label)  # Store encoded label

X = np.array(X)
y = np.array(y)

# Normalize data (optional but recommended)
X = X / np.max(X)

# One-hot encode labels
y = to_categorical(y, num_classes)

# Split into training & test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Dataset loaded! X shape:", X.shape, "y shape:", y.shape)


Classes: ['.ipynb_checkpoints', 'FIRE', 'NO', 'WAVE', 'YOYO']
Label encoder saved as 'dynamic_label_encoder.npy'
Dataset loaded! X shape: (120, 30, 63) y shape: (120, 5)


## Build Dynamic Model

In [27]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout

# Define model with Input layer
model = Sequential([
    Input(shape=(30, X.shape[2])),  # Correct way to specify input shape
    LSTM(64, return_sequences=True, activation="relu"),
    LSTM(64, return_sequences=False, activation="relu"),
    Dense(64, activation="relu"),
    Dropout(0.2),
    Dense(32, activation="relu"),
    Dropout(0.2),
    Dense(num_classes, activation="softmax")  # Output layer
])

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()


## Train and save Dynamic Model

In [29]:
# Train the model
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_data=(X_test, y_test))

# Save the trained model
model.save("gesture_model.keras")  # Recommended format

print("Model trained and saved as gesture_model.keras")


Epoch 1/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 125ms/step - accuracy: 0.0534 - loss: 1.6419 - val_accuracy: 0.3333 - val_loss: 1.5951
Epoch 2/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4089 - loss: 1.5820 - val_accuracy: 0.6250 - val_loss: 1.5648
Epoch 3/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.5972 - loss: 1.5531 - val_accuracy: 0.6250 - val_loss: 1.5279
Epoch 4/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4318 - loss: 1.4995 - val_accuracy: 0.6250 - val_loss: 1.3410
Epoch 5/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.5939 - loss: 1.2663 - val_accuracy: 0.6250 - val_loss: 1.1175
Epoch 6/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.5082 - loss: 1.2337 - val_accuracy: 0.4167 - val_loss: 1.0502
Epoch 7/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━

# Static Model


## Collect Data for Static Model from livefeed

In [4]:
import cv2
import numpy as np
import mediapipe as mp
import pandas as pd
import os

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_draw = mp.solutions.drawing_utils

file_path = "gesture_dataset.csv"

# Define column names (21 landmarks * 3 coordinates + label)
columns = [f"{c}{i}" for i in range(21) for c in ['x', 'y', 'z']] + ["label"]

# Ensure dataset exists; if not, create it
if not os.path.exists(file_path):
    pd.DataFrame(columns=columns).to_csv(file_path, index=False)

# Start capturing from webcam
cap = cv2.VideoCapture(0)

print("Press 'N' to enter a new gesture label.")
print("Press 'W' to start recording, 'S' to stop recording.")
print("Press 'Q' to quit.")

recording = False
label = None

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert to RGB for MediaPipe processing
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    # Draw landmarks if a hand is detected
    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Display status
    status_text = f"Recording: {recording} | Label: {label if label else 'None'}"
    cv2.putText(frame, status_text, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow("Hand Gesture Capture", frame)

    key = cv2.waitKey(1) & 0xFF  # Proper key detection

    # Enter new label
    if key == ord('n'):
        print("Enter new gesture label and press Enter in the terminal:")
        label = input().strip()
        print(f"New label set: {label}")

    # Start recording
    elif key == ord('w') and label:
        recording = True
        print(f"Started recording gesture: {label}")

    # Stop recording
    elif key == ord('s'):
        recording = False
        print("Stopped recording.")

    # Quit the program
    elif key == ord('q'):
        print("Exiting...")
        break

    # Save detected hand landmarks to CSV
    if recording and label and result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            landmarks = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]).flatten()

            # Append new gesture data to the CSV file
            new_row = pd.DataFrame([np.append(landmarks, label)], columns=columns)
            new_row.to_csv(file_path, mode='a', header=False, index=False)

            print(f"Saved gesture: {label}")

cap.release()
cv2.destroyAllWindows()


Press 'N' to enter a new gesture label.
Press 'W' to start recording, 'S' to stop recording.
Press 'Q' to quit.
Enter new gesture label and press Enter in the terminal:


 PEACE


New label set: PEACE
Started recording gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
Saved gesture: PEACE
S

## Prepare Data for training Static Model

In [19]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Load dataset
file_path = "gesture_dataset.csv"
data = pd.read_csv(file_path)

# Separate features (X) and labels (y)
X = data.iloc[:, :-1].values  # All columns except last (landmarks)
y = data.iloc[:, -1].values   # Last column (gesture labels)

# Encode labels (convert text to numbers)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# One-hot encode labels (for categorical classification)
y_onehot = keras.utils.to_categorical(y_encoded)

# Split into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

# Normalize inputs (optional, but helps with training stability)
X_train = X_train / np.max(X_train)
X_test = X_test / np.max(X_test)

# Save label mapping for future use
np.save("static_label_encoder.npy", label_encoder.classes_)

print("Data Loaded & Preprocessed!")


Data Loaded & Preprocessed!


## Build Static Model

In [21]:
# Define the model
model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),  # Input layer (landmark data)
    keras.layers.Dense(128, activation='relu'),  # Hidden layer 1
    keras.layers.Dense(64, activation='relu'),   # Hidden layer 2
    keras.layers.Dense(y_onehot.shape[1], activation='softmax')  # Output layer (gesture classes)
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

print("Model Created!")


Model Created!


## Train and save Static Model

In [23]:
# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Save trained model
model.save("gesture_static_model.keras")

print("Model Trained & Saved!")


Epoch 1/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.4387 - loss: 1.4183 - val_accuracy: 0.9350 - val_loss: 0.7111
Epoch 2/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9719 - loss: 0.5092 - val_accuracy: 0.9950 - val_loss: 0.1661
Epoch 3/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.1240 - val_accuracy: 1.0000 - val_loss: 0.0621
Epoch 4/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0465 - val_accuracy: 1.0000 - val_loss: 0.0315
Epoch 5/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0257 - val_accuracy: 1.0000 - val_loss: 0.0204
Epoch 6/50
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0157 - val_accuracy: 1.0000 - val_loss: 0.0132
Epoch 7/50
[1m75/75[0m [32m━━━━━━━━━━

# Run Live-Feed Hand Gesture Detection

In [31]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow import keras
from collections import deque

# Load trained models
static_model = keras.models.load_model("gesture_static_model.keras")  # Static gesture model
dynamic_model = keras.models.load_model("gesture_model.keras")  # Dynamic gesture model

# Load separate label encoders for static & dynamic models
static_label_encoder = np.load("static_label_encoder.npy", allow_pickle=True)
dynamic_label_encoder = np.load("dynamic_label_encoder.npy", allow_pickle=True)

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_draw = mp.solutions.drawing_utils

# Buffer for dynamic gesture sequences
sequence_length = 30  # Number of frames for a dynamic gesture
sequence = deque(maxlen=sequence_length)

# Start webcam
cap = cv2.VideoCapture(0)

print("Running Static & Dynamic Gesture Prediction... Press 'Q' to quit.")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert to RGB for processing
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    # Default predictions
    static_pred = "No Hand"
    dynamic_pred = "Collecting..."

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Extract 21 landmark points (x, y, z)
            landmarks = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]).flatten()

            # Normalize landmarks
            landmarks = landmarks / np.max(landmarks)

            # **Static Gesture Prediction**
            static_predictions = static_model.predict(np.expand_dims(landmarks, axis=0), verbose=0)
            static_pred = static_label_encoder[np.argmax(static_predictions)]

            # **Dynamic Gesture Prediction**
            sequence.append(landmarks)  # Add new frame to the sequence
            if len(sequence) == sequence_length:
                input_sequence = np.expand_dims(np.array(sequence), axis=0)
                dynamic_predictions = dynamic_model.predict(input_sequence, verbose=0)
                dynamic_pred = dynamic_label_encoder[np.argmax(dynamic_predictions)]

    # Display predicted gestures
    cv2.putText(frame, f"Static: {static_pred}", (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(frame, f"Dynamic: {dynamic_pred}", (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    cv2.imshow("Hand Gesture Recognition", frame)

    # Press 'Q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


Running Static & Dynamic Gesture Prediction... Press 'Q' to quit.
