### Collecting the dataset using webcam

In [None]:
import cv2
import numpy as np
import os
import mediapipe as mp

# Define dataset path
DATA_PATH = "SignLanguageDataset"
actions = np.array(["Hello", "Yes", "No", "Please", "ThankYou", "Mother", "Father", "Love", "Baby", "Sorry", "You're welcome"])

sequence_length = 30  # Number of frames per sequence
num_sequences = 60  # Number of sequences per action

# Initialize MediaPipe
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Create directories
for action in actions:
    os.makedirs(os.path.join(DATA_PATH, action), exist_ok=True)

# Function to extract keypoints from the frame
def extract_landmarks(image, holistic):
    """Extract hand and pose landmarks using MediaPipe"""
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = holistic.process(img_rgb)

    # Extract keypoints
    pose = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]) if results.pose_landmarks else np.zeros((33, 3))
    left_hand = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]) if results.left_hand_landmarks else np.zeros((21, 3))
    right_hand = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]) if results.right_hand_landmarks else np.zeros((21, 3))

    return np.concatenate([pose.flatten(), left_hand.flatten(), right_hand.flatten()])

# Open webcam
cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    for action in actions:
        for sequence in range(120,180):
            print(f"📷 Recording {action} - Sequence {sequence + 1}/{num_sequences}")

            # Wait before starting recording
            for frame in range(5):
                ret, frame = cap.read()
                cv2.putText(frame, f"GET READY: {action}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow("Webcam", frame)
                cv2.waitKey(1000)

            # Collect sequence data
            for frame_idx in range(sequence_length):
                ret, frame = cap.read()

                # Extract keypoints
                keypoints = extract_landmarks(frame, holistic)
                print(f"Extracted Keypoints: {keypoints.shape}") 

                # Save keypoints
                keypoints_path = os.path.join(DATA_PATH, action, f"{sequence}_{frame_idx}.npy")
                np.save(keypoints_path, keypoints)

                # Show frame with landmarks
                mp_drawing.draw_landmarks(frame, holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).pose_landmarks, mp_holistic.POSE_CONNECTIONS)
                cv2.putText(frame, f"Collecting {action} | sequence {sequence} | Frame {frame_idx + 1}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow("Webcam", frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

cap.release()
cv2.destroyAllWindows()

print("✅ Data collection complete! All keypoints saved.")


In [2]:
cap.release()
cv2.destroyAllWindows()

### Pre-processing the collected data

In [5]:
import numpy as np
import os

# Define dataset path
DATA_PATH = "SignLanguageDataset"

# Parameters
num_sequences = 60  # Number of sequences per action
sequence_length = 30  # Number of frames per sequence
actions = np.array(["Hello", "Yes", "No", "Please", "ThankYou", "Mother", "Father", "Love", "Baby", "Sorry", "You're welcome"])

# Load data
X, Y = [], []
for action_idx, action in enumerate(actions):
    for sequence in range(num_sequences):  # Assume 100 sequences per action
        sequence_data = []
        for frame in range(sequence_length):
            keypoints_path = os.path.join(DATA_PATH, action, f"{sequence}_{frame}.npy")
            keypoints = np.load(keypoints_path)  # Load keypoint data
            sequence_data.append(keypoints)
        X.append(sequence_data)
        Y.append(action_idx)

# Convert to NumPy arrays
X = np.array(X)
Y = np.array(Y)

# Save processed data
np.save("X_sequences.npy", X)
np.save("Y_labels.npy", Y)

print("Data preprocessing complete!")


Data preprocessing complete!


### Create the model and training

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
import numpy as np

# Load processed data
X = np.load("X_sequences.npy")
Y = np.load("Y_labels.npy")

# One-hot encode labels
Y = tf.keras.utils.to_categorical(Y, num_classes=len(set(Y)))

# Split dataset
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

# Define LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(128, return_sequences=True, activation='relu'),
    Dropout(0.3),
    LSTM(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(len(set(Y.argmax(axis=1))), activation='softmax')  # Output layer (one neuron per gesture)
])

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, Y_train, epochs=50, batch_size=32, validation_data=(X_val, Y_val))

# Save model
model.save("lstm_sign_language_model_3")
print("LSTM model training complete!")


Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
INFO:tensorflow:Assets written to: lstm_sign_language_model_3\assets


INFO:tensorflow:Assets written to: lstm_sign_language_model_3\assets


LSTM model training complete!


In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from sklearn.model_selection import train_test_split
import numpy as np

# Load processed data
X = np.load("X_sequences.npy")
Y = np.load("Y_labels.npy")

# One-hot encode labels
Y = tf.keras.utils.to_categorical(Y, num_classes=len(set(Y)))

X = X / np.max(X)

# Split dataset
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

# Define LSTM model
model = Sequential([
    LSTM(128, return_sequences=True, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(256, return_sequences=True, activation='relu'),
    Dropout(0.3),
    LSTM(128, return_sequences=True, activation='relu'),
    Dropout(0.3),
    LSTM(64, activation='relu'),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(len(set(Y.argmax(axis=1))), activation='softmax')
])

In [17]:
from tensorflow.keras.callbacks import EarlyStopping


# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_data=(X_val, Y_val))

# Save model
model.save("lstm_sign_language_model_4")
print("LSTM model training complete!")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

INFO:tensorflow:Assets written to: lstm_sign_language_model_4\assets


LSTM model training complete!


### Evaluation of the model

In [19]:
model.evaluate(X_val, Y_val)



[0.7007331252098083, 0.9166666865348816]

In [20]:
y = model.predict(X_val[:1])
print(actions[np.argmax(y)])
print(actions[np.argmax(Y_val[0])])


You're welcome
You're welcome


### Check the real-time prediction

In [11]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
from collections import deque

# Load trained LSTM model
model = load_model("lstm_sign_language_model_3")

# Define gestures
gestures = ["Hello", "Yes", "No", "Please", "ThankYou", "Mother", "Father", "Love", "Baby", "Sorry", "You're welcome"]

# Initialize MediaPipe
mp_holistic = mp.solutions.holistic
cap = cv2.VideoCapture(0)

# Store the last 30 frames
sequence = deque(maxlen=30)

def extract_landmarks(image):
    """Extract pose and hand keypoints from an image"""
    with mp_holistic.Holistic(static_image_mode=False, min_detection_confidence=0.5) as holistic:
        img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(img_rgb)

        # Extract keypoints
        pose = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]) if results.pose_landmarks else np.zeros((33, 3))
        left_hand = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]) if results.left_hand_landmarks else np.zeros((21, 3))
        right_hand = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]) if results.right_hand_landmarks else np.zeros((21, 3))

        return np.concatenate([pose.flatten(), left_hand.flatten(), right_hand.flatten()])

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break

    # Extract keypoints
    keypoints = extract_landmarks(frame)
    sequence.append(keypoints)

    if len(sequence) == 30:  # Make prediction only if we have 30 frames
        input_sequence = np.expand_dims(np.array(sequence), axis=0)  # Reshape for model input
        prediction = model.predict(input_sequence)
        predicted_label = np.argmax(prediction)

        # Display prediction
        cv2.putText(frame, gestures[predicted_label], (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

    cv2.imshow("Sign Language Recognition", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




### creating the tensorflow.js file from the model for web application

In [None]:
import tensorflow as tf
import tensorflowjs as tfjs

# Load trained model
model = tf.keras.models.load_model("lstm_sign_language_model.h5")

# Convert and save the model for TensorFlow.js
tfjs.converters.save_keras_model(model, "tfjs_model")

print("✅ Model converted to TensorFlow.js format!")
