In [None]:
import cv2
import numpy as np
import os
import mediapipe as mp

# Define dataset path
DATA_PATH = "SignLanguageDataset"
actions = np.array(["Hello", "Yes", "No", "Please", "ThankYou"])  # Modify based on signs you need
sequence_length = 30  # Number of frames per sequence
num_sequences = 60  # Number of sequences per action

# Initialize MediaPipe
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Create directories
for action in actions:
    os.makedirs(os.path.join(DATA_PATH, action), exist_ok=True)

# Function to extract keypoints from the frame
def extract_landmarks(image, holistic):
    """Extract hand and pose landmarks using MediaPipe"""
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = holistic.process(img_rgb)

    # Extract keypoints
    pose = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]) if results.pose_landmarks else np.zeros((33, 3))
    left_hand = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]) if results.left_hand_landmarks else np.zeros((21, 3))
    right_hand = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]) if results.right_hand_landmarks else np.zeros((21, 3))

    return np.concatenate([pose.flatten(), left_hand.flatten(), right_hand.flatten()])

# Open webcam
cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    for action in actions:
        for sequence in range(num_sequences):
            print(f"📷 Recording {action} - Sequence {sequence + 1}/{num_sequences}")

            # Wait before starting recording
            for frame in range(5):
                ret, frame = cap.read()
                cv2.putText(frame, f"GET READY: {action}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow("Webcam", frame)
                cv2.waitKey(1000)

            # Collect sequence data
            for frame_idx in range(sequence_length):
                ret, frame = cap.read()

                # Extract keypoints
                keypoints = extract_landmarks(frame, holistic)

                # Save keypoints
                keypoints_path = os.path.join(DATA_PATH, action, f"{sequence}_{frame_idx}.npy")
                np.save(keypoints_path, keypoints)

                # Show frame with landmarks
                mp_drawing.draw_landmarks(frame, holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).pose_landmarks, mp_holistic.POSE_CONNECTIONS)
                cv2.putText(frame, f"Collecting {action} | sequence {sequence} | Frame {frame_idx + 1}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow("Webcam", frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

cap.release()
cv2.destroyAllWindows()

print("✅ Data collection complete! All keypoints saved.")


📷 Recording Yes - Sequence 60/60
✅ Data collection complete! All keypoints saved.


In [8]:
cap.release()
cv2.destroyAllWindows()

In [5]:
import numpy as np
import os

# Define dataset path
DATA_PATH = "SignLanguageDataset"

# Parameters
num_sequences = 60  # Number of sequences per action
sequence_length = 30  # Number of frames per sequence
actions = np.array(["Hello", "Yes", "No", "Please", "ThankYou"])

# Load data
X, Y = [], []
for action_idx, action in enumerate(actions):
    for sequence in range(num_sequences):  # Assume 100 sequences per action
        sequence_data = []
        for frame in range(sequence_length):
            keypoints_path = os.path.join(DATA_PATH, action, f"{sequence}_{frame}.npy")
            keypoints = np.load(keypoints_path)  # Load keypoint data
            sequence_data.append(keypoints)
        X.append(sequence_data)
        Y.append(action_idx)

# Convert to NumPy arrays
X = np.array(X)
Y = np.array(Y)

# Save processed data
np.save("X_sequences.npy", X)
np.save("Y_labels.npy", Y)

print("Data preprocessing complete!")


Data preprocessing complete!


In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
import numpy as np

# Load processed data
X = np.load("X_sequences.npy")
Y = np.load("Y_labels.npy")

# One-hot encode labels
Y = tf.keras.utils.to_categorical(Y, num_classes=len(set(Y)))

# Split dataset
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

# Define LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(128, return_sequences=True, activation='relu'),
    Dropout(0.3),
    LSTM(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(5, activation='softmax')  # Output layer (one neuron per gesture)
])

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, Y_train, epochs=50, batch_size=32, validation_data=(X_val, Y_val))

# Save model
model.save("lstm_sign_language_model.h5")
print("LSTM model training complete!")


Epoch 1/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 136ms/step - accuracy: 0.3797 - loss: 1.5479 - val_accuracy: 0.3500 - val_loss: 1.1658
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.5212 - loss: 1.0332 - val_accuracy: 0.6500 - val_loss: 0.8745
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.6850 - loss: 0.8403 - val_accuracy: 0.7500 - val_loss: 0.9801
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.6236 - loss: 0.9845 - val_accuracy: 0.6333 - val_loss: 0.9341
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step - accuracy: 0.7528 - loss: 0.6043 - val_accuracy: 0.6167 - val_loss: 0.5703
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.7226 - loss: 0.5951 - val_accuracy: 0.5000 - val_loss: 1.1645
Epoch 7/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━



LSTM model training complete!


In [9]:
model.evaluate(X_val, Y_val)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.9667 - loss: 0.1450 


[0.20942051708698273, 0.949999988079071]

In [14]:
y = model.predict(X_val[:1])
print(actions[np.argmax(y)])
print(actions[np.argmax(Y_val[0])])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
Please
Please


In [33]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
from collections import deque

# Load trained LSTM model
model = load_model("lstm_sign_language_model.h5")

# Define gestures
gestures = ["Hello", "Yes", "No", "Please", "ThankYou"]

# Initialize MediaPipe
mp_holistic = mp.solutions.holistic
cap = cv2.VideoCapture(0)

# Store the last 30 frames
sequence = deque(maxlen=30)

def extract_landmarks(image):
    """Extract pose and hand keypoints from an image"""
    with mp_holistic.Holistic(static_image_mode=False, min_detection_confidence=0.5) as holistic:
        img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = holistic.process(img_rgb)

        # Extract keypoints
        pose = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]) if results.pose_landmarks else np.zeros((33, 3))
        left_hand = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]) if results.left_hand_landmarks else np.zeros((21, 3))
        right_hand = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]) if results.right_hand_landmarks else np.zeros((21, 3))

        return np.concatenate([pose.flatten(), left_hand.flatten(), right_hand.flatten()])

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break

    # Extract keypoints
    keypoints = extract_landmarks(frame)
    sequence.append(keypoints)

    if len(sequence) == 30:  # Make prediction only if we have 30 frames
        input_sequence = np.expand_dims(np.array(sequence), axis=0)  # Reshape for model input
        prediction = model.predict(input_sequence)
        predicted_label = np.argmax(prediction)

        # Display prediction
        cv2.putText(frame, gestures[predicted_label], (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

    cv2.imshow("Sign Language Recognition", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


ModuleNotFoundError: No module named 'cv2'

In [3]:
import tensorflow as tf




In [6]:
import tensorflow as tf
import tensorflowjs as tfjs

# Load trained model
model = tf.keras.models.load_model("lstm_sign_language_model.h5")

# Convert and save the model for TensorFlow.js
tfjs.converters.save_keras_model(model, "tfjs_model")

print("✅ Model converted to TensorFlow.js format!")




NotFoundError: c:\Users\vithustennysan\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow_decision_forests\tensorflow\ops\inference\inference.so not found