# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [27]:
!pip install opencv-python numpy tensorflow scikit-learn matplotlib pillow pandas mediapipe ffmpeg-python

Collecting ffmpeg-python
  Using cached ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting future (from ffmpeg-python)
  Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Downloading future-1.0.0-py3-none-any.whl (491 kB)
Installing collected packages: future, ffmpeg-python

   ---------------------------------------- 0/2 [future]
   ---------------------------------------- 0/2 [future]
   ---------------------------------------- 0/2 [future]
   ---------------------------------------- 2/2 [ffmpeg-python]

Successfully installed ffmpeg-python-0.2.0 future-1.0.0


In [4]:
import cv2
import os
import time
import mediapipe as mp
import csv
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


Data Collection

In [55]:


# -------------------------
# Settings
# -------------------------
CLASSES = ["Call"]   # <-- Add more categories here
SAVE_DIR = "dataset"
VIDEO_COUNT = 15
DURATION = 3
PAUSE_DURATION = 3
FPS = 30
FRAME_WIDTH = 1280
FRAME_HEIGHT = 720
# -------------------------

# Initialize webcam
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)
cap.set(cv2.CAP_PROP_FPS, FPS)

# Initialize MediaPipe
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

print("Starting multi-category recording...")

for CLASS_NAME in CLASSES:

    print(f"\n===== START CATEGORY: {CLASS_NAME} =====")

    # Create folders
    class_folder = os.path.join(SAVE_DIR, CLASS_NAME)
    os.makedirs(class_folder, exist_ok=True)
    keypoints_folder = os.path.join(class_folder, "keypoints")
    os.makedirs(keypoints_folder, exist_ok=True)

    for i in range(1, VIDEO_COUNT + 1):

        # CSV file for keypoints
        csv_filename = f"{CLASS_NAME}_{i}_keypoints.csv"
        csv_filepath = os.path.join(keypoints_folder, csv_filename)

        csv_file = open(csv_filepath, "w", newline="")
        csv_writer = csv.writer(csv_file)

        # Header
        header = []
        for hand in ['left', 'right']:
            for j in range(21):
                header += [f"{hand}_x{j}", f"{hand}_y{j}", f"{hand}_z{j}"]
        header.append("label")
        csv_writer.writerow(header)

        # Video Writer
        video_filename = f"{CLASS_NAME}_{i}.mp4"
        video_path = os.path.join(class_folder, video_filename)
        fourcc = cv4 = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(video_path, fourcc, FPS, (FRAME_WIDTH, FRAME_HEIGHT))

        print(f"\nRecording {CLASS_NAME} - Video {i}/{VIDEO_COUNT}")

        start_time = time.time()

        while time.time() - start_time < DURATION:
            ret, frame = cap.read()
            if not ret:
                continue
            frame = cv2.flip(frame, 1)

            # Process with MediaPipe
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = hands.process(rgb_frame)

            left_hand_kp = [(0, 0, 0)] * 21
            right_hand_kp = [(0, 0, 0)] * 21

            if result.multi_hand_landmarks and result.multi_handedness:
                for hand_landmarks, handedness in zip(result.multi_hand_landmarks, result.multi_handedness):
                    label = handedness.classification[0].label
                    hand_kp = [(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark]

                    if label == "Left":
                        left_hand_kp = hand_kp
                    else:
                        right_hand_kp = hand_kp

                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Save keypoints
            row = [coord for kp in left_hand_kp + right_hand_kp for coord in kp]
            row.append(CLASS_NAME)
            csv_writer.writerow(row)

            # Overlay info
            sec_left = int(DURATION - (time.time() - start_time) + 1)
            cv2.putText(frame, f"Category: {CLASS_NAME}", (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            cv2.putText(frame, f"Recording Video {i}/{VIDEO_COUNT}", (10, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Time Left: {sec_left}s", (10, 120),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Show & write
            cv2.imshow("Recorder", frame)
            out.write(frame)

            if cv2.waitKey(1) == ord('q'):
                print("Stopped by user.")
                cap.release()
                cv2.destroyAllWindows()
                raise SystemExit()

        out.release()
        csv_file.close()

        print(f"Saved video: {video_path}")
        print(f"Saved keypoints: {csv_filepath}")

        # Pause between videos
        print(f"Waiting {PAUSE_DURATION} seconds...")
        pause_start = time.time()
        while time.time() - pause_start < PAUSE_DURATION:
            ret, frame = cap.read()
            if not ret:
                continue
            frame = cv2.flip(frame, 1)
            remaining = int(PAUSE_DURATION - (time.time() - pause_start))
            cv2.putText(frame, f"Next video in {remaining}s", (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
            cv2.imshow("Recorder", frame)
            if cv2.waitKey(1) == ord('q'):
                cap.release()
                cv2.destroyAllWindows()
                raise SystemExit()

cap.release()
cv2.destroyAllWindows()
print("\nAll categories recorded successfully!")


Starting multi-category recording...

===== START CATEGORY: Call =====

Recording Call - Video 1/15
Saved video: dataset\Call\Call_1.mp4
Saved keypoints: dataset\Call\keypoints\Call_1_keypoints.csv
Waiting 3 seconds...

Recording Call - Video 2/15
Saved video: dataset\Call\Call_2.mp4
Saved keypoints: dataset\Call\keypoints\Call_2_keypoints.csv
Waiting 3 seconds...

Recording Call - Video 3/15
Saved video: dataset\Call\Call_3.mp4
Saved keypoints: dataset\Call\keypoints\Call_3_keypoints.csv
Waiting 3 seconds...

Recording Call - Video 4/15
Saved video: dataset\Call\Call_4.mp4
Saved keypoints: dataset\Call\keypoints\Call_4_keypoints.csv
Waiting 3 seconds...

Recording Call - Video 5/15
Saved video: dataset\Call\Call_5.mp4
Saved keypoints: dataset\Call\keypoints\Call_5_keypoints.csv
Waiting 3 seconds...

Recording Call - Video 6/15
Saved video: dataset\Call\Call_6.mp4
Saved keypoints: dataset\Call\keypoints\Call_6_keypoints.csv
Waiting 3 seconds...

Recording Call - Video 7/15
Saved video:

Data Preprocessing

In [56]:
# -------------------------
# Settings
# -------------------------
DATASET_DIR = "dataset"    # folder with category subfolders
SEQUENCE_LENGTH = 30       # number of frames per sample
# -------------------------

def fix_sequence_length(sequence, target_len):
    """Pad or truncate keypoint sequence to a fixed length."""
    if len(sequence) > target_len:
        return sequence[:target_len]
    elif len(sequence) < target_len:
        pad = np.zeros((target_len - len(sequence), sequence.shape[1]))
        return np.vstack([sequence, pad])
    return sequence

X = []
y = []

# Loop through categories
for category in os.listdir(DATASET_DIR):
    kp_dir = os.path.join(DATASET_DIR, category, "keypoints")
    if not os.path.isdir(kp_dir):
        continue

    for csv_file in os.listdir(kp_dir):
        csv_path = os.path.join(kp_dir, csv_file)
        df = pd.read_csv(csv_path)

        # Remove label column
        keypoints = df.iloc[:, :-1].values.astype(np.float32)

        # Fix sequence length
        keypoints = fix_sequence_length(keypoints, SEQUENCE_LENGTH)

        X.append(keypoints)
        y.append(category)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

print("X shape:", X.shape)  # (samples, SEQUENCE_LENGTH, 126)
print("y shape:", y.shape)  # (samples,)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_onehot = to_categorical(y_encoded)

print("Classes:", label_encoder.classes_)
print("y_onehot shape:", y_onehot.shape)

# Save preprocessed data
np.save("X_keypoints.npy", X)
np.save("y_labels.npy", y_onehot)
print("Preprocessed data saved to X_keypoints.npy and y_labels.npy")

X shape: (160, 30, 126)
y shape: (160,)
Classes: ['Call' 'Dad' 'Eat' 'Go' 'Good' 'Help' 'I' 'Love' 'No' 'Say' 'Stop' 'You']
y_onehot shape: (160, 12)
Preprocessed data saved to X_keypoints.npy and y_labels.npy


Modeling

In [57]:
X = np.load("X_keypoints.npy")      # shape: (samples, SEQUENCE_LENGTH, 126)
y = np.load("y_labels.npy")         # shape: (samples, num_classes)

print("X shape:", X.shape)
print("y shape:", y.shape)

# -------------------------
# Model definition
# -------------------------
SEQUENCE_LENGTH = X.shape[1]
FEATURES = X.shape[2]
NUM_CLASSES = y.shape[1]

model = Sequential()

# 1D Convolutions over keypoints per frame
model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(SEQUENCE_LENGTH, FEATURES)))
model.add(Conv1D(128, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.3))

# LSTM for temporal information
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.3))

# Output layer
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# -------------------------
# Callbacks
# -------------------------
checkpoint = ModelCheckpoint("sign_language_model.h5", monitor='val_accuracy', save_best_only=True, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# -------------------------
# Train-test split
# -------------------------
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# -------------------------
# Train the model
# -------------------------
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=16,
    callbacks=[checkpoint, early_stop]
)

X shape: (160, 30, 126)
y shape: (160, 12)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m9s[0m 1s/step - accuracy: 0.1875 - loss: 2.4381
Epoch 1: val_accuracy improved from None to 0.06250, saving model to sign_language_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - accuracy: 0.1406 - loss: 2.4420 - val_accuracy: 0.0625 - val_loss: 2.5001
Epoch 2/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - accuracy: 0.2500 - loss: 2.3899
Epoch 2: val_accuracy improved from 0.06250 to 0.09375, saving model to sign_language_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.1719 - loss: 2.3637 - val_accuracy: 0.0938 - val_loss: 2.4730
Epoch 3/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 30ms/step - accuracy: 0.1250 - loss: 2.3452
Epoch 3: val_accuracy improved from 0.09375 to 0.15625, saving model to sign_language_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.2188 - loss: 2.2286 - val_accuracy: 0.1562 - val_loss: 2.3785
Epoch 4/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 37ms/step - accuracy: 0.5000 - loss: 2.0234
Epoch 4: val_accuracy did not improve from 0.15625
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.3125 - loss: 2.0609 - val_accuracy: 0.1562 - val_loss: 2.2673
Epoch 5/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 33ms/step - accuracy: 0.1875 - loss: 2.1559
Epoch 5: val_accuracy improved from 0.15625 to 0.34375, saving model to sign_language_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.3828 - loss: 1.8688 - val_accuracy: 0.3438 - val_loss: 2.0643
Epoch 6/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - accuracy: 0.3125 - loss: 1.8635
Epoch 6: val_accuracy did not improve from 0.34375
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4531 - loss: 1.6657 - val_accuracy: 0.3438 - val_loss: 1.9708
Epoch 7/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 33ms/step - accuracy: 0.5000 - loss: 1.5112
Epoch 7: val_accuracy did not improve from 0.34375
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4844 - loss: 1.5391 - val_accuracy: 0.2500 - val_loss: 1.9356
Epoch 8/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - accuracy: 0.4375 - loss: 1.3009
Epoch 8: val_accuracy did not improve from 0.34375
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5312 - loss: 1.3192 - val_accuracy: 0.4375 - val_loss: 1.7762
Epoch 10/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - accuracy: 0.9375 - loss: 0.6529
Epoch 10: val_accuracy improved from 0.43750 to 0.56250, saving model to sign_language_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.6172 - loss: 1.1333 - val_accuracy: 0.5625 - val_loss: 1.5608
Epoch 11/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 34ms/step - accuracy: 0.5000 - loss: 1.4161
Epoch 11: val_accuracy did not improve from 0.56250
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.5938 - loss: 1.1264 - val_accuracy: 0.5000 - val_loss: 1.6915
Epoch 12/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 32ms/step - accuracy: 0.6250 - loss: 1.0853
Epoch 12: val_accuracy did not improve from 0.56250
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6562 - loss: 1.0418 - val_accuracy: 0.5000 - val_loss: 1.5751
Epoch 13/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - accuracy: 0.5625 - loss: 1.1795
Epoch 13: val_accuracy did not improve from 0.56250
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.7188 - loss: 0.9215 - val_accuracy: 0.6250 - val_loss: 1.3815
Epoch 18/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - accuracy: 0.7500 - loss: 0.7527
Epoch 18: val_accuracy did not improve from 0.62500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7344 - loss: 0.8042 - val_accuracy: 0.5938 - val_loss: 1.5110
Epoch 19/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 31ms/step - accuracy: 0.6250 - loss: 1.2052
Epoch 19: val_accuracy did not improve from 0.62500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7500 - loss: 0.7809 - val_accuracy: 0.5312 - val_loss: 1.4221
Epoch 20/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 29ms/step - accuracy: 0.8125 - loss: 0.7767
Epoch 20: val_accuracy did not improve from 0.62500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8594 - loss: 0.4659 - val_accuracy: 0.6562 - val_loss: 1.2930
Epoch 32/50
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 27ms/step - accuracy: 0.8125 - loss: 0.4079
Epoch 32: val_accuracy did not improve from 0.65625
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8125 - loss: 0.5200 - val_accuracy: 0.6562 - val_loss: 1.3442


In [58]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import os

# -------------------------
# Settings
# -------------------------
MODEL_PATH = "sign_language_model.h5"   # your trained model
DATASET_DIR = "dataset"                 # folder used in preprocessing
SEQUENCE_LENGTH = 30
FRAME_WIDTH = 1280
FRAME_HEIGHT = 720
FPS = 30
# -------------------------

# Load trained model
model = load_model(MODEL_PATH)

# Dynamically get class names from dataset folder
CLASSES = [d for d in os.listdir(DATASET_DIR) if os.path.isdir(os.path.join(DATASET_DIR, d))]
CLASSES.sort()  # ensure consistent order

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Initialize webcam
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)
cap.set(cv2.CAP_PROP_FPS, FPS)

# Sequence buffer
sequence = []

print("Starting real-time gesture test. Press 'q' to quit.")

while True:
    ret, frame = cap.read()
    if not ret:
        continue
    frame = cv2.flip(frame, 1)

    # MediaPipe processing
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb_frame)

    # Extract keypoints
    left_hand_kp = [(0, 0, 0)] * 21
    right_hand_kp = [(0, 0, 0)] * 21

    if result.multi_hand_landmarks and result.multi_handedness:
        for hand_landmarks, handedness in zip(result.multi_hand_landmarks, result.multi_handedness):
            label = handedness.classification[0].label
            hand_kp = [(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark]
            if label == "Left":
                left_hand_kp = hand_kp
            else:
                right_hand_kp = hand_kp
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

    # Flatten keypoints and add to sequence
    keypoints = [coord for kp in left_hand_kp + right_hand_kp for coord in kp]
    sequence.append(keypoints)

    # Keep last SEQUENCE_LENGTH frames
    if len(sequence) > SEQUENCE_LENGTH:
        sequence = sequence[-SEQUENCE_LENGTH:]

    # Make prediction when sequence is full
    if len(sequence) == SEQUENCE_LENGTH:
        input_data = np.expand_dims(sequence, axis=0)  # shape: (1, SEQUENCE_LENGTH, 126)
        prediction = model.predict(input_data, verbose=0)
        class_id = np.argmax(prediction)
        class_name = CLASSES[class_id]
        confidence = prediction[0][class_id]
        cv2.putText(frame, f"{class_name} ({confidence*100:.1f}%)", (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Show frame
    cv2.imshow("Real-Time Gesture Recognition", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
hands.close()




Starting real-time gesture test. Press 'q' to quit.


KeyboardInterrupt: 