# Hand Gesture Recognition Notebook

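This notebook trains a gesture classifier on previously recorded hand-landmark data, then uses MediaPipe to detect hand landmarks from a webcam feed, classifies the gestures in real time, and forwards them to the keyboard adapter.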

In [None]:
# Install the packages listed in requirements.txt; the %pip magic installs into
# the environment of the running kernel.
%pip install -r requirements.txt

## Model Training

This section loads the dataset created in the `data_generation` notebook, splits it into training and validation sets (the validation fraction comes from the `TEST_SIZE` setting), and then trains a neural network to classify the gestures. The trained model is saved for later use in real-time prediction.

In [1]:
import ast

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from config.app_config import AppConfig
from gesture_classifier import GestureClassifier

# Create a default AppConfig instance for training (uses default key_bindings.json)
app_config = AppConfig()

# --- Load Configuration ---
nn_config = app_config.get_neural_network_config()
# NOTE(review): MODEL_PATH, EPOCHS and BATCH_SIZE are read here but never passed
# to GestureClassifier below -- presumably the classifier reads them from the
# config itself; confirm, otherwise these settings have no effect on training.
model_path = nn_config.get('MODEL_PATH')
epochs = nn_config.get('EPOCHS')
batch_size = nn_config.get('BATCH_SIZE')
test_size = nn_config.get('TEST_SIZE')
dataset_path = nn_config.get('DATASET_PATH')

# Fixed seed so the train/validation split is identical on every Restart & Run All.
SPLIT_SEED = 42

# --- Load and Prepare Dataset ---
try:
    df = pd.read_csv(dataset_path)

    # pandas reads the 'LANDMARKS' column back as text, so each cell has to be
    # parsed into a real list. ast.literal_eval accepts only Python literals
    # (lists, numbers, strings, ...) and, unlike eval(), cannot execute code
    # that happens to be embedded in the CSV file.
    df['LANDMARKS'] = df['LANDMARKS'].apply(ast.literal_eval)

    # One row per sample, one column per landmark value.
    X = np.array(df['LANDMARKS'].tolist())
    Y = df['GESTURE_ID']

    # Dynamically determine number of classes based on highest gesture ID.
    # Cast to a plain int so the classifier does not receive a NumPy scalar.
    max_gesture_id = int(Y.max())
    num_classes = max_gesture_id + 1

    # Split the data into training and validation sets
    X_train, X_val, Y_train, Y_val = train_test_split(
        X, Y, test_size=test_size, random_state=SPLIT_SEED
    )

    # --- Train the Model ---
    classifier = GestureClassifier(num_classes=num_classes, input_size=X_train.shape[1])
    classifier.train(X_train, Y_train, X_val, Y_val)

except FileNotFoundError:
    print(f"Error: Dataset file not found at '{dataset_path}'.")
    print("Please run the data generation notebook first.")
except Exception as e:
    # Best-effort reporting: keep the notebook running and show what went wrong.
    print(f"An error occurred: {e}")



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3902 - loss: 1.7869 - val_accuracy: 0.7170 - val_loss: 1.1017
Epoch 2/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3902 - loss: 1.7869 - val_accuracy: 0.7170 - val_loss: 1.1017
Epoch 2/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5972 - loss: 1.0511 - val_accuracy: 0.9599 - val_loss: 0.5147
Epoch 3/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5972 - loss: 1.0511 - val_accuracy: 0.9599 - val_loss: 0.5147
Epoch 3/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7263 - loss: 0.6738 - val_accuracy: 0.9292 - val_loss: 0.2932
Epoch 4/200
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7263 - loss: 0.6738 - val_accuracy: 0.9292 - val_loss: 0.2932
Epoch 4/200
[1m34/34[0m [32m━━━



Model saved to models/gesture_model.h5

Final Training Accuracy: 1.0000
Final Validation Accuracy: 1.0000

Final Training Accuracy: 1.0000
Final Validation Accuracy: 1.0000


## Main Application Loop

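This cell captures video from the webcam and, once per `DETECTION_RATE` interval, processes the current frame to detect hands, classifies the gesture of each hand, forwards gesture changes to the keyboard adapter, and displays the annotated frame. Press `Esc` in the video window to stop.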

In [None]:
import cv2
import time
import json
from gesture_detector import GestureDetector
from data_preprocessor import DataPreprocessor
from gesture_classifier import GestureClassifier
from keyboard_adapter import KeyboardAdapter
from config.app_config import AppConfig
from IPython.display import clear_output


# --- Configuration ---
KEY_BINDINGS_CONFIG = "data/key_bindings_default.json"
# NOTE(review): MODEL_PATH is never read in this cell; the model is loaded from
# app_config.get_model_path() instead. Confirm which path is authoritative.
MODEL_PATH = "models/gesture_classifier.h5"
ESC_KEY = 27       # key code cv2.waitKey reports for the Escape key
POLL_DELAY_MS = 5  # how long cv2.waitKey waits for a key press on each loop pass

# --- Load configuration ---
app_config = AppConfig(KEY_BINDINGS_CONFIG)
gesture_config = app_config.get_hand_gesture_config()
max_hands = gesture_config.get('MAX_HANDS')
min_detection_confidence = gesture_config.get('MIN_DETECTION_CONFIDENCE')
detection_rate = gesture_config.get('DETECTION_RATE')
model_path = app_config.get_model_path()

# --- Build the pipeline: detector -> preprocessor -> classifier -> keyboard ---
detector = GestureDetector(max_hands, min_detection_confidence)
keyboard_adapter = KeyboardAdapter(app_config)
data_preprocessor = DataPreprocessor()
classifier = GestureClassifier()
classifier.load_model(model_path)

# --- Camera setup: request 640x480 @ 30 FPS with a one-frame buffer ---
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
cap.set(cv2.CAP_PROP_FPS, 30)
cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)

if not cap.isOpened():
    # The loop below is skipped in this case; say so instead of exiting silently.
    print("Error: could not open the webcam (device 0).")

# Time of the most recent detection pass; detection runs once per `detection_rate`.
last_detection_time = time.time()

# Track previous gesture predictions
# NOTE(review): a None prediction leaves these untouched and sends nothing to the
# keyboard adapter, so a hand that disappears and comes back with the same
# gesture is not reported as a change -- confirm that this is intended.
prev_left_gesture = None
prev_right_gesture = None

try:
    while cap.isOpened():
        success, image = cap.read()
        current_time = time.time()

        if current_time - last_detection_time >= detection_rate:
            # Restart the interval on failed reads as well, so a stalled camera
            # produces one warning per interval instead of a flood of prints.
            last_detection_time = current_time

            if not success:
                print("Ignoring empty camera frame.")
            else:
                # Flip the frame horizontally before running detection.
                image = cv2.flip(image, 1)
                processed_image = detector.process_frame(image)
                left_hand, right_hand = detector.get_hand_vectors()
                norm_left_hand = data_preprocessor.process(left_hand)
                norm_right_hand = data_preprocessor.process(right_hand)

                gesture_id_left = classifier.predict(norm_left_hand)
                gesture_id_right = classifier.predict(norm_right_hand)

                # A hand counts as changed only when it has a prediction that
                # differs from the last reported one; this keeps clear_output
                # from being issued on every pass while a hand is absent.
                right_hand_changed = (
                    gesture_id_right is not None
                    and gesture_id_right != prev_right_gesture
                )
                left_hand_changed = (
                    gesture_id_left is not None
                    and gesture_id_left != prev_left_gesture
                )

                if right_hand_changed or left_hand_changed:
                    clear_output(wait=True)

                # Handle right hand gesture changes
                if right_hand_changed:
                    keyboard_adapter.handle_gesture_key(prev_right_gesture, gesture_id_right)
                    print(f"Right hand: {gesture_id_right}, {app_config.get_gesture_name(gesture_id_right)}")
                    prev_right_gesture = gesture_id_right

                # Handle left hand gesture changes
                if left_hand_changed:
                    keyboard_adapter.handle_gesture_key(prev_left_gesture, gesture_id_left)
                    print(f"Left hand: {gesture_id_left}, {app_config.get_gesture_name(gesture_id_left)}")
                    prev_left_gesture = gesture_id_left

                # Get gesture names (empty string when a hand has no prediction)
                left_gesture_name = app_config.get_gesture_name(gesture_id_left) if gesture_id_left is not None else ""
                right_gesture_name = app_config.get_gesture_name(gesture_id_right) if gesture_id_right is not None else ""

                # Draw gesture names on the image
                processed_image = detector.draw_gesture_names(processed_image, left_gesture_name, right_gesture_name)

                cv2.imshow('Hand Tracking', processed_image)

        # Poll the keyboard on every pass (not only on detection passes) so the
        # window stays responsive and Escape is noticed promptly.
        if cv2.waitKey(POLL_DELAY_MS) & 0xFF == ESC_KEY:
            break

except KeyboardInterrupt:
    print("Interrupted by user")
finally:
    try:
        # Release any remaining pressed keys when exiting
        print("Releasing remaining keys...")
        keyboard_adapter.release_all_keys()
    finally:
        # Runs even if releasing the keys fails, so the camera is always freed.
        cap.release()
        cv2.destroyAllWindows()
        cv2.waitKey(1)


Right hand: -1, unknown
Releasing remaining keys...
Releasing remaining keys...


: 