In [1]:
import cv2
import os
import numpy as np
from keras.models import load_model



In [2]:
# Load the pre-trained CNN model
model = load_model('C:/Users/User/Desktop/projects/Courses/Prodigy InfoTech/Task 4 (Hand Gesture Recognition)/CNN model_final.h5')
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 150, 150, 15)      390       
                                                                 
 max_pooling2d (MaxPooling2  (None, 75, 75, 15)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 75, 75, 15)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 75, 75, 32)        4352      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 37, 37, 32)        0         
 g2D)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 37, 37, 32)        0

In [5]:
# Map labels to gestures
gesture_mapping = {
    0: "01_palm", 1: "02_l", 2: "03_fist", 3: "04_fist_moved",
    4: "05_thumb", 5: "06_index", 6: "07_ok", 7: "08_palm_moved",
    8: "09_c", 9: "10_down"
}

# Camera Initialization
cap = cv2.VideoCapture(0)  # 0 --> Default camera

# Background subtractor using KNN to capture the gesture witin the frame
bg_subtractor = cv2.createBackgroundSubtractorKNN(history = 500, dist2Threshold = 400.0, detectShadows = False)

while True:
    # Frame-by-frame capture
    ret, frame = cap.read()

    # Convert frame to the HSV color space
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # Skin color in HSV range
    lower_skin = np.array([0, 20, 70], dtype = np.uint8)
    upper_skin = np.array([20, 255, 255], dtype = np.uint8)

    # Threshold the HSV image to capture the skin color
    mask_skin = cv2.inRange(hsv, lower_skin, upper_skin)

    # Applying background subtraction
    fg_mask = bg_subtractor.apply(frame)

    # Combining the skin mask with the background subtractor mask
    mask_combined = cv2.bitwise_and(mask_skin, mask_skin, mask = fg_mask)

    # Morphological operations to reduce noise
    kernel = np.ones((5, 5), np.uint8)
    mask_combined = cv2.morphologyEx(mask_combined, cv2.MORPH_OPEN, kernel)
    mask_combined = cv2.morphologyEx(mask_combined, cv2.MORPH_CLOSE, kernel)

    # Apply combined mask to the original frame
    segmented_hand = cv2.bitwise_and(frame, frame, mask = mask_combined)

    # Convert the segmented frame to grayscale
    gray = cv2.cvtColor(segmented_hand, cv2.COLOR_BGR2GRAY)

    # Resize the segmented frame to match the model's input size
    resized_frame = cv2.resize(gray, (150, 150))

    # Reshape the frame to match the input shape of the model
    input_data_arr = np.array(resized_frame)
    input_data = input_data_arr.reshape((1, 150, 150, 1))  # Ensure single channel

    # Model predictions
    prediction = model.predict(input_data)
    predicted_label = np.argmax(prediction)

    print("Raw Prediction:", prediction)

    # Map to the gesture
    predicted_gesture = gesture_mapping[predicted_label]

    # Display frame with the predicted gesture
    cv2.putText(frame, f"Predicted Gesture: {predicted_gesture}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('Hand Gesture Recognition', frame)
    print("Predicted Probabilities:", prediction)

    # Break the loop when 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release camera and close all windows
cap.release()
cv2.destroyAllWindows()


Raw Prediction: [[2.5907708e-27 5.1994396e-32 3.1044401e-19 4.2543512e-12 6.2478360e-34
  1.6026411e-15 5.2044777e-18 1.0000000e+00 1.7662865e-09 3.1932918e-12]]
Predicted Probabilities: [[2.5907708e-27 5.1994396e-32 3.1044401e-19 4.2543512e-12 6.2478360e-34
  1.6026411e-15 5.2044777e-18 1.0000000e+00 1.7662865e-09 3.1932918e-12]]
Raw Prediction: [[1.2566440e-27 1.8718698e-30 1.6394438e-17 1.5443741e-10 3.1484236e-34
  8.0878772e-15 1.1931053e-15 1.0000000e+00 6.4770078e-09 4.8945740e-11]]
Predicted Probabilities: [[1.2566440e-27 1.8718698e-30 1.6394438e-17 1.5443741e-10 3.1484236e-34
  8.0878772e-15 1.1931053e-15 1.0000000e+00 6.4770078e-09 4.8945740e-11]]
Raw Prediction: [[1.5713032e-28 1.2977758e-35 7.1247706e-22 6.0934309e-18 7.5697112e-37
  1.6981258e-16 3.4479636e-18 1.0000000e+00 4.6951900e-09 3.7983712e-14]]
Predicted Probabilities: [[1.5713032e-28 1.2977758e-35 7.1247706e-22 6.0934309e-18 7.5697112e-37
  1.6981258e-16 3.4479636e-18 1.0000000e+00 4.6951900e-09 3.7983712e-14]]
R