In [1]:
import cv2
import numpy as np
import tensorflow as tf
import os
import time
import collections
import pyttsx3
# Print the installed TensorFlow version so the environment is recorded with the notebook output.
print(tf.__version__)

2.4.0


In [None]:
# Restore the trained gesture classifier from its HDF5 checkpoint.
# The path is relative to the notebook's working directory.
MODEL_PATH = 'gesture_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

In [None]:

# Live gesture recognition: grab webcam frames, classify the region of interest,
# speak a gesture once it has been held confidently for STABLE_SECONDS, and
# record a per-frame history (timestamps / confidences / predictions) for the
# visualisation cells further down the notebook.

# Labels for gestures; the arg-max index of the model output is mapped to a
# label by position.
# NOTE(review): presumably this order matches the class order used in training — confirm.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'Nothing']

# --- Configuration ---------------------------------------------------------
ROI_TOP_LEFT = (50, 50)          # (x, y) of the region-of-interest corner
ROI_BOTTOM_RIGHT = (400, 400)    # (x, y) of the opposite corner
ROI_BORDER_COLOR = (0, 0, 255)
ROI_BORDER_THICKNESS = 5
MODEL_INPUT_SIZE = (96, 96)      # size the ROI is resized to before prediction
CONFIDENCE_THRESHOLD = 95        # percent; a frame must exceed this to count
STABLE_SECONDS = 1.0             # how long a gesture must be held before it is spoken

# Frame rate for smoothness
fps = 10
frame_duration = 1 / fps

# Overlay text style (loop-invariant, so defined once)
font = cv2.FONT_HERSHEY_TRIPLEX
fontScale = 1
color = (0, 255, 255)
thickness = 2

# --- Per-frame history used by the plotting cells ---------------------------
# Re-created on every run of this cell so that re-running is idempotent.
timestamps = []    # seconds since the capture session started
confidences = []   # confidence (%) of the top class for each frame
predictions = []   # label of the top class for each frame

# --- Tracking state ---------------------------------------------------------
current_prediction = None   # gesture currently being held, or None
prediction_start_time = 0   # time.time() at which current_prediction was first seen
announced = False           # True once current_prediction has been spoken

# Initialize the speech engine before opening the camera so that a failure
# here cannot leave the camera device open.
engine = pyttsx3.init()

# Initialize the video source (0 for internal camera)
cap = cv2.VideoCapture(0)

(roi_x0, roi_y0), (roi_x1, roi_y1) = ROI_TOP_LEFT, ROI_BOTTOM_RIGHT

try:
    session_start = time.time()

    while True:
        # Start time to manage frame rate
        start_time = time.time()

        # Capture frame (also fails cleanly when the camera could not be opened)
        ret, frame = cap.read()
        if not ret:
            print("Error: Couldn't capture frame")
            break

        # Draw a rectangle for ROI and crop it.
        # NOTE(review): the border is drawn before cropping and the crop is a
        # view of `frame`, so border pixels on the ROI edge are part of the
        # model input. Left unchanged in case the training images were
        # captured the same way — confirm.
        cv2.rectangle(frame, ROI_TOP_LEFT, ROI_BOTTOM_RIGHT,
                      ROI_BORDER_COLOR, ROI_BORDER_THICKNESS)
        roi = frame[roi_y0:roi_y1, roi_x0:roi_x1]

        # Resize the ROI to match the model's input shape and scale to [0, 1].
        # NOTE(review): the frame is passed in OpenCV's channel order as
        # delivered by cap.read(); verify this matches the training pipeline.
        img = cv2.resize(roi, MODEL_INPUT_SIZE)
        img = img / 255.0

        # Make prediction about the current frame (verbose=0 keeps the cell
        # output free of per-call progress bars).
        prediction = model.predict(np.expand_dims(img, axis=0), verbose=0)
        char_index = int(np.argmax(prediction))
        predicted_char = labels[char_index]
        confidence = round(float(prediction[0, char_index]) * 100, 1)

        # Record this frame for the visualisation cells.
        timestamps.append(start_time - session_start)
        confidences.append(confidence)
        predictions.append(predicted_char)

        # A single confidence test drives both tracking and the overlay so
        # the two can never disagree.
        is_confident = confidence > CONFIDENCE_THRESHOLD
        now = time.time()

        if not is_confident:
            # Reset tracking if confidence is low
            current_prediction = None
            announced = False
        elif predicted_char != current_prediction:
            # A new gesture: start timing it
            current_prediction = predicted_char
            prediction_start_time = now
            announced = False
        elif not announced and now - prediction_start_time >= STABLE_SECONDS:
            # Gesture has been held long enough (wall-clock time, independent
            # of the achieved frame rate): speak it once.
            engine.say(predicted_char)
            engine.runAndWait()
            # Prevent repeat speech until the gesture changes or confidence drops
            announced = True

        # Display the predicted gesture and confidence
        if is_confident:
            msg = f"{predicted_char}, Conf: {confidence}%"
            cv2.putText(frame, msg, (80, 80), font, fontScale, color, thickness)

        # Show the frame
        cv2.imshow('frame', frame)

        # Control the frame rate with waitKey itself: it both repaints the
        # window and polls the keyboard, so the frame appears immediately.
        # The delay must stay >= 1 because waitKey(0) blocks indefinitely.
        remaining = frame_duration - (time.time() - start_time)
        wait_ms = max(1, int(remaining * 1000))

        # Close the camera when 'q' is pressed
        if cv2.waitKey(wait_ms) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close all windows, even if the loop was left
    # through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import matplotlib.pyplot as plt

# Visualize results from the capture session.
# Reads the notebook-level `timestamps`, `confidences` and `predictions`.

# Figure 1: confidence of the top prediction over time
fig_conf, ax_conf = plt.subplots(figsize=(12, 6))
ax_conf.plot(timestamps, confidences, marker='o', label="Confidence")
ax_conf.set(
    xlabel="Time (s)",
    ylabel="Confidence (%)",
    title="Confidence Trend Over Time",
)
ax_conf.legend()
ax_conf.grid()
plt.show()

# Figure 2: how often each gesture was predicted
unique_preds, counts = np.unique(predictions, return_counts=True)
fig_hist, ax_hist = plt.subplots(figsize=(12, 6))
ax_hist.bar(unique_preds, counts, color='skyblue')
ax_hist.set(
    xlabel="Gesture",
    ylabel="Frequency",
    title="Frequency of Predicted Gestures",
)
plt.show()

In [None]:
# Map every recorded prediction to its position in `labels` so it can be
# placed on a numeric y-axis.
gesture_indices = list(map(labels.index, predictions))

# Scatter of the predicted gesture per frame, coloured by gesture index
fig_timeline, ax_timeline = plt.subplots(figsize=(12, 6))
gesture_points = ax_timeline.scatter(
    timestamps,
    gesture_indices,
    c=gesture_indices,
    cmap="viridis",
    s=50,
    label="Predicted Gesture",
)
# Show the gesture names instead of raw indices on the y-axis
ax_timeline.set_yticks(range(len(labels)))
ax_timeline.set_yticklabels(labels)
ax_timeline.set_xlabel("Time (s)")
ax_timeline.set_ylabel("Gesture")
ax_timeline.set_title("Predicted Gestures Over Time")
fig_timeline.colorbar(gesture_points, ax=ax_timeline, label="Gesture Index")
ax_timeline.grid()
plt.show()
