# Right hand

In [1]:
import warnings
import pickle
import cv2
import mediapipe as mp
import numpy as np
import tkinter as tk
import threading
import time
import logging

# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning, message="SymbolDatabase.GetPrototype() is deprecated")

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Load the model with error handling
model_path = './model.p'
try:
    with open(model_path, 'rb') as f:
        model_dict = pickle.load(f)
    model = model_dict['model']
    logging.info(f"Model loaded successfully from {model_path}.")
except FileNotFoundError:
    logging.error(f"Model file not found at {model_path}. Please check the path.")
    raise
except Exception as e:
    logging.error(f"Error loading the model: {e}")
    raise

# Initialize the video capture with error handling
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    logging.error("Cannot open camera. Please ensure that the camera is connected and not used by another application.")
    raise IOError("Cannot open camera.")

# Mediapipe Hands configuration for tracking both hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,  # Changed from 1 to 2 to track both hands
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5
)

# Labels dictionary
labels_dict = {
    0: '1', 1: '2', 2: '3', 3: '4', 4: '5', 5: '6', 6: '7', 7: '8', 8: '9', 
    9: 'a', 10: 'b', 11: 'c', 12: 'd', 13: 'e', 14: 'f',
    15: 'g', 16: 'h', 17: 'i', 18: 'j', 19: 'k', 20: 'l',
    21: 'm', 22: 'n', 23: 'o', 24: 'p', 25: 'q', 26: 'r',
    27: 's', 28: 't', 29: 'u', 30: 'v', 31: 'w', 32: 'x',
    33: 'y', 34: 'z'
}

# Create a tkinter window
root = tk.Tk()
root.title("ISL Prediction")

# Create a text field in tkinter
text_field = tk.Text(root, height=2, width=40, font=("Helvetica", 16))
text_field.pack(pady=10)

# Create a clear text button
def clear_text():
    text_field.delete('1.0', tk.END)  # Clear the text field
    logging.info('Text cleared.')

clear_button = tk.Button(root, text="Clear Text", command=clear_text)
clear_button.pack(pady=5)

# Function to update the tkinter text field by appending the new predicted character
def update_text_field(text):
    if text == 'space':
        text_field.insert(tk.END, ' ')  # Append a space
    else:
        text_field.insert(tk.END, text)  # Append new character
    logging.info(f'Word added: {text if text != "space" else "space (represented as space)"}')

# Function to run video capture and ASL prediction in a separate thread
def run():
    global hands, model

    # Variables to store previous predictions and timing for debouncing
    prev_predictions = {}  # key: hand_id, value: (prediction, timestamp)
    fixed_predictions = {}  # key: hand_id, value: prediction
    delay_time = 1.0  # seconds to confirm prediction

    while True:
        ret, frame = cap.read()
        if not ret:
            logging.error("Failed to read from camera. Exiting...")
            break

        # Flip the frame horizontally for a mirror-like effect
        frame = cv2.flip(frame, 1)
        H, W, _ = frame.shape
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        current_time = time.time()

        if results.multi_hand_landmarks:
            for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
                hand_id = idx  # Assign an ID to each hand

                # Draw hand landmarks on the frame
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()
                )

                # Extract and normalize landmarks
                landmarks = hand_landmarks.landmark
                data_aux = []

                # Normalize landmarks relative to the wrist (landmark 0)
                wrist = landmarks[0]
                normalized_landmarks = []
                for lm in landmarks:
                    normalized_landmarks.append([
                        lm.x - wrist.x,
                        lm.y - wrist.y,
                        lm.z - wrist.z
                    ])

                # Flatten the normalized landmarks
                for lm in normalized_landmarks:
                    data_aux.extend(lm)

                # Calculate pairwise distances as additional features
                for i in range(len(normalized_landmarks) - 1):
                    for j in range(i + 1, len(normalized_landmarks)):
                        distance = np.linalg.norm(np.array(normalized_landmarks[i]) - np.array(normalized_landmarks[j]))
                        data_aux.append(distance)

                # Convert to numpy array and reshape for prediction
                feature_vector = np.array(data_aux).reshape(1, -1)

                # Make prediction using the model
                try:
                    prediction = model.predict(feature_vector)
                    predicted_label = labels_dict.get(int(prediction[0]), '?')
                except Exception as e:
                    logging.error(f"Prediction error: {e}")
                    predicted_label = '?'

                # Debouncing logic: confirm prediction after it stabilizes for delay_time seconds
                if hand_id in prev_predictions:
                    prev_pred, timestamp = prev_predictions[hand_id]
                    if predicted_label == prev_pred:
                        if (current_time - timestamp) >= delay_time:
                            if fixed_predictions.get(hand_id) != predicted_label:
                                fixed_predictions[hand_id] = predicted_label
                                update_text_field(predicted_label)
                    else:
                        prev_predictions[hand_id] = (predicted_label, current_time)
                else:
                    prev_predictions[hand_id] = (predicted_label, current_time)

                # Get bounding box for the hand
                x_coords = [lm.x for lm in landmarks]
                y_coords = [lm.y for lm in landmarks]
                x_min = max(int(min(x_coords) * W) - 20, 0)
                y_min = max(int(min(y_coords) * H) - 20, 0)
                x_max = min(int(max(x_coords) * W) + 20, W)
                y_max = min(int(max(y_coords) * H) + 20, H)

                # Draw bounding box
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
                
                # Remove or comment out the following line to eliminate text from the bounding box
                # cv2.putText(frame, predicted_label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX,
                #             1.0, (255, 0, 0), 2, cv2.LINE_AA)
        
        else:
            # Reset fixed predictions if no hands are detected
            fixed_predictions.clear()

        # Display the resulting frame
        cv2.imshow('ISL Prediction', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            logging.info("Exit key pressed. Exiting...")
            break

    # Release resources
    cap.release()
    cv2.destroyAllWindows()

# Function to exit the application and stop both Tkinter and OpenCV
def exit_app():
    logging.info('Exiting application...')
    if cap.isOpened():
        cap.release()  # Release the video capture
    hands.close()  # Close Mediapipe Hands
    cv2.destroyAllWindows()  # Close OpenCV windows
    root.quit()  # Stop the Tkinter main loop
    root.destroy()  # Close the Tkinter window

# Create an "Exit" button
exit_button = tk.Button(root, text="Exit", command=exit_app)
exit_button.pack(pady=5)

# Start the video capture in a separate thread to keep tkinter responsive
thread = threading.Thread(target=run, daemon=True)
thread.start()

# Start the tkinter main loop
root.mainloop()


2024-10-10 11:22:25,739 - Model loaded successfully from ./model.p.
2024-10-10 11:22:46,578 - Prediction error: X has 273 features, but RandomForestClassifier is expecting 84 features as input.
2024-10-10 11:22:46,625 - Prediction error: X has 273 features, but RandomForestClassifier is expecting 84 features as input.
2024-10-10 11:22:46,671 - Prediction error: X has 273 features, but RandomForestClassifier is expecting 84 features as input.
2024-10-10 11:22:47,563 - Prediction error: X has 273 features, but RandomForestClassifier is expecting 84 features as input.
2024-10-10 11:22:47,626 - Prediction error: X has 273 features, but RandomForestClassifier is expecting 84 features as input.
2024-10-10 11:22:47,626 - Word added: ?
2024-10-10 11:22:47,672 - Prediction error: X has 273 features, but RandomForestClassifier is expecting 84 features as input.
2024-10-10 11:22:47,720 - Prediction error: X has 273 features, but RandomForestClassifier is expecting 84 features as input.
2024-10-10

# If use model with left hand

In [2]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="SymbolDatabase.GetPrototype() is deprecated")

import pickle
import cv2
import mediapipe as mp
import numpy as np
import tkinter as tk
import threading
import time
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# Load the model
model_dict = pickle.load(open('./model.p', 'rb'))
model = model_dict['model']

# Initialize the video capture
cap = cv2.VideoCapture(0)

# Mediapipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.7, max_num_hands=1)

# Labels dictionary
labels_dict = {
    0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e', 5: 'f',
    6: 'g', 7: 'h', 8: 'i', 9: 'j', 10: 'k', 11: 'l',
    12: 'm', 13: 'n', 14: 'o', 15: 'p', 16: 'q', 17: 'r',
    18: 's', 19: 't', 20: 'u', 21: 'v', 22: 'w', 23: 'x',
    24: 'y', 25: 'z', 26: '0', 27: '1', 28: '2',
    29: '3', 30: '4', 31: '5', 32: '6', 33: '7', 34: '8', 35: '9',
    36: 'I love You', 37: 'yes', 38: 'No', 39: 'Hello', 40: 'Thanks',
    41: 'Sorry', 43: 'space'
}

# Create a tkinter window
root = tk.Tk()
root.title("ASL Prediction")

# Create a text field in tkinter
text_field = tk.Text(root, height=2, width=40, font=("Helvetica", 16))
text_field.pack()

# Create a clear text button
def clear_text():
    text_field.delete('1.0', tk.END)  # Clear the text field
    logging.info('Text cleared.')

clear_button = tk.Button(root, text="Clear Text", command=clear_text)
clear_button.pack()

# Variable to store the previous prediction and time
prev_prediction = None
word_count = 0  # Track how many words have been written

# Variables to track the detected character and delay counter
last_detected_character = None
fixed_character = ""
delayCounter = 0
start_time = time.time()

# Function to update the tkinter text field by appending the new predicted character
def update_text_field(text):
    if text == 'space':
        text_field.insert(tk.END, ' ')  # Append a space
    else:
        text_field.insert(tk.END, text + '')  # Append new character
    logging.info(f'Word added: {text if text != "space" else "space (represented as space)"}')

# Function to run video capture and ASL prediction in a separate thread
def run():
    global last_detected_character, fixed_character, delayCounter, start_time

    while True:
        data_aux = []
        x_ = []
        y_ = []

        ret, frame = cap.read()

        frame = cv2.flip(frame , 1)
        
        if not ret:
            break

        H, W, _ = frame.shape
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()
                )

            for hand_landmarks in results.multi_hand_landmarks:
                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y

                    x_.append(x)
                    y_.append(y)

                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y
                    data_aux.append(x - min(x_))
                    data_aux.append(y - min(y_))

                x1 = int(min(x_) * W) - 10
                y1 = int(min(y_) * H) - 10
                x2 = int(max(x_) * W) - 10
                y2 = int(max(y_) * H) - 10

                # Make prediction using the model
                prediction = model.predict([np.asarray(data_aux)])
                predicted_character = labels_dict[int(prediction[0])]

                # Draw a rectangle and the predicted character on the frame
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                            cv2.LINE_AA)

                current_time = time.time()

                # Timer logic: Check if the predicted character is the same for more than 1 second
                if predicted_character == last_detected_character:
                    if (current_time - start_time) >= 1.0:  # Class fixed after 1 second
                        fixed_character = predicted_character
                        if delayCounter == 0:  # Add character once after it stabilizes for 1 second
                            update_text_field(fixed_character)
                            delayCounter = 1
                else:
                    # Reset the timer when a new character is detected
                    start_time = current_time
                    last_detected_character = predicted_character
                    delayCounter = 0  # Reset delay counter for a new character

        # Show the video feed with the prediction
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

# Function to exit the application and stop both Tkinter and OpenCV
def exit_app():
    global cap
    logging.info('Exiting application...')
    if cap.isOpened():
        cap.release()  # Release the video capture
    cv2.destroyAllWindows()  # Close OpenCV windows
    root.quit()  # Stop the Tkinter main loop
    root.destroy()  # Close the Tkinter window

# Create an "Exit" button
exit_button = tk.Button(root, text="Exit", command=exit_app)
exit_button.pack()

# Start the video capture in a separate thread to keep tkinter responsive
threading.Thread(target=run, daemon=True).start()

# Start the tkinter main loop
root.mainloop()


2024-09-23 23:06:48,122 - Word added: Hello
2024-09-23 23:06:50,007 - Word added: space (represented as space)
2024-09-23 23:06:52,409 - Word added: a
2024-09-23 23:06:54,215 - Word added: space (represented as space)
2024-09-23 23:06:55,813 - Word added: i
2024-09-23 23:06:58,718 - Word added: a
2024-09-23 23:07:06,301 - Word added: Sorry
2024-09-23 23:07:07,959 - Exiting application...
