In [3]:
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import math
from tensorflow.keras.models import load_model
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageTk
import pyttsx3  # For text-to-speech
import os
import time  # For tracking time

# --- Image preprocessing settings ---
imgSize = 100  # side length, in pixels, of the square image handed to the model
offset = 20    # extra pixels kept around the detected hand's bounding box

# --- Shared state, filled in later by the loader / prediction-screen functions ---
model, labels = None, []  # active classifier and the class names matching its outputs
cap = detector = None     # webcam capture and hand detector, created with the prediction screen

# Gestures accepted so far, in order; joined together to form the displayed text
recorded_gestures = []

# --- Text-to-speech engine, slowed to 60 below the engine's default rate ---
engine = pyttsx3.init()
rate = engine.getProperty('rate')
engine.setProperty('rate', rate - 60)

# Load the model and labels
def load_model_safe(model_path, label_list):
    """Load a saved model from disk and make it the active classifier.

    On success the module-level ``model`` and ``labels`` are replaced. The
    prediction screen is opened only the first time a model is loaded (while
    the global ``cap`` is still ``None``). Later calls, made from the
    prediction screen's buttons or from a recognised switch gesture, only swap
    the model: ``start_prediction_screen`` destroys the selection window and
    opens the webcam, and repeating that while the screen is already running
    raises a Tcl error on the already-destroyed window.

    Args:
        model_path: Path of the saved model file passed to ``load_model``.
        label_list: Class names, indexed by the model's output position.

    If ``model_path`` does not exist an error dialog is shown and the current
    model and labels are left untouched.
    """
    global model, labels

    if not os.path.exists(model_path):
        messagebox.showerror("Error", f"Model file '{model_path}' not found.")
        return

    model = load_model(model_path)
    labels = label_list

    # `cap` is assigned by start_prediction_screen(); a non-None value means
    # the prediction window already exists, so only the model needed swapping.
    if cap is None:
        start_prediction_screen()

def load_alphabet_model():
    """Activate the alphabet model: letters A-Z followed by its four special gestures."""
    letters = [chr(code) for code in range(ord("A"), ord("Z") + 1)]
    special_gestures = ["space", "num", "words", "dot"]
    load_model_safe('asl_alphabet_model.h5', letters + special_gestures)

def load_number_model():
    """Activate the number model: digits 0-9 followed by its four special gestures."""
    digits = list(map(str, range(10)))
    special_gestures = ["space", "alphabets", "words", "dot"]
    load_model_safe('asl_number_model.h5', digits + special_gestures)

def load_word_model():
    """Activate the word model together with its ten class names."""
    word_labels = [
        'alphabets', 'dot', 'Hello', 'I love you', 'No',
        'num', 'Ok', 'space', 'Thanks', 'Yes',
    ]
    load_model_safe('asl_words_model.h5', word_labels)

def record_gesture(gesture):
    """Act on a recognised gesture name.

    * a falsy ``gesture`` shows a "No Gesture" warning;
    * ``"dot"`` speaks the recorded text (same action as the Speak button);
    * ``"num"`` / ``"alphabets"`` / ``"words"`` switch the active model and
      show a confirmation dialog;
    * ``"space"`` records a single space character;
    * anything else is recorded verbatim.

    Only the last two cases add to ``recorded_gestures``; they also refresh
    the recorded-text display and show a "Gesture Recognized" dialog.
    """
    if not gesture:
        messagebox.showwarning("No Gesture", "No gesture detected.")
        return

    if gesture == "dot":
        speak_text()
        return

    # (gesture name, loader to call, confirmation text) for each model switch
    model_switches = (
        ("num", load_number_model, "Switched to Numbers Model"),
        ("alphabets", load_alphabet_model, "Switched to Alphabets Model"),
        ("words", load_word_model, "Switched to Words Model"),
    )
    for trigger, activate, notice in model_switches:
        if gesture == trigger:
            activate()
            messagebox.showinfo("Model Switched", notice)
            return  # command gestures are never added to the recorded text

    recorded_gestures.append(" " if gesture == "space" else gesture)
    update_recorded_text()
    messagebox.showinfo("Gesture Recognized", f"Recognized Gesture: {gesture}")

def speak_text():
    """Read the text shown in the recorded-gestures label aloud.

    Shows a "No Text" warning instead when the label is empty. Speaking
    blocks until the speech engine has finished.
    """
    spoken = recorded_text.cget("text")
    if not spoken:
        messagebox.showwarning("No Text", "No recorded gestures to speak.")
        return

    engine.say(spoken)
    engine.runAndWait()

def clear_text():
    """Empty the recorded-gesture list and refresh the display.

    Shows a "No Text" warning when there is nothing to clear, otherwise a
    "Cleared" confirmation once the list has been emptied in place.
    """
    if not recorded_gestures:
        messagebox.showwarning("No Text", "No recorded gestures to clear.")
        return

    recorded_gestures.clear()
    update_recorded_text()
    messagebox.showinfo("Cleared", "Recorded gestures have been cleared.")

# Prediction Screen
def start_prediction_screen():
    """Replace the selection window with the live prediction window.

    Destroys ``selection_root``, opens webcam 0, builds the prediction UI
    (video canvas, current-gesture entry, recorded-text label, Speak/Clear
    buttons and model-switch buttons), starts the frame loop and then blocks
    in ``root.mainloop()`` until the window is closed.

    The objects other functions rely on are published as module globals:
    ``root``, ``canvas``, ``cap``, ``detector``, ``text_box`` and
    ``recorded_text``.

    The webcam is released and OpenCV windows are destroyed on the way out,
    including when building the UI or running the event loop raises.
    """
    global root, canvas, cap, detector, text_box, recorded_text

    selection_root.destroy()  # Close the selection screen

    cap = cv2.VideoCapture(0)
    try:
        detector = HandDetector(maxHands=1)

        root = tk.Tk()
        root.title("ASL Prediction")
        root.geometry("800x700")

        # Canvas for webcam feed
        canvas = tk.Canvas(root, width=640, height=400)
        canvas.pack()

        # Text box for the gesture recognised in the current frame
        text_box = tk.Entry(root, font=('Arial', 24), width=10)
        text_box.pack(pady=10)

        # Recorded gestures: caption plus the label holding the accumulated text
        recorded_label = tk.Label(root, text="Recorded Gestures:", font=('Arial', 14))
        recorded_label.pack(pady=10)

        recorded_text = tk.Label(root, text="", font=('Arial', 14), width=20, height=2,
                                 relief="solid", wraplength=300)
        recorded_text.pack(pady=10)

        # Gesture controls. There is no "Record" button: update_frame records
        # a gesture by itself once it has been held long enough.
        button_frame = tk.Frame(root)
        button_frame.pack(pady=10)
        control_buttons = (
            ("Speak", speak_text, "lightgreen"),
            ("Clear", clear_text, "red"),
        )
        for column, (caption, handler, colour) in enumerate(control_buttons):
            tk.Button(button_frame, text=caption, font=('Arial', 14),
                      command=handler, bg=colour).grid(row=0, column=column, padx=10)

        # Buttons for switching models
        switch_frame = tk.Frame(root)
        switch_frame.pack(pady=10)
        model_buttons = (
            ("Alphabets", load_alphabet_model, "lightblue"),
            ("Numbers", load_number_model, "lightgreen"),
            ("Words", load_word_model, "lightyellow"),
        )
        for column, (caption, handler, colour) in enumerate(model_buttons):
            tk.Button(switch_frame, text=caption, font=('Arial', 14),
                      command=handler, bg=colour).grid(row=0, column=column, padx=10)

        update_frame()  # Start updating frames; it reschedules itself via root.after
        root.mainloop()
    finally:
        # Always hand the camera back, even if the UI or event loop failed.
        cap.release()
        cv2.destroyAllWindows()

# Function to update the text box with recognized gesture
def update_text_box(gesture):
    """Show ``gesture`` in the entry box, replacing whatever was there.

    The ``"space"`` gesture is displayed as a literal space character; every
    other gesture is displayed by name.
    """
    shown = " " if gesture == "space" else gesture
    text_box.delete(0, tk.END)
    text_box.insert(0, shown)

# Function to update recorded gestures display
def update_recorded_text():
    """Refresh the recorded-text label with all recorded gestures joined, no separator."""
    joined = "".join(recorded_gestures)
    recorded_text.config(text=joined)

# Function to process each frame
def _letterbox_hand(imgCrop, w, h):
    """Fit a hand crop onto a white imgSize x imgSize square, ready for the model.

    The crop is resized so that the longer side of the ``w`` x ``h`` bounding
    box spans ``imgSize`` pixels and is placed roughly centred along the other
    axis. The result is scaled to the 0-1 range and given a leading batch
    axis.
    """
    imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255

    if h / w > 1:
        # Taller than wide: fill the full height, centre horizontally.
        k = imgSize / h
        wCal = math.ceil(k * w)
        wGap = math.ceil((imgSize - wCal) / 2)
        imgWhite[:, wGap:wGap + wCal] = cv2.resize(imgCrop, (wCal, imgSize))
    else:
        # Wider than tall (or square): fill the full width, centre vertically.
        k = imgSize / w
        hCal = math.ceil(k * h)
        hGap = math.ceil((imgSize - hCal) / 2)
        imgWhite[hGap:hGap + hCal, :] = cv2.resize(imgCrop, (imgSize, hCal))

    return np.expand_dims(imgWhite / 255.0, axis=0)


def update_frame():
    """Process one webcam frame and schedule the next call.

    Reads a frame from ``cap``, detects a hand, classifies the cropped hand
    with the active ``model`` and shows the predicted name both on the video
    and in the entry box. A gesture that stays the predicted one for 2
    seconds is passed to ``record_gesture``. Finally the frame is drawn on the
    canvas and the function re-arms itself with ``root.after`` (50 ms).

    The held-gesture state lives in the function attributes
    ``update_frame.last_gesture`` and ``update_frame.last_gesture_time``.

    If the frame cannot be read, a message is printed and the function returns
    without rescheduling itself, which ends the frame loop.
    """
    success, img = cap.read()
    if not success:
        print("Failed to capture frame.")
        return

    # Copy before detection so the displayed frame only carries our own
    # label and rectangle.
    # NOTE(review): cvzone's findHands appears to draw its hand overlay on the
    # returned image by default, so the crop fed to the model would include
    # it — confirm this matches how the models were trained.
    imgOutput = img.copy()
    hands, img = detector.findHands(img)

    if hands:
        hand = hands[0]
        x, y, w, h = hand['bbox']
        # Bounding box grown by `offset` on every side, clamped to the frame.
        imgCrop = img[max(0, y - offset):min(y + h + offset, img.shape[0]),
                      max(0, x - offset):min(x + w + offset, img.shape[1])]

        try:
            batch = _letterbox_hand(imgCrop, w, h)

            # verbose=0 keeps Keras from printing a progress bar for every
            # frame (this runs about 20 times per second).
            predictions = model.predict(batch, verbose=0)
            gestureName = labels[np.argmax(predictions)]

            cv2.putText(imgOutput, gestureName, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.rectangle(imgOutput, (x, y), (x + w, y + h), (0, 255, 0), 2)

            update_text_box(gestureName)

            # Record the gesture once it has been held for 2 seconds.
            now = time.time()
            if getattr(update_frame, 'last_gesture', None) != gestureName:
                # New (or first) gesture: start timing it.
                update_frame.last_gesture = gestureName
                update_frame.last_gesture_time = now
            elif now - update_frame.last_gesture_time >= 2:
                record_gesture(gestureName)
                # Restart after recording; record_gesture may have blocked on
                # a dialog, so read the clock again.
                update_frame.last_gesture_time = time.time()

        except Exception as e:
            # Best effort: a bad crop or prediction error must not stop the feed.
            print("Error processing hand:", e)
    else:
        # No hand in view: forget the held gesture, otherwise showing the same
        # gesture again after a pause would be recorded immediately.
        update_frame.last_gesture = None

    photo = ImageTk.PhotoImage(Image.fromarray(cv2.cvtColor(imgOutput, cv2.COLOR_BGR2RGB)))

    # Reuse a single canvas image item; creating a new item per frame would
    # pile up items on the canvas for as long as the window stays open.
    frame_item = getattr(canvas, 'frame_item', None)
    if frame_item is None:
        canvas.frame_item = canvas.create_image(0, 0, image=photo, anchor=tk.NW)
    else:
        canvas.itemconfig(frame_item, image=photo)
    canvas.image = photo  # keep a reference so Tk does not lose the image

    root.after(50, update_frame)  # 20 FPS

# Selection screen
selection_root = tk.Tk()
selection_root.title("Select ASL Model")
selection_root.geometry("400x300")

tk.Label(selection_root, text="Select ASL Model", font=('Arial', 24)).pack(pady=20)

# One button per model; each loader is called with no arguments when clicked.
for caption, loader in (
    ("Alphabets", load_alphabet_model),
    ("Numbers", load_number_model),
    ("Words", load_word_model),
):
    tk.Button(selection_root, text=caption, font=('Arial', 18), command=loader).pack(pady=10)

# Run the event loop for the selection window.
selection_root.mainloop()

ModuleNotFoundError: No module named 'mediapipe'

In [2]:
!pip install --upgrade pip
!pip install cvzone


Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/

ERROR: To modify pip, please run the following command:
C:\Users\hp\anaconda3\python.exe -m pip install --upgrade pip


Collecting cvzone
  Downloading cvzone-1.6.1.tar.gz (25 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: cvzone
  Building wheel for cvzone (setup.py): started
  Building wheel for cvzone (setup.py): finished with status 'done'
  Created wheel for cvzone: filename=cvzone-1.6.1-py3-none-any.whl size=26309 sha256=30ad254a177a163109bad492c6ef9c00296bd2b9b9798d1936527133717eb161
  Stored in directory: c:\users\hp\appdata\local\pip\cache\wheels\5d\21\e8\3147ae88d44e27f06e0175d337a7673c70fb957202cbbe2034
Successfully built cvzone
Installing collected packages: cvzone
Successfully installed cvzone-1.6.1


In [None]:
!pip install mediapipe
