In [1]:
# -------BUTTONS SETUP CODE FOR START GUI FUNC----------

def setup_buttons():
    """Create the two rows of action buttons below the camera view.

    The first row (white buttons) holds the speak / clear / listen / voice
    settings actions. The second row (black buttons) switches the active
    recognition model and ends with a red Logout button that destroys the
    main window. Both rows are packed into the module-level ``root`` window.
    """
    def make_style(background, foreground, pressed_background, pad_x):
        # Every button shares font, relief and vertical padding; only the
        # colours and horizontal padding differ between the three groups.
        return {
            'font': ('Arial', 14),
            'bg': background,
            'fg': foreground,
            'activebackground': pressed_background,
            'activeforeground': foreground,
            'relief': 'raised',
            'padx': pad_x,
            'pady': 5,
        }

    # First row: white background with black text
    frame = tk.Frame(root, bg="#f0f0f0")
    frame.pack(pady=15)

    white_style = make_style('white', 'black', 'white', 10)
    utility_actions = (
        ("🗣️ Speak", speak_text),
        ("🧹 Clear", clear_text),
        ("🎧 Listen", open_listen),
        ("🎙️ Voice Settings", select_exact_voice_and_start_gui),
    )
    for column, (caption, action) in enumerate(utility_actions):
        button = tk.Button(frame, text=caption, command=action, **white_style)
        button.grid(row=0, column=column, padx=10)

    # Second row: black with white text
    switch = tk.Frame(root, bg="#f0f0f0")
    switch.pack(pady=15)

    black_style = make_style('black', 'white', 'black', 8)
    model_actions = (
        ("🔤 Alphabets", load_alphabet_model),
        ("🔢 Numbers", load_number_model),
        ("📝 Words", load_word_model),
        ("✋✋ Both Hands", load_both_hand_model),
        ("⚡ Dynamic", load_dynamic_model),
    )
    for column, (caption, action) in enumerate(model_actions):
        button = tk.Button(switch, text=caption, command=action, **black_style)
        button.grid(row=0, column=column, padx=8, pady=5)

    # Logout button: red base with black text, darker red while pressed
    red_style = make_style('#e74c3c', 'black', '#c0392b', 8)
    logout = tk.Button(switch, text="🛑 Logout", command=root.destroy, **red_style)
    logout.grid(row=0, column=5, padx=8, pady=5)



In [2]:
import random
import tkinter as tk
import cv2
import mediapipe as mp

# Fallback vocabulary: pick_new_suggestions() samples three of these words
# whenever the last recorded gesture has no entry in bigram_dict.
suggestion_words = [
    "hello", "my", "name", "is", "study", "in", "department", "ok", "am", "help", "good", "night", "morning",
    "pray", "want", "see", "later", "when", "water", "you", "done", "i", "love", "u", "yes", "thanks",
    "sign", "language", "deaf", "project", "fyp", "university", "student", "presentation", "today",
    "introduce", "learn", "communicate", "understand", "gesture", "recognize", "recognition",
    "speech", "text", "voice", "convert", "system", "using", "ai", "model", "deep", "learning", "thank", "team", "group"
]

# Hand-written bigram table: lower-cased previous word -> candidate next words.
# Every entry lists exactly three candidates. Some candidates (e.g. "to", "me",
# "are", "with") do not appear in suggestion_words.
bigram_dict = {
    "hello": ["my", "i", "good"],
    "my": ["name", "university", "project"],
    "name": ["is", "in", "project"],
    "i": ["am", "love", "want"],
    "am": ["student", "learning", "good"],
    "want": ["to", "help", "see"],
    "to": ["learn", "study", "see"],
    "study": ["in", "ai", "project"],
    "in": ["university", "department", "project"],
    "project": ["fyp", "presentation", "team"],
    "sign": ["language", "system", "recognition"],
    "language": ["model", "recognition", "system"],
    "deaf": ["community", "culture", "student"],
    "recognition": ["system", "model", "project"],
    "good": ["morning", "night", "afternoon"],
    "thank": ["you", "team", "group"],
    "thanks": ["team", "you", "group"],
    "team": ["project", "presentation", "group"],
    "presentation": ["today", "team", "group"],
    "learn": ["sign", "language", "ai"],
    "help": ["me", "you", "project"],
    "today": ["presentation", "project", "team"],
    "fyp": ["project", "presentation", "team"],
    "student": ["project", "presentation", "team"],
    "using": ["ai", "model", "system"],
    "ai": ["model", "system", "project"],
    "model": ["recognition", "system", "project"],
    "system": ["recognition", "model", "language"],
    "speech": ["to", "text", "recognition"],
    "text": ["to", "speech", "convert"],
    "voice": ["recognition", "system", "text"],
    "convert": ["speech", "text", "system"],
    "love": ["you", "my", "sign"],
    "you": ["are", "want", "help"],
    "see": ["you", "project", "presentation"],
    "communicate": ["with", "using", "sign"],
    "understand": ["sign", "language", "project"],
    "gesture": ["recognition", "system", "project"],
    "recognize": ["sign", "gesture", "language"],
    "introduce": ["my", "team", "project"]
}


# Shared state for the suggestion panel
# current_suggestions: the words currently offered (rewritten by pick_new_suggestions).
current_suggestions = []
# suggestion_labels: the Tk labels created by create_suggestions_panel, one per slot.
suggestion_labels = []
# Alias for the MediaPipe hands solution module; not referenced by the code in this cell.
mp_hands = mp.solutions.hands

def create_suggestions_panel(parent):
    """Build the "Next Possible Words" panel and return its frame.

    The module-level ``suggestion_labels`` list is emptied and refilled in
    place with the three placeholder labels created here, so later calls to
    ``pick_new_suggestions`` update the labels of the newest panel.
    The caller is responsible for packing the returned frame.
    """
    global suggestion_labels

    panel = tk.Frame(parent, bg='white', padx=15, pady=15, relief='ridge', bd=2)

    title_options = {
        'text': "✨ Next Possible Words",
        'font': ('Helvetica', 16, 'bold'),
        'fg': '#00796B',
        'bg': 'white',
    }
    heading = tk.Label(panel, **title_options)
    heading.pack(anchor='w', pady=(0, 10))

    # Reuse the same list object so other references to it stay valid.
    suggestion_labels.clear()
    for rank in (1, 2, 3):
        slot = tk.Label(
            panel,
            text="{}. (waiting...)".format(rank),
            font=('Arial', 14),
            fg='#009688',
            bg='white',
            anchor='w',
            justify='left',
        )
        slot.pack(anchor='w', pady=2)
        suggestion_labels.append(slot)

    return panel

def pick_new_suggestions():
    """Choose three next-word suggestions and show them in the panel labels.

    The lower-cased last entry of ``recorded_gestures`` is looked up in
    ``bigram_dict``. A hit samples three words from that entry, topping up
    from ``suggestion_words`` if the entry has fewer than three. A miss (or
    an empty history) samples three words from ``suggestion_words``.

    The result is stored in the module-level ``current_suggestions``. Only
    labels that actually exist are updated, so the function is safe to call
    before ``create_suggestions_panel`` has built the panel.
    """
    global current_suggestions

    # recorded_gestures is created by the main GUI cell, which sits further
    # down the notebook; treat a missing history as an empty one instead of
    # raising NameError.
    history = globals().get("recorded_gestures") or []
    last_word = history[-1].lower() if history else ""

    pool = bigram_dict.get(last_word)
    if pool is None:
        current_suggestions = random.sample(suggestion_words, 3)
    elif len(pool) >= 3:
        current_suggestions = random.sample(pool, 3)
    else:
        fillers = [word for word in suggestion_words if word not in pool]
        current_suggestions = pool + random.sample(fillers, 3 - len(pool))

    # zip() stops at the shorter sequence, so a panel that has not been
    # created yet (no labels) no longer causes an IndexError.
    numbered = enumerate(zip(suggestion_labels, current_suggestions), start=1)
    for position, (label, word) in numbered:
        label.config(text=f"{position}. {word}")

def start_suggestion_system():
    """Open the suggestion camera handle and show an initial set of words.

    Stores a ``cv2.VideoCapture(0)`` in the module-level ``suggestion_cap``.
    If a handle from an earlier call is still held, it is released first so
    repeated calls do not leak capture devices.
    """
    global suggestion_cap

    # suggestion_cap does not exist before the first call, so read it via
    # globals() instead of the bare name.
    previous = globals().get("suggestion_cap")
    if previous is not None:
        previous.release()

    suggestion_cap = cv2.VideoCapture(0)
    pick_new_suggestions()
    print("Suggestion system started")


In [3]:
# Model loaders
def load_model_safe(path, label_list, model_type):
    """Load a Keras model from ``path`` and make it the active classifier.

    On success the module-level ``model``, ``labels`` and
    ``current_model_type`` are replaced and an info dialog is shown.
    On failure (file missing, or ``load_model`` raising) an error dialog is
    shown and the previously active model, labels and type are left untouched.

    Args:
        path: File path of the saved model.
        label_list: Class labels, indexed by the model's output position.
        model_type: Tag stored in ``current_model_type``; ``update_frame``
            uses it to choose the prediction routine.
    """
    global model, labels, current_model_type

    if not os.path.exists(path):
        messagebox.showerror("Error", f"Model file not found: {path}")
        return

    try:
        loaded = load_model(path)
    except Exception as exc:
        # A corrupt or incompatible file should be reported to the user, not
        # escape from the button callback that triggered the load.
        messagebox.showerror("Error", f"Could not load model {path}: {exc}")
        return

    # Swap the three globals only once loading has succeeded.
    model = loaded
    labels = label_list
    current_model_type = model_type

    messagebox.showinfo("Model Loaded", f"{model_type.upper()} model loaded.")

def load_alphabet_model():
    """Activate 'alphabet_model.h5' with its letter/control labels (type "alpha")."""
    alphabet_labels = [
        'A', 'B', 'C', 'D', 'dot', 'E', 'F', 'G', 'H', 'I',
        'J', 'K', 'L', 'M', 'N', 'num', 'O', 'P', 'Q', 'R',
        'S', 'space', 'T', 'U', 'V', 'W', 'words', 'X', 'Y', 'Z',
    ]
    load_model_safe('alphabet_model.h5', alphabet_labels, "alpha")


def load_number_model():
    """Activate 'number_model.h5': digits 0-9 followed by control labels (type "num")."""
    digit_labels = list(map(str, range(10)))
    control_labels = ["space", "alphabets", "words", "dot"]
    load_model_safe('number_model.h5', digit_labels + control_labels, "num")


def load_word_model():
    """Activate 'singleHnad.h5' with its word/control labels (type "cnn")."""
    word_labels = [
        'alphabets', 'dot', 'Hello', 'I love you', 'No',
        'num', 'Ok', 'space', 'Thanks', 'Yes',
    ]
    load_model_safe('singleHnad.h5', word_labels, "cnn")


def load_both_hand_model():
    """Activate 'bothHands.h5' (type "mlp") and set the landmark vector size to 225."""
    global EXPECTED_LANDMARK_SIZE
    # predict_mlp pads or truncates its feature vector to this length.
    EXPECTED_LANDMARK_SIZE = 225
    two_hand_labels = [
        'Am', 'Done', 'Good', 'Help', 'I',
        'Later', 'Morning', 'Night', 'Ok', 'Pray',
        'See', 'Want', 'Water', 'When', 'You',
    ]
    load_model_safe('bothHands.h5', two_hand_labels, "mlp")


def load_dynamic_model():
    """Activate 'dynamic.keras' with its sequence-gesture labels (type "lstm")."""
    dynamic_labels = ['hello', 'my', 'name', 'is', 'i', 'study', 'in', 'department', 'am']
    load_model_safe('dynamic.keras', dynamic_labels, "lstm")


In [10]:
# -------MAIN GUI TESTING SCREEN CODE----------



import cv2
import numpy as np
import tkinter as tk
from tkinter import messagebox
from tkinter import ttk
from PIL import Image, ImageTk
# import pyttsx3
import time
import os
from collections import deque
from tensorflow.keras.models import load_model
import math
import mediapipe as mp
from cvzone.HandTrackingModule import HandDetector

# Constants
# imgSize: side length of the square white canvas predict_cnn feeds to the model.
imgSize = 100
# offset: pixels added around the detected hand bounding box before cropping.
offset = 20
# 1662 = 33*4 (pose) + 468*3 (face) + 2 * 21*3 (hands), the length returned by
# extract_keypoints; load_both_hand_model overrides this to 225.
EXPECTED_LANDMARK_SIZE = 1662
# Rolling window of per-frame keypoint vectors; a prediction is made once it holds 40.
sequence = deque(maxlen=40)

# Globals
# running: update_frame returns without rescheduling itself when this is False.
running = True
auto_speak_mode = None  # replaced by a StringVar ("last" / "full") in the voice settings window
# model / labels / current_model_type are set together by load_model_safe.
model = None
labels = []
current_model_type = "cnn"
# Ordered list of accepted gestures/words; joined with spaces for display.
recorded_gestures = []
cap = None
# detector / holistic are created lazily by the prediction functions.
detector = None
holistic = None
# NOTE(review): caption_label is not referenced by the code visible here.
caption_label = None
# True after a finger-count selection until the hand shows a count outside 1-3.
suggestion_selection_active = False
# NOTE(review): update_frame keeps its own timer as a function attribute;
# this module-level value is not read by the code visible here.
last_finger_count_time = 0
finger_count_cooldown = 0.2  # seconds a finger count must be held before it selects a suggestion



# GUI elements (assigned in start_gui)
root = None
canvas = None
text_box = None
recorded_text = None
auto_speak_var = None  # replaced by a BooleanVar in the voice settings window


# TTS
# The pyttsx3 engine is created in the voice settings cell, not here.

# Button colors: black buttons
btn_color = "#000000"
btn_active = "#333333"

# Button style shared by the model selector buttons (passed as **btn_style)
btn_style = {
        'font': ('Segoe UI', 14),
        'width': 22,
        'height': 2,
        'bg': btn_color,
        'fg': 'white',
        'activebackground': btn_active,
        'activeforeground': 'white',
        'bd': 0,
        'relief': 'flat',
        'cursor': 'hand2'
}


# Gesture logic
def record_gesture(gesture):
    """Apply one accepted gesture label to the recorded sentence.

    Control labels trigger an action instead of being stored: "dot" speaks
    the sentence, while "num", "alphabets" and "words" switch the active
    model. "space" is stored as a single blank. Any other label is stored
    as-is. The recorded-text display is refreshed in every case.
    """
    if gesture == "dot":
        speak_text()
    elif gesture == "num":
        load_number_model()
    elif gesture == "alphabets":
        load_alphabet_model()
    elif gesture == "words":
        load_word_model()
    else:
        # Everything that is not a command becomes part of the sentence.
        recorded_gestures.append(" " if gesture == "space" else gesture)

    update_recorded_text()
  
def speak_text():
    """Speak the text currently shown in the recorded-text label, if any."""
    sentence = recorded_text.cget("text")
    if not sentence:
        return
    engine.say(sentence)
    # runAndWait blocks until the utterance has been processed.
    engine.runAndWait()

def clear_text():
    """Empty the recorded gesture list in place and refresh the display."""
    del recorded_gestures[:]
    update_recorded_text()

def update_recorded_text():
    """Refresh the recorded-text label and suggestions, then auto-speak if enabled.

    Auto speak runs only when ``auto_speak_var`` is set and true. In "full"
    mode the whole label text is spoken; otherwise only the most recent
    entry of ``recorded_gestures`` is spoken. Empty text is never spoken.
    """
    recorded_text.config(text=" ".join(recorded_gestures))
    pick_new_suggestions()

    # Auto speak is optional: the variable stays None until voice settings ran.
    if not (auto_speak_var and auto_speak_var.get()):
        return

    if auto_speak_mode and auto_speak_mode.get() == "full":
        utterance = recorded_text.cget("text")
    elif recorded_gestures:
        utterance = recorded_gestures[-1]
    else:
        utterance = ""

    if utterance:
        engine.say(utterance)
        engine.runAndWait()


def extract_keypoints(results):
    """Flatten holistic landmarks into one 1-D feature vector.

    The vector is the concatenation of pose (x, y, z, visibility), face,
    left hand and right hand (x, y, z each). A group that was not detected
    is replaced by zeros of length 33*4, 468*3, 21*3 and 21*3 respectively.
    """
    def flatten(group, fields, point_count):
        # Missing group -> zero block sized for the expected landmark count.
        if not group:
            return np.zeros(point_count * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        flatten(results.pose_landmarks, xyz + ('visibility',), 33),
        flatten(results.face_landmarks, xyz, 468),
        flatten(results.left_hand_landmarks, xyz, 21),
        flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

# Prediction handlers
def predict_cnn(img):
    """Classify a single-hand gesture in ``img`` with the active image model.

    The hand is located with the shared ``HandDetector`` (created on first
    use), cropped with ``offset`` pixels of margin, and pasted centred onto
    an ``imgSize`` x ``imgSize`` white canvas with its aspect ratio kept.
    The canvas is scaled to [0, 1] and passed to ``model.predict``.

    Args:
        img: BGR frame; hand landmarks are drawn onto it by the detector.

    Returns:
        The predicted label, or "" when no hand is found, the crop is empty
        or degenerate, or no model is loaded.
    """
    global detector
    if detector is None:
        detector = HandDetector(maxHands=1)

    # draw=True overlays the detected hand on the frame so tracking is visible.
    hands, img = detector.findHands(img, draw=True)
    if not hands:
        return ""

    # Without a loaded model (e.g. the model file was missing) there is
    # nothing to classify; returning "" keeps the camera loop alive instead
    # of raising AttributeError on None.predict.
    if model is None:
        return ""

    x, y, w, h = hands[0]['bbox']
    # A zero-sized box would divide by zero in the aspect-ratio maths below.
    if w <= 0 or h <= 0:
        return ""

    # Safe cropping with coordinates clamped to the frame.
    y1 = max(0, y - offset)
    y2 = min(y + h + offset, img.shape[0])
    x1 = max(0, x - offset)
    x2 = min(x + w + offset, img.shape[1])
    img_crop = img[y1:y2, x1:x2]
    if img_crop.size == 0:
        return ""

    img_white = np.ones((imgSize, imgSize, 3), np.uint8) * 255

    if h / w > 1:
        # Taller than wide: fill the height, centre horizontally.
        k = imgSize / h
        w_cal = math.ceil(k * w)
        img_resize = cv2.resize(img_crop, (w_cal, imgSize))
        w_gap = math.ceil((imgSize - w_cal) / 2)
        img_white[:, w_gap:w_gap + w_cal] = img_resize
    else:
        # Wider than tall (or square): fill the width, centre vertically.
        k = imgSize / w
        h_cal = math.ceil(k * h)
        img_resize = cv2.resize(img_crop, (imgSize, h_cal))
        h_gap = math.ceil((imgSize - h_cal) / 2)
        img_white[h_gap:h_gap + h_cal, :] = img_resize

    batch = np.expand_dims(img_white / 255.0, axis=0)
    predictions = model.predict(batch)
    return labels[np.argmax(predictions)]


def predict_mlp(img):
    """Predict a two-hand word from pose and hand landmarks of one frame.

    The x/y/z coordinates of the detected pose, left-hand and right-hand
    landmarks are concatenated in that order, then zero-padded or truncated
    to ``EXPECTED_LANDMARK_SIZE`` before being passed to ``model.predict``.
    Returns "" unless at least one hand was detected.
    """
    global holistic
    if holistic is None:
        holistic = mp.solutions.holistic.Holistic(static_image_mode=False, model_complexity=1, enable_segmentation=False)

    results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    groups = (
        results.pose_landmarks,
        results.left_hand_landmarks,
        results.right_hand_landmarks,
    )
    # Undetected groups contribute nothing; the vector is padded afterwards.
    coords = [
        value
        for group in groups
        if group
        for point in group.landmark
        for value in (point.x, point.y, point.z)
    ]

    hand_seen = results.left_hand_landmarks or results.right_hand_landmarks
    if not hand_seen or not coords:
        return ""

    features = np.array(coords)
    shortfall = EXPECTED_LANDMARK_SIZE - len(features)
    if shortfall > 0:
        features = np.pad(features, (0, shortfall))
    elif shortfall < 0:
        features = features[:EXPECTED_LANDMARK_SIZE]

    scores = model.predict(features.reshape(1, -1))
    return labels[np.argmax(scores)]


def predict_lstm(img):
    """Feed one frame into the rolling keypoint window and predict when full.

    The frame's holistic keypoints are appended to the shared ``sequence``
    deque. A label is returned only while the deque holds exactly 40
    entries; otherwise "" is returned.
    """
    global holistic, sequence
    if holistic is None:
        holistic = mp.solutions.holistic.Holistic()

    results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    sequence.append(extract_keypoints(results))

    if len(sequence) != 40:
        return ""

    # Add a leading batch axis before handing the window to the model.
    scores = model.predict(np.expand_dims(sequence, axis=0))
    return labels[np.argmax(scores)]

# Main camera loop
def update_frame():
    """Process one camera frame and schedule the next one.

    Steps per frame:
      1. Read a frame from ``cap``. The loop ends if ``running`` is False
         or the read fails.
      2. Run the predictor matching ``current_model_type``.
      3. Show the prediction, and record it once the same prediction has
         been held for 2 seconds.
      4. For the "cnn" and "mlp" model types, let a held finger count of
         1-3 pick the matching word suggestion.
      5. Draw the frame on the canvas and re-arm itself via ``root.after``.

    Per-loop timers are kept as attributes on the function object.
    """
    global cap, running, detector, holistic, suggestion_selection_active

    # Check the stop flag before touching the camera, so a stopped loop does
    # not read from a released or missing capture.
    if not running:
        return

    if not hasattr(update_frame, 'last_finger_count'):
        update_frame.last_finger_count = None
        update_frame.last_finger_count_time = time.time()

    success, img = cap.read()
    if not success:
        print("Camera failed.")
        return

    gesture = ""

    if current_model_type in ("cnn", "num", "alpha"):
        gesture = predict_cnn(img)

    elif current_model_type == "mlp":
        gesture = predict_mlp(img)

    elif current_model_type == "lstm":
        # Build the holistic tracker once and keep it in the module global.
        # Creating a new tracker on every frame is slow and never frees the
        # previous one.
        if holistic is None:
            holistic = mp.solutions.holistic.Holistic()

        results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        # Draw landmarks
        drawer = mp.solutions.drawing_utils
        drawer.draw_landmarks(img, results.pose_landmarks, mp.solutions.holistic.POSE_CONNECTIONS)
        drawer.draw_landmarks(img, results.left_hand_landmarks, mp.solutions.holistic.HAND_CONNECTIONS)
        drawer.draw_landmarks(img, results.right_hand_landmarks, mp.solutions.holistic.HAND_CONNECTIONS)
        drawer.draw_landmarks(img, results.face_landmarks, mp.solutions.holistic.FACEMESH_TESSELATION)

        sequence.append(extract_keypoints(results))
        if len(sequence) == 40:
            scores = model.predict(np.expand_dims(sequence, axis=0))
            gesture = labels[np.argmax(scores)]

    if gesture:
        text_box.delete(0, tk.END)
        text_box.insert(0, gesture)

        # Record a gesture only after it has been held for 2 seconds; a
        # different gesture restarts the timer.
        if hasattr(update_frame, "last_gesture") and gesture == update_frame.last_gesture:
            if time.time() - update_frame.last_time >= 2:
                record_gesture(gesture)
                update_frame.last_time = time.time()
        else:
            update_frame.last_gesture = gesture
            update_frame.last_time = time.time()

        # Draw gesture text on the webcam frame
        cv2.putText(img, f"Prediction: {gesture}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 255, 50), 2)

    # Finger count selection
    if current_model_type in ("cnn", "mlp"):
        if detector is None:
            detector = HandDetector(maxHands=1)
        hands, _ = detector.findHands(img, draw=False)
        if hands:
            count = detector.fingersUp(hands[0]).count(1)
            if count in (1, 2, 3):
                if update_frame.last_finger_count == count:
                    # Same count as before: select once it was held long enough.
                    held = time.time() - update_frame.last_finger_count_time
                    if held >= finger_count_cooldown and not suggestion_selection_active:
                        idx = count - 1
                        if idx < len(current_suggestions):
                            word = current_suggestions[idx]
                            recorded_gestures.append(word)
                            # update_recorded_text() already refreshes the
                            # suggestions, so no second refresh is needed.
                            update_recorded_text()
                            print(f"Selected: {word}")
                            suggestion_selection_active = True
                else:
                    # new finger count detected, reset timer
                    update_frame.last_finger_count = count
                    update_frame.last_finger_count_time = time.time()
            else:
                # Hand shows no selectable count: allow the next selection.
                suggestion_selection_active = False
                update_frame.last_finger_count = None

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)
    imgtk = ImageTk.PhotoImage(image=img)
    canvas.create_image(0, 0, anchor=tk.NW, image=imgtk)
    # Keep a reference so the image is not garbage-collected while displayed.
    canvas.imgtk = imgtk

    root.after(10, update_frame)

       

# GUI setup
def start_gui():
    """Build the fullscreen recognition window and run it until it closes.

    Creates the camera canvas, suggestions panel, prediction entry,
    recorded-text label and button rows, opens webcam 0 and starts the
    ``update_frame`` loop. If the webcam cannot be opened, an error dialog
    is shown and the window is destroyed again. The camera is released when
    the main loop ends.
    """
    # running must be listed here: without it the assignment below only
    # creates a local and the module-level flag is never set.
    global root, cap, canvas, text_box, recorded_text, running
    running = True
    root = tk.Tk()
    root.title("ASL Recognition - All Models")
    root.attributes('-fullscreen', True)

    # Escape leaves fullscreen mode (it does not close the window).
    root.bind('<Escape>', lambda event: root.attributes('-fullscreen', False))

    # Create main frame horizontally
    main_frame = tk.Frame(root, bg="#f0f0f0")
    main_frame.pack(pady=10, expand=True)

    # Add some stretch space on the left to center the canvas
    left_spacer = tk.Frame(main_frame, width=250, bg="#f0f0f0")
    left_spacer.pack(side=tk.LEFT, fill=tk.Y)

    # Canvas in the center
    canvas = tk.Canvas(main_frame, width=640, height=400, bg='black')
    canvas.pack(side=tk.LEFT)

    # Suggestions panel on the right
    suggestions_panel = create_suggestions_panel(main_frame)
    suggestions_panel.pack(side=tk.LEFT, padx=20)
    pick_new_suggestions()

    text_frame = tk.Frame(root, bg="#f0f0f0")
    text_frame.pack(pady=10)

    text_box = tk.Entry(text_frame, font=('Arial', 24), width=10)
    text_box.pack(side=tk.LEFT, padx=(0, 10))

    recorded_text = tk.Label(root, text="", font=('Arial', 14), relief="solid", width=20, height=2, wraplength=300)
    recorded_text.pack(pady=10)

    setup_buttons()

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        messagebox.showerror("Error", "Cannot access webcam.")
        # Do not leave an orphaned fullscreen window with no event loop.
        cap.release()
        cap = None
        root.destroy()
        root = None
        return

    try:
        update_frame()
        root.mainloop()
    finally:
        # cap may have been released and cleared by the voice settings
        # screen while the loop was running.
        if cap is not None:
            cap.release()

# Initial model selector
import tkinter as tk

def model_selector():
    """Show the initial model chooser and block until it is closed.

    Each button loads its model, closes this chooser and then opens the main
    recognition window via ``start_gui``.
    """
    sel_root = tk.Tk()
    sel_root.title("🤟 Choose ASL Model")
    sel_root.geometry("500x520")
    sel_root.configure(bg="#ffffff")  # pure white background

    # Heading label
    heading = tk.Label(sel_root, text="Select Initial Model",
                       font=('Segoe UI', 20, 'bold'), bg="#ffffff", fg="#000000")
    heading.pack(pady=(40, 25))

    # One handler per model; all follow the same load -> close -> open flow.
    def choose_alphabets():
        load_alphabet_model()
        sel_root.destroy()
        start_gui()

    def choose_numbers():
        load_number_model()
        sel_root.destroy()
        start_gui()

    def choose_words():
        load_word_model()
        sel_root.destroy()
        start_gui()

    def choose_both_hands():
        load_both_hand_model()
        sel_root.destroy()
        start_gui()

    def choose_dynamic():
        load_dynamic_model()
        sel_root.destroy()
        start_gui()

    # Buttons with emojis
    choices = (
        ("🔤 Alphabets", choose_alphabets),
        ("🔢 Numbers", choose_numbers),
        ("📝 Words", choose_words),
        ("✋✋ Both Hands", choose_both_hands),
        ("⚡ Dynamic", choose_dynamic),
    )
    for caption, handler in choices:
        tk.Button(sel_root, text=caption, command=handler, **btn_style).pack(pady=8)

    sel_root.mainloop()

# Opens the model chooser when this code runs as the main module. Running
# this cell in a notebook kernel also takes this branch, so the GUI starts
# immediately and blocks the cell until the windows are closed.
# NOTE(review): the voice engine and listen screen are defined in later
# cells, so launching from here depends on those cells having run already.
if __name__ == "__main__":
    model_selector()





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84

In [11]:
import tkinter as tk
import threading
import speech_recognition as sr

def start_listen_gui():
    """Open the voice-recognition window and show recognised speech.

    A background thread listens on the default microphone and sends each
    phrase to Google speech recognition. Saying "quit", "exit" or "stop"
    closes the window after one second and opens the model chooser.
    Listening also ends after a 10-second silence or a request error.
    Closing the window stops the background thread.
    """
    root = tk.Tk()
    root.title("🎤 Voice Recognition")
    root.geometry("500x300")
    root.resizable(False, False)
    root.configure(bg='#f0f4f8')  # soft background

    frame = tk.Frame(root, bg='#f0f4f8')
    frame.pack(expand=True)

    # Stylish title (static, no blink)
    title = tk.Label(frame, text="🎤 Listening... Say 'exit' to stop",
                     font=('Segoe UI', 18, 'bold'), bg='#f0f4f8', fg='#333')
    title.pack(pady=(20, 10))

    # Text display with bold font
    text_display = tk.Label(frame, text="📝 Waiting for speech...",
                            font=('Segoe UI', 14, 'bold'), wraplength=450,
                            bg='white', fg='#444', width=40, height=5,
                            bd=2, relief="groove", justify="center")
    text_display.pack(pady=15, padx=20)

    stop_flag = threading.Event()

    def safe_update_text(new_text):
        # The widget may already be destroyed when a queued update runs.
        try:
            if text_display.winfo_exists():
                text_display.config(text=new_text)
        except tk.TclError:
            pass

    def schedule(delay_ms, callback):
        # Called from the worker thread. If the window is gone, scheduling
        # fails; treat that as a request to stop listening.
        try:
            root.after(delay_ms, callback)
        except (tk.TclError, RuntimeError):
            stop_flag.set()

    def show(message):
        # message is bound as a parameter here, so the queued callback shows
        # exactly this text even if the worker has since moved on.
        schedule(0, lambda: safe_update_text(message))

    def go_home():
        try:
            root.destroy()
        except tk.TclError:
            pass  # window was already closed by the user
        model_selector()  # go back to home screen

    def on_close():
        # Tell the worker to stop before tearing the window down.
        stop_flag.set()
        root.destroy()

    root.protocol("WM_DELETE_WINDOW", on_close)

    def listen_loop():
        recognizer = sr.Recognizer()
        mic = sr.Microphone()
        with mic as source:
            recognizer.adjust_for_ambient_noise(source)
            while not stop_flag.is_set():
                try:
                    audio = recognizer.listen(source, timeout=10, phrase_time_limit=5)
                    text = recognizer.recognize_google(audio)
                except sr.WaitTimeoutError:
                    show("⏸️ No speech detected.")
                    break
                except sr.UnknownValueError:
                    show("🤔 Couldn't understand audio.")
                    continue
                except sr.RequestError:
                    show("❌ Check your internet connection.")
                    break

                show(f"📝 {text}")

                if text.lower() in ["quit", "exit", "stop"]:
                    stop_flag.set()
                    show("👋 Returning to home...")
                    schedule(1000, go_home)
                    break

    threading.Thread(target=listen_loop, daemon=True).start()
    root.mainloop()



In [12]:
# -------VOICE SETTING GUI AND LOGIC----------

import tkinter as tk
from tkinter import ttk
import pyttsx3

# Shared text-to-speech engine used by speak_text and the auto-speak logic.
engine = pyttsx3.init()
# Engine speaking rate at start-up; used as the initial slider value in the
# voice settings window.
default_rate = engine.getProperty('rate')

def blend_colors(color1, color2, factor):
    """Linearly interpolate between two '#rrggbb' colours.

    Args:
        color1: Start colour as a hex string (leading '#' optional).
        color2: End colour as a hex string (leading '#' optional).
        factor: 0 returns ``color1``, 1 returns ``color2``.

    Returns:
        The blended colour as a lower-case '#rrggbb' string; each channel is
        truncated to an integer.
    """
    def channels(code):
        digits = code.lstrip('#')
        return [int(digits[pos:pos + 2], 16) for pos in range(0, 6, 2)]

    start_rgb = channels(color1)
    end_rgb = channels(color2)
    mixed = [int(low + (high - low) * factor) for low, high in zip(start_rgb, end_rgb)]
    return '#' + ''.join(format(value, '02x') for value in mixed)

def select_exact_voice_and_start_gui():
    """Show the voice settings window, then continue to the model chooser.

    Any open camera and main window are closed first. The window lets the
    user pick a TTS voice, toggle auto speak, choose the auto-speak mode
    ("last" gesture or "full" text) and set the speech rate. "Continue"
    applies the settings to ``engine``, publishes the toggle and mode
    variables as the module-level ``auto_speak_var`` / ``auto_speak_mode``
    and opens ``model_selector``.
    """
    global engine, cap, root

    # cap and root belong to the main GUI cell and may not exist yet, so they
    # are read through globals() instead of relying on a bare except.
    open_cap = globals().get("cap")
    if open_cap is not None:
        open_cap.release()
    cap = None

    main_window = globals().get("root")
    if main_window is not None:
        try:
            main_window.destroy()
        except tk.TclError:
            pass  # the window was already destroyed
    root = None

    voices = engine.getProperty('voices')
    voice_names = [v.name for v in voices]

    sel_win = tk.Tk()
    sel_win.title("🎙️ Select Voice & Settings")
    sel_win.geometry("440x420")
    sel_win.configure(bg="#ffffff")

    # --- Title ---
    tk.Label(sel_win, text="Choose Voice to Speak", font=('Segoe UI', 18, 'bold'),
             bg='#ffffff').pack(pady=(20, 12))

    # --- Voice selection combobox ---
    # Tk variables are bound to this window explicitly rather than to the
    # implicit default root. An empty voice list falls back to "" so the
    # window still opens on machines with no voices installed.
    voice_var = tk.StringVar(master=sel_win, value=voice_names[0] if voice_names else "")
    style = ttk.Style(sel_win)
    style.theme_use('default')
    style.configure("TCombobox", font=('Segoe UI', 14))
    ttk.Combobox(sel_win, textvariable=voice_var, state="readonly",
                 values=voice_names, width=30).pack(pady=5)

    # --- Auto speak toggle (better visibility & centered) ---
    auto_speak_var_local = tk.BooleanVar(master=sel_win, value=True)

    toggle_frame = tk.Frame(sel_win, bg='#ffffff')
    toggle_frame.pack(pady=5)

    auto_speak_chk = tk.Checkbutton(
        toggle_frame,
        text="🔊 Enable Auto Speak",
        variable=auto_speak_var_local,
        onvalue=True, offvalue=False,
        font=('Segoe UI', 13, 'bold'),
        bg='#ffffff',
        activebackground='#ffffff',
        relief='solid',
        bd=1,
        highlightthickness=1,
        padx=12,
        pady=6
    )
    auto_speak_chk.pack()

    # --- Auto speak mode (last gesture / full text) ---
    auto_speak_mode_local = tk.StringVar(master=sel_win, value="last")
    tk.Label(sel_win, text="Auto Speak Mode:", font=('Segoe UI', 13, 'bold'),
             bg='#ffffff').pack(pady=(12, 3))
    mode_frame = tk.Frame(sel_win, bg='#ffffff')
    mode_frame.pack(pady=3)
    tk.Radiobutton(mode_frame, text="🗣️ Last Gesture", variable=auto_speak_mode_local,
                   value="last", font=('Segoe UI', 11), bg='#ffffff',
                   activebackground='#ffffff').pack(side=tk.LEFT, padx=10)
    tk.Radiobutton(mode_frame, text="📢 Full Text", variable=auto_speak_mode_local,
                   value="full", font=('Segoe UI', 11), bg='#ffffff',
                   activebackground='#ffffff').pack(side=tk.LEFT, padx=10)

    # --- Speech speed slider ---
    tk.Label(sel_win, text="Speech Speed:", font=('Segoe UI', 13, 'bold'),
             bg='#ffffff').pack(pady=(18, 3))
    speed_var = tk.IntVar(master=sel_win, value=default_rate)
    speed_display = tk.Label(sel_win, text=f"{default_rate} WPM",
                             font=('Segoe UI', 12), bg='#ffffff', fg='#00b894')
    speed_display.pack(pady=2)

    # (speed, colour) stops; colours in between are interpolated.
    color_stops = [
        (100, '#00b894'),   # teal
        (120, '#74b9ff'),   # sky blue
        (160, '#a29bfe'),   # lilac
        (180, '#dfe6e9'),   # light gray
        (200, '#636e72'),   # dark gray
        (220, '#000000')    # black
    ]

    def get_gradient_color(speed):
        # Clamp below the first stop; above the last stop the final colour is used.
        if speed <= color_stops[0][0]:
            return color_stops[0][1]
        for (s1, c1), (s2, c2) in zip(color_stops, color_stops[1:]):
            if s1 <= speed <= s2:
                return blend_colors(c1, c2, (speed - s1) / (s2 - s1))
        return color_stops[-1][1]

    def on_speed_change(val):
        # ttk.Scale passes the value as a string holding a float.
        v = int(float(val))
        speed_display.config(text=f"{v} WPM")
        new_color = get_gradient_color(v)
        style.configure("Custom.Horizontal.TScale",
                        background=new_color, troughcolor=new_color)
        speed_display.config(fg=new_color)

    style.configure("Custom.Horizontal.TScale", thickness=10,
                    background='#00b894', troughcolor='#00b894')
    ttk.Scale(sel_win, from_=100, to=240, orient='horizontal',
              variable=speed_var, command=on_speed_change,
              length=260, style="Custom.Horizontal.TScale").pack(pady=5)

    # --- Continue button ---
    def start_with_selected_voice():
        global auto_speak_var, auto_speak_mode
        selected = voice_var.get()
        for v in voices:
            if selected == v.name:
                engine.setProperty('voice', v.id)
                break
        engine.setProperty('rate', speed_var.get())
        # Publish the choices for update_recorded_text.
        auto_speak_var = auto_speak_var_local
        auto_speak_mode = auto_speak_mode_local
        sel_win.destroy()
        model_selector()

    tk.Button(sel_win, text="✅ Continue", command=start_with_selected_voice,
              font=('Segoe UI', 14, 'bold'), bg='#000000', fg='white',
              activebackground='#333333', activeforeground='white',
              bd=0, relief='flat', cursor='hand2', width=18, height=2).pack(pady=20)

    sel_win.mainloop()


In [14]:
# -------FIRST SCREEN WITH ANIMATION----------


import tkinter as tk

def open_speak():
    """Open the model chooser directly, skipping the voice settings window."""
    # model_selector is defined in the main GUI cell.
    # NOTE(review): nothing in the visible code calls open_speak; the Speak
    # button on the first screen goes through the voice settings instead.
    model_selector()

def open_listen():
    """Open the voice-recognition (listen) window."""
    # start_listen_gui is defined in the speech-recognition cell.
    start_listen_gui()

def main_screen():
    """Show the welcome window with animated Listen and Speak buttons.

    Both buttons start at font size 1 and a 1x1 size, then grow step by step
    to font size 16 and a 16x2 size. Listen opens the voice-recognition
    window; Speak opens the voice settings window. Either choice closes this
    window first.
    """
    window = tk.Tk()
    window.title("🤟 ASL Recognition System")
    window.geometry("600x500")
    window.resizable(False, False)
    window.configure(bg='#f9f9f9')

    body = tk.Frame(window, bg='#f9f9f9')
    body.pack(expand=True)

    welcome = tk.Label(body, text="Welcome to ASL Recognition",
                       font=('Segoe UI', 24, 'bold'), bg='#f9f9f9', fg='#333')
    welcome.pack(pady=30)

    family = 'Segoe UI'
    # Both buttons start tiny; the animation tracks the size itself.
    start_size = 1

    def go_listen():
        window.destroy()
        open_listen()

    def go_speak():
        window.destroy()
        select_exact_voice_and_start_gui()

    def make_button(caption, colour, pressed_colour, action):
        button = tk.Button(body, text=caption, font=(family, start_size, 'bold'),
                           bg=colour, fg='white',
                           activebackground=pressed_colour, activeforeground='white',
                           width=1, height=1, bd=0, relief='flat', cursor='hand2',
                           command=action)
        button.pack(pady=12)
        return button

    listen_button = make_button("🎧 Listen", '#00b894', '#019875', go_listen)
    speak_button = make_button("🤟 Speak (Sign)", '#0984e3', '#086fc1', go_speak)

    def grow(widget, size, target_size, target_width, target_height, step=1):
        # One animation tick: enlarge the font by `step` and the button by one
        # unit per axis (capped at the targets), then re-arm after 15 ms.
        if size >= target_size:
            return
        size += step
        widget.config(
            font=(family, size, 'bold'),
            width=min(target_width, widget.cget('width') + 1),
            height=min(target_height, widget.cget('height') + 1)
        )
        window.after(15, lambda: grow(widget, size, target_size, target_width, target_height, step))

    # Stagger the two animations so Listen starts growing first.
    window.after(300, lambda: grow(listen_button, start_size, target_size=16, target_width=16, target_height=2))
    window.after(600, lambda: grow(speak_button, start_size, target_size=16, target_width=16, target_height=2))

    window.mainloop()

# Entry point: running this cell opens the welcome window and blocks until
# all windows opened from it are closed.
main_screen()


In [8]:
import pyttsx3
# Diagnostic cell: print the name of every TTS voice the engine reports.
# NOTE(review): this rebinds the module-level `engine` name; the cell's
# execution count is out of sequence with the cells above it.
engine = pyttsx3.init()
for voice in engine.getProperty('voices'):
    print(voice.name)


Microsoft Hazel Desktop - English (Great Britain)
Microsoft David Desktop - English (United States)
Microsoft Zira Desktop - English (United States)
