In [5]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageTk
import pyttsx3
import time
import os
from collections import deque
from tensorflow.keras.models import load_model
import math
import mediapipe as mp
from cvzone.HandTrackingModule import HandDetector

# Constants
# Side length (pixels) of the square white canvas that predict_cnn fills.
imgSize = 100
# Margin (pixels) added around the detected hand bounding box before cropping.
offset = 20
# Feature-vector length that predict_mlp pads/truncates to. 1662 equals the
# length produced by extract_keypoints (33*4 + 468*3 + 21*3 + 21*3);
# load_both_hand_model overrides it to 225.
EXPECTED_LANDMARK_SIZE = 1662
# Rolling window holding the 40 most recent keypoint vectors for the LSTM path.
sequence = deque(maxlen=40)

# Globals
# Active Keras model and its class names; both are replaced by load_model_safe.
model = None
labels = []
# Pipeline tag ("cnn", "mlp" or "lstm") that update_frame branches on.
current_model_type = "cnn"
# Accepted gestures, in order; joined with spaces for display.
recorded_gestures = []
# cv2.VideoCapture handle, opened in start_gui.
cap = None
# Created lazily on first use: cvzone hand detector / MediaPipe Holistic.
detector = None
holistic = None

# GUI elements
# Tk widgets, created in start_gui and used by the callbacks below.
root = None
canvas = None
text_box = None
recorded_text = None

# TTS
engine = pyttsx3.init()
# Lower the speech rate by 60 relative to the engine's default.
engine.setProperty('rate', engine.getProperty('rate') - 60)

# Model loaders
def load_model_safe(path, label_list, model_type):
    """Load a Keras model from ``path`` and make it the active recognizer.

    On success the module-level ``model``, ``labels`` and
    ``current_model_type`` are replaced together and an info dialog is shown.
    If the file is missing or cannot be deserialized, an error dialog is shown
    and the previously active model (if any) is left untouched.

    Args:
        path: Path of the saved model file.
        label_list: Class names, indexed by the model's output position.
        model_type: Pipeline tag that ``update_frame`` branches on
            ("cnn", "mlp" or "lstm").
    """
    global model, labels, current_model_type
    if not os.path.exists(path):
        messagebox.showerror("Error", f"Model file not found: {path}")
        return

    try:
        loaded = load_model(path)
    except Exception as exc:
        # UI boundary: a corrupt/incompatible file should be reported to the
        # user instead of escaping the Tk callback unnoticed.
        messagebox.showerror("Error", f"Could not load model {path}: {exc}")
        return

    model = loaded
    labels = label_list
    current_model_type = model_type
    # Frames buffered while a previous model was active must not become part
    # of the first window evaluated after the switch.
    sequence.clear()

    messagebox.showinfo("Model Loaded", f"{model_type.upper()} model loaded.")

def load_alphabet_model():
    """Activate the alphabet CNN ('alphabet_model.h5').

    Classes are the letters "A"-"Z" followed by the control gestures
    "space", "num", "words" and "dot".
    """
    letters = [chr(code) for code in range(ord("A"), ord("Z") + 1)]
    controls = ["space", "num", "words", "dot"]
    load_model_safe('alphabet_model.h5', letters + controls, "cnn")
def load_number_model():
    """Activate the digit CNN ('number_model.h5').

    Classes are the digits "0"-"9" followed by the control gestures
    "space", "alphabets", "words" and "dot".
    """
    digits = list(map(str, range(10)))
    controls = ["space", "alphabets", "words", "dot"]
    load_model_safe('number_model.h5', digits + controls, "cnn")
def load_word_model():
    """Activate the single-hand word CNN ('singleHand.h5')."""
    word_labels = [
        'alphabets', 'dot', 'Hello', 'I love you', 'No',
        'num', 'Ok', 'space', 'Thanks', 'Yes',
    ]
    load_model_safe('singleHand.h5', word_labels, "cnn")
def load_both_hand_model():
    """Activate the two-hand word MLP ('bothHands.h5').

    Also sets ``EXPECTED_LANDMARK_SIZE`` to 225, the vector length that
    ``predict_mlp`` pads/truncates its x/y/z landmark list to.
    """
    global EXPECTED_LANDMARK_SIZE
    two_hand_labels = [
        'Am', 'Done', 'Good', 'Help', 'I', 'Later', 'Morning', 'Night',
        'Ok', 'Pray', 'See', 'Want', 'Water', 'When', 'You',
    ]
    EXPECTED_LANDMARK_SIZE = 225
    load_model_safe('bothHands.h5', two_hand_labels, "mlp")
def load_dynamic_model():
    """Activate the sequence (LSTM) model stored in 'dynamic.keras'."""
    sign_labels = [
        'hello', 'my', 'name', 'is', 'i',
        'study', 'in', 'department', 'am',
    ]
    load_model_safe('dynamic.keras', sign_labels, "lstm")

# Gesture logic
def record_gesture(gesture):
    """Apply an accepted gesture to the transcript or run its command.

    "dot" speaks the transcript; "num", "alphabets" and "words" switch the
    active model; "space" appends a blank; any other label is appended as-is.
    The transcript label is refreshed in every case.
    """
    command_actions = {
        "dot": speak_text,
        "num": load_number_model,
        "alphabets": load_alphabet_model,
        "words": load_word_model,
    }
    action = command_actions.get(gesture)
    if action is not None:
        action()
    else:
        recorded_gestures.append(" " if gesture == "space" else gesture)
    update_recorded_text()

def speak_text():
    """Read the transcript label aloud; do nothing when it is empty."""
    spoken = recorded_text.cget("text")
    if not spoken:
        return
    engine.say(spoken)
    # Blocks until the queued utterance has been processed.
    engine.runAndWait()

def clear_text():
    """Drop every recorded gesture and refresh the transcript label."""
    # In-place deletion keeps the same list object that other functions hold.
    del recorded_gestures[:]
    update_recorded_text()

def update_recorded_text():
    """Show the recorded gestures, separated by single spaces, in the label."""
    transcript = " ".join(recorded_gestures)
    recorded_text.config(text=transcript)

def extract_keypoints(results):
    """Flatten Holistic results into one fixed-length feature vector.

    Layout: pose (33 x [x, y, z, visibility]), face (468 x [x, y, z]),
    left hand (21 x [x, y, z]), right hand (21 x [x, y, z]). A group that is
    missing from ``results`` contributes zeros of the same length.

    Returns:
        1-D numpy array of length 1662.
    """
    def _flatten(group, fields, count):
        # Zero block keeps the overall layout stable when a part is absent.
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        _flatten(results.pose_landmarks, xyz + ("visibility",), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

# Prediction handlers
def predict_cnn(img):
    """Classify the hand visible in ``img`` with the active CNN.

    The first detected hand is cropped (bounding box plus ``offset`` pixels of
    margin, clamped to the frame), resized so its longer side spans
    ``imgSize``, centred on a white ``imgSize`` x ``imgSize`` canvas, scaled
    to [0, 1] and passed to ``model.predict``.

    Args:
        img: Frame as read from the camera in ``update_frame``.

    Returns:
        The predicted label, or "" when no model is loaded, no hand is
        detected, or the bounding box/crop is empty.
    """
    global detector
    if detector is None:
        detector = HandDetector(maxHands=1)
    hands, img = detector.findHands(img, draw=False)
    if not hands or model is None:
        return ""

    x, y, w, h = hands[0]['bbox']
    if w <= 0 or h <= 0:
        # A degenerate box would divide by zero in the scale computation.
        return ""

    # Clamp the crop to the frame. Without this, a hand near the top/left
    # edge yields a negative start index, which numpy interprets as counting
    # from the far edge and so returns a wrong or empty region.
    frame_h, frame_w = img.shape[:2]
    imgCrop = img[max(0, y - offset):min(frame_h, y + h + offset),
                  max(0, x - offset):min(frame_w, x + w + offset)]
    if imgCrop.size == 0:
        return ""

    imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255
    if h / w > 1:
        # Taller than wide: fill the height, centre horizontally.
        k = imgSize / h
        # min() guards against float round-up pushing the size past the canvas.
        wCal = min(imgSize, math.ceil(k * w))
        imgResize = cv2.resize(imgCrop, (wCal, imgSize))
        wGap = math.ceil((imgSize - wCal) / 2)
        imgWhite[:, wGap:wGap + wCal] = imgResize
    else:
        # Wider than tall (or square): fill the width, centre vertically.
        k = imgSize / w
        hCal = min(imgSize, math.ceil(k * h))
        imgResize = cv2.resize(imgCrop, (imgSize, hCal))
        hGap = math.ceil((imgSize - hCal) / 2)
        imgWhite[hGap:hGap + hCal, :] = imgResize

    batch = np.expand_dims(imgWhite / 255.0, axis=0)
    predictions = model.predict(batch)
    return labels[np.argmax(predictions)]

def predict_mlp(img):
    """Predict a word with the MLP from pose and hand landmarks.

    The x/y/z coordinates of the pose, left-hand and right-hand landmarks
    that were detected are concatenated in that order, then zero-padded or
    truncated to ``EXPECTED_LANDMARK_SIZE`` before being fed to the model.

    Returns:
        The predicted label, or "" when no landmarks were detected.
    """
    global holistic
    if holistic is None:
        holistic = mp.solutions.holistic.Holistic(static_image_mode=False, model_complexity=1, enable_segmentation=False)

    results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # NOTE(review): a missing group is skipped rather than zero-filled, so the
    # groups after it shift toward the front of the vector -- confirm this
    # matches how the training data was built.
    coords = []
    for group in (results.pose_landmarks,
                  results.left_hand_landmarks,
                  results.right_hand_landmarks):
        if group:
            for point in group.landmark:
                coords.extend((point.x, point.y, point.z))

    if not coords:
        return ""

    features = np.array(coords)
    shortfall = EXPECTED_LANDMARK_SIZE - len(features)
    if shortfall > 0:
        features = np.pad(features, (0, shortfall))
    elif shortfall < 0:
        features = features[:EXPECTED_LANDMARK_SIZE]

    scores = model.predict(features.reshape(1, -1))
    return labels[np.argmax(scores)]


def predict_lstm(img):
    """Append this frame's keypoints to ``sequence`` and classify the window.

    Returns:
        The predicted label once 40 frames are buffered, otherwise "".
    """
    global holistic, sequence
    if holistic is None:
        holistic = mp.solutions.holistic.Holistic()

    results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    sequence.append(extract_keypoints(results))

    # The model is only queried once the rolling window is full.
    if len(sequence) != 40:
        return ""
    window = np.expand_dims(sequence, axis=0)
    scores = model.predict(window)
    return labels[np.argmax(scores)]

# Main camera loop
def update_frame():
    """Process one camera frame and schedule the next call.

    Reads a frame, runs the predictor selected by ``current_model_type``,
    shows the current prediction in ``text_box``, records a gesture once it
    has been predicted continuously for two seconds, and paints the frame on
    ``canvas``. If the camera read fails the function returns without
    rescheduling, which ends the loop.
    """
    global cap, holistic
    success, img = cap.read()
    if not success:
        print("Camera failed.")
        return

    gesture = ""

    if current_model_type == "cnn":
        gesture = predict_cnn(img)

    elif current_model_type == "mlp":
        gesture = predict_mlp(img)

    elif current_model_type == "lstm":
        # Create the Holistic pipeline once and keep it; building a new one
        # for every frame is very expensive and never gets released.
        if holistic is None:
            holistic = mp.solutions.holistic.Holistic()

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = holistic.process(img_rgb)

        # Draw landmarks
        mp_holistic = mp.solutions.holistic
        mp_drawing = mp.solutions.drawing_utils
        mp_drawing.draw_landmarks(img, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
        mp_drawing.draw_landmarks(img, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
        mp_drawing.draw_landmarks(img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
        mp_drawing.draw_landmarks(img, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)

        sequence.append(extract_keypoints(results))
        # Predict only with a loaded model and a full 40-frame window.
        if model is not None and len(sequence) == 40:
            prediction = model.predict(np.expand_dims(sequence, axis=0))
            gesture = labels[np.argmax(prediction)]

    if gesture:
        text_box.delete(0, tk.END)
        text_box.insert(0, gesture)

        # Debounce: accept a gesture only after it has been the prediction
        # for at least 2 s; a different gesture restarts the timer.
        now = time.time()
        if getattr(update_frame, "last_gesture", None) == gesture:
            if now - update_frame.last_time >= 2:
                record_gesture(gesture)
                update_frame.last_time = time.time()
        else:
            update_frame.last_gesture = gesture
            update_frame.last_time = now

        # Draw gesture text on the webcam frame
        cv2.putText(img, f"Prediction: {gesture}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 255, 50), 2)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)
    imgtk = ImageTk.PhotoImage(image=img)
    # Reuse a single canvas item; creating a new one per frame makes the
    # canvas accumulate items without bound.
    frame_item = getattr(canvas, "frame_item", None)
    if frame_item is None:
        canvas.frame_item = canvas.create_image(0, 0, anchor=tk.NW, image=imgtk)
    else:
        canvas.itemconfig(frame_item, image=imgtk)
    # Keep a reference so the PhotoImage is not garbage-collected.
    canvas.imgtk = imgtk

    root.after(10, update_frame)

       

# GUI setup
def start_gui():
    """Build the main window, open camera 0 and run the Tk event loop.

    The window holds the video canvas, an entry showing the live prediction,
    the transcript label, Speak/Clear buttons and one button per model.
    If the webcam cannot be opened, an error dialog is shown and the window
    is torn down again. The camera is released when the event loop ends,
    including when it ends through an exception.
    """
    global root, cap, canvas, text_box, recorded_text
    root = tk.Tk()
    root.title("ASL Recognition - All Models")
    root.geometry("900x700")

    canvas = tk.Canvas(root, width=640, height=400)
    canvas.pack()

    text_box = tk.Entry(root, font=('Arial', 24), width=20)
    text_box.pack(pady=10)

    recorded_text = tk.Label(root, text="", font=('Arial', 14), relief="solid", width=30, height=2, wraplength=300)
    recorded_text.pack(pady=10)

    frame = tk.Frame(root)
    frame.pack(pady=10)
    tk.Button(frame, text="Speak", command=speak_text, bg="lightgreen", font=('Arial', 14)).grid(row=0, column=0, padx=5)
    tk.Button(frame, text="Clear", command=clear_text, bg="red", font=('Arial', 14)).grid(row=0, column=1, padx=5)

    switch = tk.Frame(root)
    switch.pack(pady=10)
    tk.Button(switch, text="Alphabets", command=load_alphabet_model).grid(row=0, column=0, padx=5)
    tk.Button(switch, text="Numbers", command=load_number_model).grid(row=0, column=1, padx=5)
    tk.Button(switch, text="Words", command=load_word_model).grid(row=0, column=2, padx=5)
    tk.Button(switch, text="Both Hands", command=load_both_hand_model).grid(row=0, column=3, padx=5)
    tk.Button(switch, text="Dynamic", command=load_dynamic_model).grid(row=0, column=4, padx=5)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        messagebox.showerror("Error", "Cannot access webcam.")
        # Without this the half-built window stays around with no event loop.
        cap.release()
        root.destroy()
        return

    try:
        update_frame()
        root.mainloop()
    finally:
        cap.release()

# Initial model selector
def model_selector():
    """Show the start-up dialog for choosing the initial model.

    Each button loads its model, closes this dialog and then opens the main
    window via ``start_gui``.
    """
    sel_root = tk.Tk()
    sel_root.title("Choose Model")
    sel_root.geometry("300x350")

    def launch_with(loader):
        # Same order as before: load, close the selector, start the main GUI.
        loader()
        sel_root.destroy()
        start_gui()

    tk.Label(sel_root, text="Select Initial Model", font=('Arial', 16)).pack(pady=20)

    choices = (
        ("Alphabets", load_alphabet_model),
        ("Numbers", load_number_model),
        ("Words", load_word_model),
        ("Both Hands", load_both_hand_model),
        ("Dynamic", load_dynamic_model),
    )
    for caption, loader in choices:
        # Default argument pins the loader for this button.
        button = tk.Button(sel_root, text=caption,
                           command=lambda loader=loader: launch_with(loader))
        button.pack(pady=5)

    sel_root.mainloop()

# Entry point: show the model chooser, which in turn opens the main window.
model_selector()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36

In [6]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageTk
import pyttsx3
import time
import os
from collections import deque
from tensorflow.keras.models import load_model
import math
import mediapipe as mp
from cvzone.HandTrackingModule import HandDetector

from cvzone.HandTrackingModule import HandDetector
# Placeholder only; rebound to a HandDetector instance in the globals below.
detector = None

# Constants
# Side length (pixels) of the square white canvas that predict_cnn fills.
imgSize = 100
# Margin (pixels) added around the detected hand bounding box before cropping.
offset = 20
# Feature-vector length that predict_mlp pads/truncates to. 1662 equals the
# length produced by extract_keypoints (33*4 + 468*3 + 21*3 + 21*3);
# load_both_hand_model overrides it to 225.
EXPECTED_LANDMARK_SIZE = 1662
# Rolling window holding the 40 most recent keypoint vectors for the LSTM path.
sequence = deque(maxlen=40)

# Globals
# Active Keras model and its class names; both are replaced by load_model_safe.
model = None
labels = []
# Pipeline tag ("cnn", "mlp" or "lstm") that update_frame branches on.
current_model_type = "cnn"
# Accepted gestures, in order; joined with spaces for display.
recorded_gestures = []
# cv2.VideoCapture handle, opened in start_gui.
cap = None
# Hand detector used by predict_cnn, limited to one hand.
detector = HandDetector(maxHands=1)
# MediaPipe Holistic instance, created lazily by the predictors.
holistic = None

# GUI elements
# Tk widgets, created in start_gui and used by the callbacks below.
root = None
canvas = None
text_box = None
recorded_text = None

# TTS
engine = pyttsx3.init()
# Lower the speech rate by 60 relative to the engine's default.
engine.setProperty('rate', engine.getProperty('rate') - 60)

# Model loaders
def load_model_safe(path, label_list, model_type):
    """Load a Keras model from ``path`` and make it the active recognizer.

    On success the module-level ``model``, ``labels`` and
    ``current_model_type`` are replaced together and an info dialog is shown.
    If the file is missing or cannot be deserialized, an error dialog is shown
    and the previously active model (if any) is left untouched.

    Args:
        path: Path of the saved model file.
        label_list: Class names, indexed by the model's output position.
        model_type: Pipeline tag that ``update_frame`` branches on
            ("cnn", "mlp" or "lstm").
    """
    global model, labels, current_model_type
    if not os.path.exists(path):
        messagebox.showerror("Error", f"Model file not found: {path}")
        return

    try:
        loaded = load_model(path)
    except Exception as exc:
        # UI boundary: a corrupt/incompatible file should be reported to the
        # user instead of escaping the Tk callback unnoticed.
        messagebox.showerror("Error", f"Could not load model {path}: {exc}")
        return

    model = loaded
    labels = label_list
    current_model_type = model_type
    # Frames buffered while a previous model was active must not become part
    # of the first window evaluated after the switch.
    sequence.clear()

    messagebox.showinfo("Model Loaded", f"{model_type.upper()} model loaded.")

def load_alphabet_model():
    """Activate the alphabet CNN ('alphabet_model.h5').

    Classes are the letters "A"-"Z" followed by the control gestures
    "space", "num", "words" and "dot".
    """
    letters = [chr(code) for code in range(ord("A"), ord("Z") + 1)]
    controls = ["space", "num", "words", "dot"]
    load_model_safe('alphabet_model.h5', letters + controls, "cnn")
def load_number_model():
    """Activate the digit CNN ('number_model.h5').

    Classes are the digits "0"-"9" followed by the control gestures
    "space", "alphabets", "words" and "dot".
    """
    digits = list(map(str, range(10)))
    controls = ["space", "alphabets", "words", "dot"]
    load_model_safe('number_model.h5', digits + controls, "cnn")
def load_word_model():
    """Activate the single-hand word CNN ('singleHand.h5')."""
    word_labels = [
        'alphabets', 'dot', 'Hello', 'I love you', 'No',
        'num', 'Ok', 'space', 'Thanks', 'Yes',
    ]
    load_model_safe('singleHand.h5', word_labels, "cnn")
def load_both_hand_model():
    """Activate the two-hand word MLP ('bothHands.h5').

    Also sets ``EXPECTED_LANDMARK_SIZE`` to 225, the vector length that
    ``predict_mlp`` pads/truncates its x/y/z landmark list to.
    """
    global EXPECTED_LANDMARK_SIZE
    two_hand_labels = [
        'Am', 'Done', 'Good', 'Help', 'I', 'Later', 'Morning', 'Night',
        'Ok', 'Pray', 'See', 'Want', 'Water', 'When', 'You',
    ]
    EXPECTED_LANDMARK_SIZE = 225
    load_model_safe('bothHands.h5', two_hand_labels, "mlp")
def load_dynamic_model():
    """Activate the sequence (LSTM) model stored in 'dynamic.keras'."""
    sign_labels = [
        'hello', 'my', 'name', 'is', 'i',
        'study', 'in', 'department', 'am',
    ]
    load_model_safe('dynamic.keras', sign_labels, "lstm")

# Gesture logic
def record_gesture(gesture):
    """Apply an accepted gesture to the transcript or run its command.

    "dot" speaks the transcript; "num", "alphabets" and "words" switch the
    active model; "space" appends a blank; any other label is appended as-is.
    The transcript label is refreshed in every case.
    """
    command_actions = {
        "dot": speak_text,
        "num": load_number_model,
        "alphabets": load_alphabet_model,
        "words": load_word_model,
    }
    action = command_actions.get(gesture)
    if action is not None:
        action()
    else:
        recorded_gestures.append(" " if gesture == "space" else gesture)
    update_recorded_text()

def speak_text():
    """Read the transcript label aloud; do nothing when it is empty."""
    spoken = recorded_text.cget("text")
    if not spoken:
        return
    engine.say(spoken)
    # Blocks until the queued utterance has been processed.
    engine.runAndWait()

def clear_text():
    """Drop every recorded gesture and refresh the transcript label."""
    # In-place deletion keeps the same list object that other functions hold.
    del recorded_gestures[:]
    update_recorded_text()

def update_recorded_text():
    """Show the recorded gestures, separated by single spaces, in the label."""
    transcript = " ".join(recorded_gestures)
    recorded_text.config(text=transcript)

def extract_keypoints(results):
    """Flatten Holistic results into one fixed-length feature vector.

    Layout: pose (33 x [x, y, z, visibility]), face (468 x [x, y, z]),
    left hand (21 x [x, y, z]), right hand (21 x [x, y, z]). A group that is
    missing from ``results`` contributes zeros of the same length.

    Returns:
        1-D numpy array of length 1662.
    """
    def _flatten(group, fields, count):
        # Zero block keeps the overall layout stable when a part is absent.
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        _flatten(results.pose_landmarks, xyz + ("visibility",), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

# Prediction handlers
def predict_cnn(img):
    """Run the active CNN on the first hand found in ``img``.

    The hand's bounding box plus ``offset`` pixels of margin (clamped to the
    frame) is resized so its longer side spans ``imgSize``, centred on a
    white square canvas, scaled to [0, 1] and classified.

    Returns:
        ``(label, bbox)``. ``label`` is "" when no hand is found or when
        preprocessing/prediction raised (the error is printed). ``bbox`` is
        the detector's ``(x, y, w, h)`` tuple, or None when no hand is found.
    """
    hands, img = detector.findHands(img)
    if not hands:
        return "", None

    x, y, w, h = hands[0]['bbox']
    bbox = (x, y, w, h)
    canvas_img = np.ones((imgSize, imgSize, 3), np.uint8) * 255
    # Crop the box plus margin, clamped to the frame.
    row_span = slice(max(0, y - offset), min(y + h + offset, img.shape[0]))
    col_span = slice(max(0, x - offset), min(x + w + offset, img.shape[1]))
    crop = img[row_span, col_span]

    label = ""
    try:
        if h / w > 1:
            # Taller than wide: fill the height, centre horizontally.
            scale = imgSize / h
            new_w = math.ceil(scale * w)
            resized = cv2.resize(crop, (new_w, imgSize))
            pad = math.ceil((imgSize - new_w) / 2)
            canvas_img[:, pad:pad + new_w] = resized
        else:
            # Wider than tall (or square): fill the width, centre vertically.
            scale = imgSize / w
            new_h = math.ceil(scale * h)
            resized = cv2.resize(crop, (imgSize, new_h))
            pad = math.ceil((imgSize - new_h) / 2)
            canvas_img[pad:pad + new_h, :] = resized

        batch = np.expand_dims(canvas_img / 255.0, axis=0)
        scores = model.predict(batch)
        label = labels[np.argmax(scores)]
    except Exception as e:
        # Best effort: a bad frame must not stop the camera loop.
        print("Prediction error:", e)

    return label, bbox

def predict_mlp(img):
    """Predict a word with the MLP from pose and hand landmarks.

    The x/y/z coordinates of the pose, left-hand and right-hand landmarks
    that were detected are concatenated in that order, then zero-padded or
    truncated to ``EXPECTED_LANDMARK_SIZE`` before being fed to the model.

    Returns:
        The predicted label, or "" when no landmarks were detected.
    """
    global holistic
    if holistic is None:
        holistic = mp.solutions.holistic.Holistic(static_image_mode=False, model_complexity=1, enable_segmentation=False)

    results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # NOTE(review): a missing group is skipped rather than zero-filled, so the
    # groups after it shift toward the front of the vector -- confirm this
    # matches how the training data was built.
    coords = []
    for group in (results.pose_landmarks,
                  results.left_hand_landmarks,
                  results.right_hand_landmarks):
        if group:
            for point in group.landmark:
                coords.extend((point.x, point.y, point.z))

    if not coords:
        return ""

    features = np.array(coords)
    shortfall = EXPECTED_LANDMARK_SIZE - len(features)
    if shortfall > 0:
        features = np.pad(features, (0, shortfall))
    elif shortfall < 0:
        features = features[:EXPECTED_LANDMARK_SIZE]

    scores = model.predict(features.reshape(1, -1))
    return labels[np.argmax(scores)]


def predict_lstm(img):
    """Append this frame's keypoints to ``sequence`` and classify the window.

    Returns:
        The predicted label once 40 frames are buffered, otherwise "".
    """
    global holistic, sequence
    if holistic is None:
        holistic = mp.solutions.holistic.Holistic()

    results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    sequence.append(extract_keypoints(results))

    # The model is only queried once the rolling window is full.
    if len(sequence) != 40:
        return ""
    window = np.expand_dims(sequence, axis=0)
    scores = model.predict(window)
    return labels[np.argmax(scores)]

# Main camera loop
def update_frame():
    """Process one camera frame and schedule the next call.

    Reads a frame, runs the predictor selected by ``current_model_type``,
    shows the current prediction in ``text_box``, records a gesture once it
    has been predicted continuously for two seconds, and paints the frame on
    ``canvas``. If the camera read fails the function returns without
    rescheduling, which ends the loop.
    """
    global cap, holistic
    success, img = cap.read()
    if not success:
        print("Camera failed.")
        return

    gesture = ""

    if current_model_type == "cnn":
        # The CNN label must feed `gesture`; otherwise it is never shown in
        # the text box and never recorded.
        gesture, bbox = predict_cnn(img)

        # Draw bounding box and prediction if hand detected. Drawing happens
        # on `img` because that is the frame that gets displayed below.
        if bbox:
            x, y, w, h = bbox
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(img, gesture, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    elif current_model_type == "mlp":
        gesture = predict_mlp(img)

    elif current_model_type == "lstm":
        # Create the Holistic pipeline once and keep it; building a new one
        # for every frame is very expensive and never gets released.
        if holistic is None:
            holistic = mp.solutions.holistic.Holistic()

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = holistic.process(img_rgb)

        # Draw landmarks
        mp_holistic = mp.solutions.holistic
        mp_drawing = mp.solutions.drawing_utils
        mp_drawing.draw_landmarks(img, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
        mp_drawing.draw_landmarks(img, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
        mp_drawing.draw_landmarks(img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
        mp_drawing.draw_landmarks(img, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)

        sequence.append(extract_keypoints(results))
        # Predict only with a loaded model and a full 40-frame window.
        if model is not None and len(sequence) == 40:
            scores = model.predict(np.expand_dims(sequence, axis=0))
            gesture = labels[np.argmax(scores)]

    if gesture:
        text_box.delete(0, tk.END)
        text_box.insert(0, gesture)

        # Debounce: accept a gesture only after it has been the prediction
        # for at least 2 s; a different gesture restarts the timer.
        now = time.time()
        if getattr(update_frame, "last_gesture", None) == gesture:
            if now - update_frame.last_time >= 2:
                record_gesture(gesture)
                update_frame.last_time = time.time()
        else:
            update_frame.last_gesture = gesture
            update_frame.last_time = now

        # Draw gesture text on the webcam frame
        cv2.putText(img, f"Prediction: {gesture}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 255, 50), 2)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)
    imgtk = ImageTk.PhotoImage(image=img)
    # Reuse a single canvas item; creating a new one per frame makes the
    # canvas accumulate items without bound.
    frame_item = getattr(canvas, "frame_item", None)
    if frame_item is None:
        canvas.frame_item = canvas.create_image(0, 0, anchor=tk.NW, image=imgtk)
    else:
        canvas.itemconfig(frame_item, image=imgtk)
    # Keep a reference so the PhotoImage is not garbage-collected.
    canvas.imgtk = imgtk

    root.after(10, update_frame)

       

# GUI setup
def start_gui():
    """Build the main window, open camera 0 and run the Tk event loop.

    The window holds the video canvas, an entry showing the live prediction,
    the transcript label, Speak/Clear buttons and one button per model.
    If the webcam cannot be opened, an error dialog is shown and the window
    is torn down again. The camera is released when the event loop ends,
    including when it ends through an exception.
    """
    global root, cap, canvas, text_box, recorded_text
    root = tk.Tk()
    root.title("ASL Recognition - All Models")
    root.geometry("900x700")

    canvas = tk.Canvas(root, width=640, height=400)
    canvas.pack()

    text_box = tk.Entry(root, font=('Arial', 24), width=20)
    text_box.pack(pady=10)

    recorded_text = tk.Label(root, text="", font=('Arial', 14), relief="solid", width=30, height=2, wraplength=300)
    recorded_text.pack(pady=10)

    frame = tk.Frame(root)
    frame.pack(pady=10)
    tk.Button(frame, text="Speak", command=speak_text, bg="lightgreen", font=('Arial', 14)).grid(row=0, column=0, padx=5)
    tk.Button(frame, text="Clear", command=clear_text, bg="red", font=('Arial', 14)).grid(row=0, column=1, padx=5)

    switch = tk.Frame(root)
    switch.pack(pady=10)
    tk.Button(switch, text="Alphabets", command=load_alphabet_model).grid(row=0, column=0, padx=5)
    tk.Button(switch, text="Numbers", command=load_number_model).grid(row=0, column=1, padx=5)
    tk.Button(switch, text="Words", command=load_word_model).grid(row=0, column=2, padx=5)
    tk.Button(switch, text="Both Hands", command=load_both_hand_model).grid(row=0, column=3, padx=5)
    tk.Button(switch, text="Dynamic", command=load_dynamic_model).grid(row=0, column=4, padx=5)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        messagebox.showerror("Error", "Cannot access webcam.")
        # Without this the half-built window stays around with no event loop.
        cap.release()
        root.destroy()
        return

    try:
        update_frame()
        root.mainloop()
    finally:
        cap.release()

# Initial model selector
def model_selector():
    """Show the start-up dialog for choosing the initial model.

    Each button loads its model, closes this dialog and then opens the main
    window via ``start_gui``.
    """
    sel_root = tk.Tk()
    sel_root.title("Choose Model")
    sel_root.geometry("300x350")

    def launch_with(loader):
        # Same order as before: load, close the selector, start the main GUI.
        loader()
        sel_root.destroy()
        start_gui()

    tk.Label(sel_root, text="Select Initial Model", font=('Arial', 16)).pack(pady=20)

    choices = (
        ("Alphabets", load_alphabet_model),
        ("Numbers", load_number_model),
        ("Words", load_word_model),
        ("Both Hands", load_both_hand_model),
        ("Dynamic", load_dynamic_model),
    )
    for caption, loader in choices:
        # Default argument pins the loader for this button.
        button = tk.Button(sel_root, text=caption,
                           command=lambda loader=loader: launch_with(loader))
        button.pack(pady=5)

    sel_root.mainloop()

# Entry point: show the model chooser, which in turn opens the main window.
model_selector()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38