In [None]:
import cv2
import numpy as np
from keras.models import load_model
from keras.layers import DepthwiseConv2D
import tkinter as tk
from PIL import Image, ImageTk
import time
from cvzone.HandTrackingModule import HandDetector

class CustomDepthwiseConv2D(DepthwiseConv2D):
    """``DepthwiseConv2D`` variant that silently ignores a ``groups`` argument.

    Any ``groups`` keyword is removed before the remaining arguments are
    forwarded to the parent constructor.
    NOTE(review): presumably this works around saved model configs that
    carry a ``groups`` entry the installed Keras layer rejects -- confirm.
    """

    def __init__(self, *args, **kwargs):
        """Build the layer, dropping ``groups`` from the keyword arguments.

        Positional arguments are forwarded unchanged so the subclass accepts
        every call form the parent layer accepts.
        """
        kwargs.pop('groups', None)
        super().__init__(*args, **kwargs)

def custom_classifier(model_path):
    """Load the Keras model stored at *model_path*.

    ``DepthwiseConv2D`` layers in the saved model are deserialized through
    ``CustomDepthwiseConv2D`` instead of the stock Keras layer.
    """
    layer_overrides = {'DepthwiseConv2D': CustomDepthwiseConv2D}
    return load_model(model_path, custom_objects=layer_overrides)

# Runtime resources: capture device 0, a single-hand detector and the classifier.
cap = cv2.VideoCapture(0)
detector = HandDetector(maxHands=1)
classifier = custom_classifier("best_model.keras")

# Cropping / model input parameters.
offset = 20      # padding (pixels) added around the detected hand bounding box
imgSize = 224    # the crop is resized to imgSize x imgSize before prediction

# Class names; the predicted index is used to look up the label here.
labels = [
    'GOODBYE',
    'HELLO',
    'I LOVE YOU',
    'THANK YOU',
]

# A label is written out only after it has been predicted above `threshold`
# on `stable_frames_required` consecutive processed frames.
threshold = 0.75
stable_frames_required = 5

# Mutable state updated by update_frame().
stable_count, last_label = 0, None
last_detection_time = time.time()

# Main application window.
root = tk.Tk()
root.title("Sign Language Detection")
root.geometry("900x500")

# Build the widgets first: a text pane on the left for recognized labels,
# and a label on the right that shows the camera image.
left_frame = tk.Frame(root, width=300, height=500)
text_display = tk.Text(
    left_frame,
    font=("Helvetica", 16),
    wrap="word",
    spacing3=10,
    width=30,
    height=25,
)
right_frame = tk.Frame(root, width=600, height=500)
camera_label = tk.Label(right_frame)

# Lay them out; the left frame is packed into the root before the right one.
left_frame.pack(side="left", fill="y")
text_display.pack(pady=20)
right_frame.pack(side="right", fill="both", expand=True)
camera_label.pack()

def _predict_sign(frame, bbox):
    """Classify the hand region of *frame* described by *bbox*.

    The bounding box ``(x, y, w, h)`` is padded by ``offset`` pixels, clamped
    to the frame, resized to ``imgSize`` x ``imgSize`` and scaled to [0, 1]
    before being passed to the classifier as a batch of one.

    Returns ``(index, confidence)`` for the highest-scoring class, or ``None``
    when the clamped crop is empty.
    """
    x, y, w, h = bbox
    top, bottom = max(0, y - offset), min(frame.shape[0], y + h + offset)
    left, right = max(0, x - offset), min(frame.shape[1], x + w + offset)
    crop = frame[top:bottom, left:right]
    if crop.size == 0:
        return None

    resized = cv2.resize(crop, (imgSize, imgSize))
    batch = np.expand_dims(resized.astype(np.float32) / 255.0, axis=0)

    # verbose=0: this runs once per frame, so the default progress bar would
    # flood the output.
    scores = classifier.predict(batch, verbose=0)[0]
    index = int(np.argmax(scores))
    return index, scores[index]


def _register_prediction(index, confidence):
    """Update the stability counters and emit the label once it is stable.

    A prediction at or below ``threshold`` resets the streak. Otherwise the
    streak grows while the same label repeats; when it reaches
    ``stable_frames_required`` the label is appended to ``text_display`` and
    the streak starts over.
    """
    global stable_count, last_label, last_detection_time

    if confidence <= threshold:
        stable_count = 0
        last_label = None
        return

    detected_text = labels[index]
    stable_count = stable_count + 1 if detected_text == last_label else 1
    last_label = detected_text

    if stable_count >= stable_frames_required:
        text_display.insert(tk.END, f"{detected_text}\n")
        text_display.see("end")
        last_detection_time = time.time()
        stable_count = 0


def update_frame():
    """Grab one camera frame, classify a detected hand, and refresh the UI.

    The function always re-schedules itself through ``root.after`` so that a
    failed camera read or an empty hand crop skips only that frame instead of
    stopping the video loop.
    """
    success, img = cap.read()
    if not success:
        # Retry a little later rather than giving up (or spinning every 1 ms)
        # when the camera fails to deliver a frame.
        root.after(100, update_frame)
        return

    # Keep an untouched copy for display; findHands returns the image that is
    # used for cropping (NOTE(review): presumably with landmarks drawn on it,
    # depending on the detector's draw setting -- confirm).
    imgOutput = img.copy()
    hands, img = detector.findHands(img)

    if hands:
        result = _predict_sign(img, hands[0]['bbox'])
        if result is not None:
            _register_prediction(*result)

    imgOutput_rgb = cv2.cvtColor(imgOutput, cv2.COLOR_BGR2RGB)
    img_tk = ImageTk.PhotoImage(image=Image.fromarray(imgOutput_rgb))

    # Hold a reference on the widget so the PhotoImage is not garbage
    # collected while it is being displayed.
    camera_label.imgtk = img_tk
    camera_label.configure(image=img_tk)
    root.after(1, update_frame)

# Start the frame-update loop and hand control to Tk. The camera is released
# in `finally` so the device is freed even if the loop exits through an
# exception (e.g. KeyboardInterrupt), not only when the window is closed.
try:
    update_frame()
    root.mainloop()
finally:
    # Release the camera
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1730360564.591844    5753 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1730360564.593747   34533 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 24.1.3-2), renderer: Mesa Intel(R) HD Graphics 520 (SKL GT2)
W0000 00:00:1730360564.626347   34527 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730360564.651466   34530 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42