In [4]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
from collections import deque

# ===============================
# Load TensorFlow Lite Model + Labels
# ===============================
MODEL_PATH = "isl_model.tflite"
LABELS_PATH = "labels.txt"

# Build the interpreter once; the tensor metadata below is reused for every
# inference call later in the script.
interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# One label per non-blank line. The context manager closes the file handle
# deterministically, and the explicit encoding avoids depending on the
# platform's default codec.
with open(LABELS_PATH, encoding="utf-8") as labels_file:
    labels = [line.strip() for line in labels_file if line.strip()]
print(f"Model loaded successfully with {len(labels)} classes.")

# Warn early when the label file does not match the width of the model's
# first output tensor: predictions are later mapped to labels by index, so a
# mismatch would otherwise only surface as an IndexError mid-stream.
num_outputs = int(output_details[0]['shape'][-1])
if num_outputs > 0 and num_outputs != len(labels):
    print(f"Warning: model outputs {num_outputs} classes but "
          f"{LABELS_PATH} lists {len(labels)} labels.")

# ===============================
# Initialize MediaPipe Hands
# ===============================
mp_hands, mp_draw = mp.solutions.hands, mp.solutions.drawing_utils

# Hand-tracker settings: video (non-static) mode, at most two hands, and a
# 0.6 threshold for both detection and tracking confidence.
hands_options = {
    "static_image_mode": False,
    "max_num_hands": 2,
    "min_detection_confidence": 0.6,
    "min_tracking_confidence": 0.6,
}
hands = mp_hands.Hands(**hands_options)

# ===============================
# Prediction Smoothing + Locking
# ===============================
# Rolling history of the most recent predictions; once full, the oldest
# entry is dropped automatically.
PREDICTION_WINDOW = 50  # ~2 seconds at 25 fps
pred_queue = deque([], PREDICTION_WINDOW)

# Lock state: the currently locked letter (None while unlocked), the number
# of frames a locked result stays on screen, and the remaining countdown.
lock_frames = 30
locked_letter, lock_counter = None, 0

# ===============================
# Start Webcam Stream
# ===============================
def compute_padded_bbox(xs, ys, width, height, pad=30):
    """Return the padded pixel bounding box around normalized coordinates.

    Args:
        xs: Normalized x coordinates (fractions of the frame width).
        ys: Normalized y coordinates (fractions of the frame height).
        width: Frame width in pixels.
        height: Frame height in pixels.
        pad: Margin in pixels added on every side before clamping.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` with the lower bounds clamped to 0 and
        the upper bounds clamped to the frame size.
    """
    xmin = max(0, int(min(xs) * width) - pad)
    ymin = max(0, int(min(ys) * height) - pad)
    xmax = min(width, int(max(xs) * width) + pad)
    ymax = min(height, int(max(ys) * height) + pad)
    return xmin, ymin, xmax, ymax


def model_input_size(details, default=96):
    """Return the ``(width, height)`` of the model's first input tensor.

    Args:
        details: Sequence shaped like ``interpreter.get_input_details()``;
            only ``details[0]['shape']`` is read.
        default: Side length used when the shape does not have four
            dimensions or its spatial dimensions are not positive.

    Returns:
        ``(width, height)`` in the argument order ``cv2.resize`` expects.
    """
    # Plain ints: some OpenCV builds reject NumPy integer scalars in dsize.
    shape = [int(dim) for dim in details[0]['shape']]
    # assumes NHWC layout (batch, height, width, channels) — TODO confirm
    if len(shape) == 4 and shape[1] > 0 and shape[2] > 0:
        return shape[2], shape[1]
    return default, default


def find_stable_label(predictions, window, min_confidence=0.90, min_ratio=0.6):
    """Return the label that dominates the recent predictions, if any.

    Args:
        predictions: Iterable of ``(label, confidence)`` pairs.
        window: Nominal window length the ratio is measured against.
        min_confidence: Only predictions strictly above this value vote.
        min_ratio: A label wins when its vote count is strictly greater
            than ``window * min_ratio``.

    Returns:
        The winning label, or ``None`` when no label is stable enough.
    """
    counts = {}
    for label, confidence in predictions:
        if confidence > min_confidence:
            counts[label] = counts.get(label, 0) + 1
    if not counts:
        return None
    best = max(counts, key=counts.get)
    return best if counts[best] > window * min_ratio else None


def classify_hand(crop_bgr, input_size):
    """Run the TFLite model on a BGR crop and return ``(label, confidence)``.

    Args:
        crop_bgr: Non-empty BGR image region containing the hand(s).
        input_size: ``(width, height)`` the crop is resized to.

    Returns:
        The upper-cased label of the highest-scoring class and its score.
    """
    hand_rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB)
    hand_resized = cv2.resize(hand_rgb, input_size)
    # Scale pixels to [0, 1] and add the batch dimension.
    input_data = np.expand_dims(hand_resized / 255.0, axis=0).astype(np.float32)

    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])[0]

    idx_pred = int(np.argmax(output))
    confidence = float(output[idx_pred])
    # A label file shorter than the model's output would otherwise raise
    # IndexError mid-stream; fall back to a placeholder name instead.
    if idx_pred < len(labels):
        label = labels[idx_pred].upper()
    else:
        label = f"CLASS {idx_pred}"
    return label, confidence


def run_detection(camera_index=0, max_missed_frames=10):
    """Stream webcam frames, classify detected hands and display the result.

    Shows the live prediction until one letter is stable enough to lock,
    then shows the locked letter for ``lock_frames`` frames. Stops when a
    frame cannot be read or 'q' is pressed.

    Args:
        camera_index: Device index passed to ``cv2.VideoCapture``.
        max_missed_frames: Consecutive frames without a detected hand that
            are tolerated before the prediction history and lock are reset.
    """
    global locked_letter, lock_counter

    window_title = "ISL Detection (TFLite + MediaPipe, Stable + Locking)"
    input_size = model_input_size(input_details)
    missed_frames = 0

    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            print("Could not open webcam.")
            return
        print("Webcam started — Press 'q' to quit.")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Mirror the frame so it behaves like a mirror for the user.
            frame = cv2.flip(frame, 1)
            h, w, _ = frame.shape
            img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            results = hands.process(img_rgb)

            if results.multi_hand_landmarks:
                missed_frames = 0
                all_x, all_y = [], []

                for hand_lms in results.multi_hand_landmarks:
                    mp_draw.draw_landmarks(frame, hand_lms,
                                           mp_hands.HAND_CONNECTIONS)
                    for lm in hand_lms.landmark:
                        all_x.append(lm.x)
                        all_y.append(lm.y)

                # One box around every detected hand.
                xmin, ymin, xmax, ymax = compute_padded_bbox(all_x, all_y, w, h)

                # NOTE(review): the crop is taken after the landmarks were
                # drawn on the frame, so the model sees the overlay — confirm
                # this matches how the training images were produced.
                combined_crop = frame[ymin:ymax, xmin:xmax]

                if combined_crop.size != 0:
                    label, confidence = classify_hand(combined_crop, input_size)

                    # Store prediction
                    pred_queue.append((label, confidence))

                    # Keep the caption inside the frame when the box touches
                    # the top edge.
                    text_origin = (xmin, max(ymin - 10, 30))

                    if locked_letter:
                        # LOCKED MODE (show final result)
                        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                      (255, 255, 0), 2)
                        cv2.putText(frame, f"Locked: {locked_letter}",
                                    text_origin,
                                    cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                                    (255, 255, 0), 3)

                        lock_counter -= 1
                        if lock_counter <= 0:
                            locked_letter = None  # unlock again
                    else:
                        # TEMPORAL SMOOTHING & LOCKING LOGIC
                        stable_letter = find_stable_label(pred_queue,
                                                          PREDICTION_WINDOW)
                        if stable_letter is not None:
                            locked_letter = stable_letter
                            lock_counter = lock_frames

                        # Draw live (unlocked) prediction
                        color = (0, 255, 0) if confidence > 0.6 else (0, 0, 255)
                        text = f"{label} ({confidence*100:.1f}%)"

                        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                      color, 2)
                        cv2.putText(frame, text, text_origin,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                                    color, 2)
            else:
                # Tolerate brief detection dropouts, but once the hand has
                # really gone, discard stale votes and any pending lock so
                # they cannot leak into the next sign.
                missed_frames += 1
                if missed_frames > max_missed_frames:
                    pred_queue.clear()
                    locked_letter = None
                    lock_counter = 0

                cv2.putText(frame, "No hands detected", (30, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            cv2.imshow(window_title, frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even when a frame
        # raises part-way through the loop.
        cap.release()
        cv2.destroyAllWindows()
        print("Detection stopped.")


# Guarded so the helpers above can be imported without opening the camera;
# the condition is true when this runs as a script or a notebook cell.
if __name__ == "__main__":
    run_detection()


Model loaded successfully with 26 classes.
Webcam started — Press 'q' to quit.
Detection stopped.
