In [None]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
import time

# Trained Keras classifier, loaded from a file in the working directory.
model = tf.keras.models.load_model("modelAdditionalLayers4001.keras")

# MediaPipe handles: the hands solution module, its drawing helpers, and a
# tracker configured for video input (not static images) with a single hand.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Class labels "A".."Z"; the position in this list is used as the class index.
labels = list(map(chr, range(ord('A'), ord('Z') + 1)))

def draw_hand_landmarks_on_black(hand_landmarks, image_size=128):
    """Render one hand's landmark skeleton, centred, on a square black image.

    The landmarks' x/y coordinates are shifted to their bounding box, scaled
    uniformly so the longer bounding-box side spans 70% of ``image_size``, and
    centred in the image. Every pair in ``mp_hands.HAND_CONNECTIONS`` is drawn
    as a 1-pixel green line and every landmark as a filled green dot of
    radius 1.

    Args:
        hand_landmarks: Object exposing a ``landmark`` iterable whose items
            have ``x`` and ``y`` attributes.
        image_size: Side length, in pixels, of the square output image.

    Returns:
        A ``(image_size, image_size, 3)`` ``uint8`` array (black background,
        green skeleton).
    """
    landmarks = np.array([[lm.x, lm.y] for lm in hand_landmarks.landmark])
    x_vals, y_vals = landmarks[:, 0], landmarks[:, 1]
    min_x, max_x = np.min(x_vals), np.max(x_vals)
    min_y, max_y = np.min(y_vals), np.max(y_vals)

    width = max_x - min_x
    height = max_y - min_y

    # If every landmark coincides the bounding box has zero size. Dividing by
    # it would produce inf/NaN coordinates, which turn into garbage pixel
    # positions once cast to int32. A scale of 0 instead collapses the hand
    # onto the image centre.
    extent = max(width, height)
    scale = 0.7 * image_size / extent if extent > 0 else 0.0

    landmarks[:, 0] = (landmarks[:, 0] - min_x) * scale
    landmarks[:, 1] = (landmarks[:, 1] - min_y) * scale

    # Centre the scaled bounding box inside the square canvas.
    offset_x = (image_size - width * scale) / 2
    offset_y = (image_size - height * scale) / 2
    landmarks[:, 0] += offset_x
    landmarks[:, 1] += offset_y

    black_img = np.zeros((image_size, image_size, 3), dtype=np.uint8)
    # Truncate to integer pixels and hand OpenCV plain Python ints.
    landmark_points = [tuple(pt) for pt in landmarks.astype(np.int32).tolist()]

    for start_idx, end_idx in mp_hands.HAND_CONNECTIONS:
        cv2.line(black_img, landmark_points[start_idx],
                 landmark_points[end_idx], (0, 255, 0), 1)

    for point in landmark_points:
        cv2.circle(black_img, point, 1, (0, 255, 0), -1)

    return black_img

# --- Tunable thresholds for the recognition loop ---
STABILITY_THRESHOLD = 3  # stable_count must exceed this before a letter is accepted
LETTER_COOLDOWN = 1.0    # Seconds before the same letter may be appended again
NO_HAND_TIMEOUT = 2.0    # Seconds without a hand before a word-separating space

# --- Mutable state shared across frames ---
sentence, current_letter, confirmed_letter = "", "", ""
# sentence:         text accumulated so far
# current_letter:   last letter appended to `sentence`
# confirmed_letter: most recent confident prediction being tracked for stability
stable_count = 0            # consecutive frames agreeing with confirmed_letter
no_hand_start_time = None   # when the current no-hand period began, or None
last_confirmed_time = time.time()
last_frame_time = time.time()  # not read anywhere in the visible code

# Minimum top-class score for a frame's prediction to be considered at all.
CONFIDENCE_THRESHOLD = 0.8

cap = cv2.VideoCapture(0)

# Main loop: grab a frame, detect a hand, classify its rendered skeleton and
# fold stable predictions into `sentence`. Esc (key code 27) quits.
# The try/finally guarantees the camera and the OpenCV windows are released
# even when something inside the loop raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)  # horizontal flip
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        prediction_text = "Waiting for hand..."

        current_time = time.time()

        if results.multi_hand_landmarks:
            no_hand_start_time = None  # Reset no-hand timer

            # Only the first detected hand is classified.
            hand_landmarks = results.multi_hand_landmarks[0]

            colored_img = draw_hand_landmarks_on_black(hand_landmarks, image_size=128)
            gray_img = cv2.cvtColor(colored_img, cv2.COLOR_BGR2GRAY)
            normalized_img = gray_img.astype("float32") / 255.0
            # Add batch and channel axes: (128, 128) -> (1, 128, 128, 1).
            input_img = np.expand_dims(normalized_img, axis=(0, -1))

            prediction = model.predict(input_img, verbose=0)
            predicted_index = np.argmax(prediction)
            confidence = prediction[0][predicted_index]

            if confidence > CONFIDENCE_THRESHOLD:
                predicted_letter = labels[predicted_index]

                # Count consecutive confident frames that agree on a letter;
                # a different letter restarts the count.
                if predicted_letter == confirmed_letter:
                    stable_count += 1
                else:
                    stable_count = 0
                    confirmed_letter = predicted_letter

                if stable_count > STABILITY_THRESHOLD:
                    # Append a new letter immediately; append a repeat of the
                    # last letter only after the cooldown has elapsed.
                    is_new_letter = current_letter != confirmed_letter
                    cooldown_over = current_time - last_confirmed_time > LETTER_COOLDOWN
                    if is_new_letter or cooldown_over:
                        sentence += confirmed_letter
                        current_letter = confirmed_letter
                        last_confirmed_time = current_time
                    prediction_text = f"Letter: {confirmed_letter}"
                else:
                    prediction_text = f"Stabilizing: {confirmed_letter}"
            else:
                prediction_text = "Unsure"

            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            cv2.imshow("Model Input", colored_img)
        else:
            # No hand detected: after NO_HAND_TIMEOUT seconds, separate words.
            if no_hand_start_time is None:
                no_hand_start_time = current_time
            elif current_time - no_hand_start_time > NO_HAND_TIMEOUT:
                # Skip the space while the sentence is still empty so it never
                # starts with whitespace, and never add two spaces in a row.
                if sentence and not sentence.endswith(" "):
                    sentence += " "
                no_hand_start_time = None  # Restart the no-hand timer

        # Overlay the per-frame status and the accumulated sentence.
        cv2.putText(frame, prediction_text, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.putText(frame, f"Sentence: {sentence}", (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
        cv2.imshow("ASL to Sentence", frame)

        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
