In [4]:
import numpy as np
import cv2
import mediapipe as mp
from tensorflow.keras.models import load_model
import pickle
import pyttsx3
import threading  # <--- Required for background speech

# 1. Initialize the TTS engine
engine = pyttsx3.init()
engine.setProperty('rate', 150) # Speed of speech
last_spoken = None  # To prevent the same letter from repeating constantly

def speak(text):
  """Function to run in a separate thread"""
  try:
    engine.say(text)
    engine.runAndWait()
  except Exception as e:
    print(f"TTS Error: {e}")

# load trained model
model = load_model("asl_landmark_anni.h5")

# load label encoder
with open("label_encoder.pkl", "rb") as f:
  le = pickle.load(f)

print("model and label encoder loaded!")

mpHands = mp.solutions.hands
hands = mpHands.Hands(
  static_image_mode = False,
  max_num_hands = 1,
  min_detection_confidence = 0.7,
  min_tracking_confidence = 0.7
)

mpDraw = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)

while True:
  success, img = cap.read()

  if not success:
    break

  img = cv2.flip(img, 1)

  imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  results = hands.process(imgRGB)

  if results.multi_hand_landmarks:
    for handLms in results.multi_hand_landmarks:
      mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)

      landmarks = []
      for lm in handLms.landmark:
        landmarks.extend([lm.x, lm.y, lm.z])

      landmarks = np.array(landmarks).reshape(1,-1)

      # prediction = model.predict(landmarks)
      # class_id = np.argmax(prediction)
      # label = le.inverse_transform([class_id])[0]

      # changed
      prediction = model.predict(landmarks)[0]  # flatten
      class_id = np.argmax(prediction)
      label = le.inverse_transform([class_id])[0]

      cv2.putText(img, f"Prediction: {label}", (10, 60),
                  cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 255), 3)
      
      # changed
      y = 100
      for i, prob in enumerate(prediction):
          letter = le.inverse_transform([i])[0]
          cv2.putText(
              img,
              f"{letter}: {prob*100:.2f}%",
              (10, y),
              cv2.FONT_HERSHEY_SIMPLEX,
              0.8,
              (0, 255, 0),
              2
          )
          y += 30
      
      # 2. Trigger speech ONLY when the label changes
      if label != last_spoken:
        # Run the speak function in a new thread so video doesn't stop
        threading.Thread(target=speak, args=(label,), daemon=True).start()
        last_spoken = label
      
  cv2.imshow("ASL Real Time Detection", img)

  if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

model and label encoder loaded!
TTS Error: run loop already started
TTS Error: run loop already started
TTS Error: run loop already started
