In [7]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
import time
import nltk
from textblob import TextBlob
from textblob import Word
import re

# Make sure the NLTK 'punkt' and 'wordnet' packages are available locally
for _nltk_package in ('punkt', 'wordnet'):
    nltk.download(_nltk_package)

# Trained Keras classifier used to predict a letter from a rendered hand image
model = tf.keras.models.load_model("modelAdditionalLayers.keras")

# MediaPipe handles: the hands solution module and its drawing helpers
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Video-mode tracker restricted to a single hand
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

# Class labels: the 26 upper-case letters A-Z, looked up by position
labels = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# Letter -> replacement letter table used by pre_correct_letter
ASL_COMMON_ERRORS = {
    'A': 'E',
    'B': 'V',
    'D': 'F',
    'E': 'M',
    'G': 'Q',
    'I': 'J',
    'K': 'P',
    'M': 'N',
    'S': 'A',
    'V': 'B',
    'F': 'D',
    'N': 'M',
}

# Upper-case vocabulary that is accepted without spell-checking
ASL_COMMON_WORDS = {
    # greetings and courtesy
    'HELLO', 'WORLD', 'THANK', 'YOU', 'PLEASE', 'SORRY',
    # short answers and requests
    'YES', 'NO', 'HELP', 'NAME', 'MY',
    # question words
    'WHAT', 'WHERE', 'WHEN', 'WHY', 'HOW',
    # everyday phrases
    'MEET', 'AGAIN', 'GOOD', 'MORNING', 'AFTERNOON', 'NIGHT', 'I', 'LOVE',
}

def draw_hand_landmarks_on_black(hand_landmarks, image_size=128):
    """Render a hand skeleton, scaled and centred, on a square black image.

    The landmark x/y coordinates are shifted to their bounding box, scaled so
    that the longer side of the box spans 80% of ``image_size`` and centred on
    the canvas. The connections listed in ``mp_hands.HAND_CONNECTIONS`` and
    the landmark points themselves are then drawn in green.

    Args:
        hand_landmarks: Object exposing a ``landmark`` iterable whose items
            have ``x`` and ``y`` attributes.
        image_size: Side length, in pixels, of the square output image.

    Returns:
        A ``(image_size, image_size, 3)`` ``uint8`` array containing the
        drawing.
    """
    landmarks = np.array([[lm.x, lm.y] for lm in hand_landmarks.landmark])
    min_x, min_y = landmarks.min(axis=0)
    max_x, max_y = landmarks.max(axis=0)
    width, height = max_x - min_x, max_y - min_y

    # When every landmark coincides the bounding box has zero extent; dividing
    # by it would yield inf/NaN coordinates. A zero scale instead collapses the
    # drawing onto the canvas centre.
    extent = max(width, height)
    scale = 0.8 * image_size / extent if extent > 0 else 0.0

    # Move the bounding box to the origin, scale it, then centre it.
    landmarks[:, 0] = (landmarks[:, 0] - min_x) * scale
    landmarks[:, 1] = (landmarks[:, 1] - min_y) * scale
    landmarks[:, 0] += (image_size - width * scale) / 2
    landmarks[:, 1] += (image_size - height * scale) / 2

    black_img = np.zeros((image_size, image_size, 3), dtype=np.uint8)
    # tolist() yields plain Python ints, so the point tuples handed to the
    # OpenCV drawing calls contain no NumPy scalar types.
    points = [tuple(pt) for pt in landmarks.astype(np.int32).tolist()]

    for start_idx, end_idx in mp_hands.HAND_CONNECTIONS:
        cv2.line(black_img, points[start_idx], points[end_idx], (0, 255, 0), 2)

    for point in points:
        cv2.circle(black_img, point, 3, (0, 255, 0), -1)

    return black_img

def pre_correct_letter(letter):
    """Return the replacement listed for ``letter`` in ``ASL_COMMON_ERRORS``.

    Letters without an entry in the table are returned unchanged.
    """
    if letter in ASL_COMMON_ERRORS:
        return ASL_COMMON_ERRORS[letter]
    return letter

def auto_correct_word(word):
    """Best-effort correction of a word assembled from recognised letters.

    Resolution order:

    1. Words of length 0 or 1 are returned unchanged.
    2. A word that is already in ``ASL_COMMON_WORDS`` is returned unchanged.
    3. Every letter is passed through ``pre_correct_letter``; if the result
       is in ``ASL_COMMON_WORDS`` it is returned.
    4. Otherwise the substituted word is spell-checked with TextBlob and the
       top suggestion is returned when its confidence exceeds 0.7.
    5. If the spell-check fails or is not confident, the substituted word is
       returned.

    Args:
        word: The raw word (a string of recognised letters).

    Returns:
        The corrected word as a string.
    """
    if len(word) <= 1:
        return word

    # The substitution table rewrites letters unconditionally (e.g. 'E' ->
    # 'M'), so a correctly recognised "HELLO" would turn into "HMLLO" and never
    # match the vocabulary. Accept known words before substituting.
    if word in ASL_COMMON_WORDS:
        return word

    corrected = ''.join(pre_correct_letter(c) for c in word)
    if corrected in ASL_COMMON_WORDS:
        return corrected

    try:
        # NOTE(review): TextBlob's spelling lexicon appears to be lower-case,
        # so an all-upper-case word would presumably never match and always
        # come back with zero confidence - confirm. Upper-case words are
        # therefore checked in lower case and the suggestion is upper-cased
        # again; any other casing is passed through untouched.
        was_upper = corrected.isupper()
        tb = TextBlob(corrected.lower() if was_upper else corrected)
        if tb.words:
            suggestion, confidence = tb.words[0].spellcheck()[0]
            if confidence > 0.7:
                return suggestion.upper() if was_upper else suggestion
    except Exception:
        # Spell-checking is optional: on any library error fall back to the
        # substituted word. Unlike a bare ``except``, this does not swallow
        # KeyboardInterrupt / SystemExit.
        pass
    return corrected

def format_sentence(raw_sentence):
    """Turn a whitespace-separated string of words into a display sentence.

    The words are re-joined with single spaces. The first word is passed
    through ``str.capitalize()`` (which also lower-cases its remaining
    characters), a full stop is appended unless the last word already ends in
    ``.``, ``!`` or ``?``, and the standalone pronoun forms ``i``, ``i'm`` and
    ``i've`` are capitalised wherever they occur, including directly before
    punctuation.

    Args:
        raw_sentence: The unformatted sentence; may be empty or ``None``.

    Returns:
        The formatted sentence, or ``""`` when there are no words.
    """
    words = raw_sentence.split() if raw_sentence else []
    if not words:
        return ""

    words[0] = words[0].capitalize()

    # Inspect the last *word* rather than the raw string: trailing whitespace
    # in the input must not cause a second terminator to be appended.
    if not words[-1].endswith(('.', '!', '?')):
        words[-1] += '.'

    # Work per token so the pronoun is also fixed at the end of the sentence,
    # before punctuation, and when it appears several times in a row.
    pronoun_forms = ("i", "i'm", "i've")
    for index, token in enumerate(words):
        if token.rstrip('.!?,;:') in pronoun_forms:
            words[index] = 'I' + token[1:]

    return ' '.join(words)

# --- Recognition state ------------------------------------------------------
raw_sentence = []                  # finished (auto-corrected) words
current_word = []                  # letters accepted for the word in progress
confirmed_letter = ""              # letter currently being tracked for stability
stable_count = 0                   # consecutive confident frames matching confirmed_letter
last_confirmed_time = time.time()  # updated on every accepted letter; not read below
no_hand_start_time = None          # when the hand was first lost, or None
last_letter_time = time.time()     # when the last letter was accepted

# --- Tuning constants -------------------------------------------------------
STABILITY_THRESHOLD = 5   # stable_count must exceed this before a letter counts
LETTER_COOLDOWN = 0.5     # minimum seconds between two accepted letters
NO_HAND_TIMEOUT = 2.0     # seconds without a hand before the word is finished
WORD_TIMEOUT = 3.0        # seconds without a new letter before the word is finished
MIN_WORD_LENGTH = 3       # not referenced by the loop below

# Cache for the on-screen word: auto_correct_word may run a spell-check, so it
# is recomputed only when the pending letters change, not on every frame.
display_source = None
display_word = ""

cap = cv2.VideoCapture(0)

# try/finally guarantees that the camera and the windows are released even
# when the loop is interrupted (e.g. KeyboardInterrupt) or a frame fails.
try:
    cv2.namedWindow("ASL Translator", cv2.WINDOW_NORMAL)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)
        current_time = time.time()

        prediction_text = "Show hand sign..."

        if results.multi_hand_landmarks:
            no_hand_start_time = None
            # Only the first detected hand is classified.
            hand_landmarks = results.multi_hand_landmarks[0]

            # Rendered skeleton -> grayscale -> [0, 1] floats, with a leading
            # and a trailing axis of size 1 added for the model.
            hand_img = draw_hand_landmarks_on_black(hand_landmarks)
            gray_img = cv2.cvtColor(hand_img, cv2.COLOR_BGR2GRAY)
            normalized_img = gray_img.astype("float32") / 255.0
            input_img = np.expand_dims(normalized_img, axis=(0, -1))

            prediction = model.predict(input_img, verbose=0)
            predicted_index = np.argmax(prediction)
            confidence = prediction[0][predicted_index]

            if predicted_index < len(labels) and confidence > 0.85:
                predicted_letter = labels[predicted_index]
                if predicted_letter == confirmed_letter:
                    stable_count += 1
                else:
                    # A different letter restarts the stability count.
                    stable_count = 0
                    confirmed_letter = predicted_letter

                if stable_count > STABILITY_THRESHOLD:
                    prediction_text = f"Letter: {confirmed_letter}"
                    # Accept the letter only after the cooldown has elapsed.
                    if current_time - last_letter_time > LETTER_COOLDOWN:
                        current_word.append(confirmed_letter)
                        last_letter_time = current_time
                        last_confirmed_time = current_time
                        stable_count = 0
                else:
                    prediction_text = f"Detecting: {confirmed_letter}"

            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            cv2.imshow("Hand Input", hand_img)
        else:
            if no_hand_start_time is None:
                no_hand_start_time = current_time
            elif current_time - no_hand_start_time > NO_HAND_TIMEOUT:
                # Hand gone for long enough: finish the word in progress.
                if current_word:
                    raw_sentence.append(auto_correct_word(''.join(current_word)))
                    current_word = []
                no_hand_start_time = None

        # No new letter for a while: finish the word even if a hand is visible.
        if current_word and (current_time - last_letter_time > WORD_TIMEOUT):
            raw_sentence.append(auto_correct_word(''.join(current_word)))
            current_word = []

        pending = ''.join(current_word)
        if pending != display_source:
            display_source = pending
            display_word = auto_correct_word(pending) if pending else ""
        full_sentence = format_sentence(' '.join(raw_sentence + [display_word]))

        # Three overlay rows at y = 40, 80 and 120.
        overlay_rows = (
            (prediction_text, 0.9, (0, 255, 0)),
            (f"Current: {display_word}", 0.8, (255, 255, 0)),
            (f"Sentence: {full_sentence}", 0.8, (255, 255, 255)),
        )
        for row, (text, font_scale, colour) in enumerate(overlay_rows, start=1):
            cv2.putText(frame, text, (10, 40 * row),
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, colour, 2)

        cv2.imshow("ASL Translator", frame)
        if cv2.waitKey(1) & 0xFF == 27:  # Esc quits
            break

    # Flush the word that was still being spelled when the loop ended.
    if current_word:
        raw_sentence.append(auto_correct_word(''.join(current_word)))

    print("\nFinal Sentence:", format_sentence(' '.join(raw_sentence)))
finally:
    cap.release()
    cv2.destroyAllWindows()


[nltk_data] Downloading package punkt to C:\Users\Denin
[nltk_data]     Thomas\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Denin
[nltk_data]     Thomas\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!



Final Sentence: H NN Q.
