In [None]:
import cv2
import numpy as np
import mediapipe as mp
from keras.models import load_model
import time

In [None]:
# --- A. CONFIGURATION AND HYPERPARAMETERS ---

# Saved Keras model and the side length (pixels) of its square input image.
MODEL_PATH = 'asl_cnn_model.h5'
IMAGE_SIZE = 32

# Output labels: the character at position i is the letter reported for model
# class index i. The string holds 24 letters (J and Z do not appear in it).
# NOTE(review): assumes the model was trained with this exact class order - confirm.
ASL_LETTERS = 'ABCDEFGHIKLMNOPQRSTUVWXY'

# REQUIRED: only predictions above 95% confidence are treated as reliable.
CONFIDENCE_THRESHOLD = 0.95

# Seconds a sign must be held steady before it is appended to the text.
APPEND_DELAY_SEC = 2.0

# Minimum gap (milliseconds) between two model predictions (throttling for speed).
PREDICT_INTERVAL_MS = 50

In [None]:
# Mutable state shared with the main loop; re-run this cell to start from scratch.
recognized_text = previous_letter = ""  # transcript so far / sign currently being timed
last_predict_time = 0                   # OpenCV tick clock (ms) of the latest model call
last_append_time = time.time()          # wall clock (s) when the hold timer last restarted

In [None]:
# --- B. MODEL & MEDIAPIPE SETUP ---
# Load the trained Keras model from MODEL_PATH. Nothing below can work without
# it, so a failure is reported and then re-raised: execution stops with the
# original traceback. (The previous `exit()` shut down the notebook kernel,
# returned exit status 0 when run as a script, and depends on the `site`
# module having injected the `exit` helper.)
try:
    model = load_model(MODEL_PATH)
except Exception as e:
    print(f"❌ Error loading model: {e}")
    raise

In [None]:
# MediaPipe hand tracker configured for a video stream (non-static mode),
# at most one hand, and a minimum detection confidence of 0.7.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
)

In [None]:
# Translate a class index produced by the CNN into its ASL letter
def predict_asl_letter(prediction_index):
    """Look up the ASL character for a model output index.

    Returns the character of ASL_LETTERS at `prediction_index`, or the string
    "Unknown" when the index is negative or past the end of ASL_LETTERS.
    """
    out_of_range = prediction_index < 0 or prediction_index >= len(ASL_LETTERS)
    return "Unknown" if out_of_range else ASL_LETTERS[prediction_index]

In [None]:
# Define the preprocessing function
def preprocess_image(image, threshold_value=None):
    """Turn a cropped BGR hand image into the tensor fed to the model.

    Pipeline: grayscale -> 15x15 Gaussian blur -> binary threshold ->
    resize to IMAGE_SIZE x IMAGE_SIZE -> divide by 255.

    Args:
        image: Hand crop in BGR channel order (it is converted with
            cv2.COLOR_BGR2GRAY).
        threshold_value: Cut-off passed to cv2.threshold. When None (the
            default) the module-level THRESHOLD_VALUE is used, so existing
            single-argument calls behave exactly as before. Passing a value
            lets callers tune the threshold without touching global state.

    Returns:
        A tuple (reshaped, resized): `reshaped` is the normalised array with
        shape (1, IMAGE_SIZE, IMAGE_SIZE, 1); `resized` is the thresholded
        IMAGE_SIZE x IMAGE_SIZE image, intended for display.

    Raises:
        NameError: if `threshold_value` is None and THRESHOLD_VALUE has not
            been defined yet (it lives in a later notebook cell).
    """
    if threshold_value is None:
        # Looked up at call time, so the global only has to exist before the
        # first call, not before this definition.
        threshold_value = THRESHOLD_VALUE

    # Convert the hand image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur to suppress noise before binarising
    gray = cv2.GaussianBlur(gray, (15, 15), 0)

    # Apply binary thresholding: pixels above the cut-off become 255, the rest 0
    _, thresholded = cv2.threshold(gray, threshold_value, 255, cv2.THRESH_BINARY)

    # Resize the image to the model's required input size
    resized = cv2.resize(thresholded, (IMAGE_SIZE, IMAGE_SIZE))

    # Normalize and add the batch and channel axes: (1, IMAGE_SIZE, IMAGE_SIZE, 1)
    normalized = resized / 255.0
    reshaped = np.reshape(normalized, (1, IMAGE_SIZE, IMAGE_SIZE, 1))

    return reshaped, resized  # resized is the thresholded image for display

## C. MAIN EXECUTION LOOP

In [None]:
# Binarisation cut-off read by preprocess_image(). The best value depends on
# lighting and background, so you MUST tune it (e.g. to 140 or 161) for your setup.
THRESHOLD_VALUE = 161

# Open the default camera (device index 0) and fail fast when it is not
# available: otherwise the `while cap.isOpened()` loop would be skipped
# silently and no window would ever appear.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open camera at index 0")

In [None]:
def _hand_bounding_box(hand_landmarks, frame_w, frame_h, margin=30):
    """Return the hand's pixel box (x_min, y_min, x_max, y_max), padded by `margin` and clamped to the frame."""
    xs = [int(lm.x * frame_w) for lm in hand_landmarks.landmark]
    ys = [int(lm.y * frame_h) for lm in hand_landmarks.landmark]
    # Seeding min/max with the frame limits keeps the box inside the image even
    # when landmarks fall outside the frame.
    x_min = max(0, min([frame_w] + xs) - margin)
    y_min = max(0, min([frame_h] + ys) - margin)
    x_max = min(frame_w, max([0] + xs) + margin)
    y_max = min(frame_h, max([0] + ys) + margin)
    return x_min, y_min, x_max, y_max


# Capture loop: read a frame, locate the hand, classify the cropped sign and
# append a letter to `recognized_text` once the same confident sign has been
# held for APPEND_DELAY_SEC seconds.
# Keys: q = quit, c = clear text, s = add a space, b = delete last character.
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        frame = cv2.flip(frame, 1)  # Flip for mirror effect
        h, w = frame.shape[:2]
        current_time_ms = cv2.getTickCount() / cv2.getTickFrequency() * 1000

        # Defaults for frames where no prediction is made (throttled / no hand)
        predicted_sign = ""
        confidence = 0.0

        # Convert the video frame to RGB for MediaPipe
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(frame_rgb)

        # 1. Prediction throttling: run the model at most once per PREDICT_INTERVAL_MS
        if (current_time_ms - last_predict_time) > PREDICT_INTERVAL_MS and result.multi_hand_landmarks:
            last_predict_time = current_time_ms

            # Crop the first detected hand (the tracker is set to one hand)
            x_min, y_min, x_max, y_max = _hand_bounding_box(result.multi_hand_landmarks[0], w, h)
            hand_image = frame[y_min:y_max, x_min:x_max]

            if hand_image.size > 0:
                preprocessed_image, resized_image = preprocess_image(hand_image)

                # 2. Use the trained model to predict the ASL letter
                prediction = model.predict(preprocessed_image, verbose=0)
                predicted_label_index = np.argmax(prediction)
                confidence = np.max(prediction)
                predicted_sign = predict_asl_letter(predicted_label_index)

                # Green box for a confident prediction, orange otherwise
                box_color = (0, 255, 0) if confidence > CONFIDENCE_THRESHOLD else (0, 165, 255)
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), box_color, 2)

                text_display = f'ASL: {predicted_sign} ({confidence*100:.1f}%)'
                # Keep the label inside the frame when the box touches the top edge
                label_y = max(y_min - 10, 25)
                cv2.putText(frame, text_display, (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, box_color, 2, cv2.LINE_AA)

                cv2.imshow('Preprocessed Image', resized_image)

        # 3. Text appending and stability logic.
        # Only confident predictions of a real letter may start, continue or
        # complete a hold. Low-confidence frames and the "Unknown" fallback are
        # ignored, so they neither reset the timer nor reach the transcript.
        is_confident_letter = (
            confidence > CONFIDENCE_THRESHOLD
            and predicted_sign != ""
            and predicted_sign in ASL_LETTERS
        )
        if is_confident_letter:
            now = time.time()
            if predicted_sign != previous_letter:
                # A new confident sign: start timing it
                previous_letter = predicted_sign
                last_append_time = now
            elif now - last_append_time >= APPEND_DELAY_SEC:
                # Same sign held long enough: append it and restart the timer
                recognized_text += predicted_sign
                last_append_time = now

        # --- Display Windows ---
        cv2.imshow('ASL Recognition', frame)

        text_window = np.ones((200, 500, 3), dtype=np.uint8) * 255
        cv2.putText(text_window, 'Recognized Text:', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
        cv2.putText(text_window, recognized_text, (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.imshow('Recognized Text', text_window)

        # --- Key Press Controls ---
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('c'):
            recognized_text = ''
        elif key == ord('s'):
            recognized_text += " "
        elif key == ord('b'):
            recognized_text = recognized_text[:-1]
finally:
    # Free the camera and windows even if the loop raises or is interrupted.
    # Both calls are safe to repeat, so the separate cleanup cell still works.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Tear-down: close every OpenCV window and release the capture device
cv2.destroyAllWindows()
cap.release()