In [None]:
# predict_asl_corrected.py
# This script performs real-time sign language recognition using a pre-trained model.

import cv2
import numpy as np
import os
import json
import mediapipe as mp
from tensorflow.keras.models import load_model
from collections import deque

# --- 1. Global Constants and Helper Functions ---

# --- Landmark & Preprocessing Constants (MUST MATCH TRAINING SCRIPT) ---
# Landmark counts per pose / per hand; they fix the layout of the flat
# keypoint vector (33 * 3 pose values followed by 21 * 2 values per hand).
NUM_POSE_LANDMARKS = 33
NUM_HAND_LANDMARKS = 21
# Symmetric lookup of pose landmark indices that trade places when a frame is
# mirrored: each listed index is paired with index + 1, in both directions.
POSE_LANDMARK_PAIRS = {idx: idx + 1 for idx in (11, 13, 15, 23, 25, 27, 29, 31)}
POSE_LANDMARK_PAIRS.update(
    {partner: idx for idx, partner in POSE_LANDMARK_PAIRS.items()}
)

def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame.

    The frame is converted to RGB before it is handed to ``model.process``
    and converted back to BGR afterwards.

    Returns:
        A ``(bgr_image, results)`` tuple: the round-tripped frame and whatever
        ``model.process`` returned for it.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Mark the buffer read-only while the model runs, then unlock it again.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

def draw_styled_landmarks(image, results):
    """Draw the detected pose and hand landmarks onto ``image`` in place.

    Pose, left hand and right hand are drawn in that order, each with its own
    colour pair; a part whose landmarks are missing is skipped.
    """
    drawing = mp.solutions.drawing_utils
    holistic = mp.solutions.holistic

    # (results attribute, connection-set attribute, landmark colour, connection colour)
    layers = (
        ("pose_landmarks", "POSE_CONNECTIONS", (80, 22, 10), (80, 44, 121)),
        ("left_hand_landmarks", "HAND_CONNECTIONS", (121, 22, 76), (121, 44, 250)),
        ("right_hand_landmarks", "HAND_CONNECTIONS", (245, 117, 66), (245, 66, 230)),
    )
    for result_attr, connections_attr, landmark_color, connection_color in layers:
        landmarks = getattr(results, result_attr)
        if not landmarks:
            continue
        drawing.draw_landmarks(
            image,
            landmarks,
            getattr(holistic, connections_attr),
            drawing.DrawingSpec(color=landmark_color, thickness=2, circle_radius=4),
            drawing.DrawingSpec(color=connection_color, thickness=2, circle_radius=2),
        )

def extract_keypoints_no_z(results):
    """
    Flatten one frame of detection results into a single keypoint vector, dropping Z.

    Layout: (x, y, visibility) per pose landmark, then (x, y) per left-hand
    landmark, then (x, y) per right-hand landmark. A part that was not
    detected is replaced by zeros of the expected size, which gives
    33 * 3 + 21 * 2 + 21 * 2 = 183 values per frame.
    """
    segments = []

    if results.pose_landmarks:
        pose_rows = [[lm.x, lm.y, lm.visibility] for lm in results.pose_landmarks.landmark]
        segments.append(np.array(pose_rows).flatten())
    else:
        segments.append(np.zeros(NUM_POSE_LANDMARKS * 3))

    if results.left_hand_landmarks:
        left_rows = [[lm.x, lm.y] for lm in results.left_hand_landmarks.landmark]
        segments.append(np.array(left_rows).flatten())
    else:
        segments.append(np.zeros(NUM_HAND_LANDMARKS * 2))

    if results.right_hand_landmarks:
        right_rows = [[lm.x, lm.y] for lm in results.right_hand_landmarks.landmark]
        segments.append(np.array(right_rows).flatten())
    else:
        segments.append(np.zeros(NUM_HAND_LANDMARKS * 2))

    return np.concatenate(segments)

def mirror_keypoints_frame_no_z(keypoints_frame):
    """Return a horizontally mirrored copy of one flat keypoint vector.

    Expected layout: NUM_POSE_LANDMARKS * (x, y, visibility), followed by
    NUM_HAND_LANDMARKS * (x, y) for the left hand and the same for the right
    hand. Every X becomes ``1.0 - X``, the two hand segments trade places, and
    each pose row listed in POSE_LANDMARK_PAIRS takes the (already flipped)
    row of its partner. The input array itself is not modified.
    """
    flipped = np.copy(keypoints_frame)
    pose_len = NUM_POSE_LANDMARKS * 3
    hand_len = NUM_HAND_LANDMARKS * 2
    left_start = pose_len
    right_start = left_start + hand_len
    right_end = right_start + hand_len

    # Flip X: it is the first value of every landmark (stride 3 in the pose
    # segment, stride 2 in each hand segment).
    # NOTE(review): zero-filled segments are flipped as well, so their X values
    # become 1.0 - presumably consistent with the training script; confirm.
    flipped[0:pose_len:3] = 1.0 - flipped[0:pose_len:3]
    flipped[left_start:right_start:2] = 1.0 - flipped[left_start:right_start:2]
    flipped[right_start:right_end:2] = 1.0 - flipped[right_start:right_end:2]

    # Exchange the left-hand and right-hand segments.
    former_left = flipped[left_start:right_start].copy()
    flipped[left_start:right_start] = flipped[right_start:]
    flipped[right_start:] = former_left

    # Exchange paired pose rows, reading from a snapshot so that both
    # directions of every pair see the pre-swap values.
    pose_rows = flipped[:pose_len].reshape(NUM_POSE_LANDMARKS, 3)
    snapshot = pose_rows.copy()
    targets = list(POSE_LANDMARK_PAIRS.keys())
    sources = list(POSE_LANDMARK_PAIRS.values())
    pose_rows[targets] = snapshot[sources]
    flipped[:pose_len] = pose_rows.reshape(-1)
    return flipped

def normalize_sequence_no_z(sequence_data):
    """Normalize a sequence of 183-length keypoint vectors (no Z).

    Each frame is laid out as 33 pose landmarks (x, y, visibility) followed by
    21 left-hand and 21 right-hand landmarks (x, y). Per frame:

    * An all-zero frame is passed through unchanged.
    * Translation: x/y are shifted so the origin is the midpoint of pose
      landmarks 23 and 24 when both have visibility > 0.5, otherwise pose
      landmark 0.
    * Scale: x/y are divided by the distance between pose landmarks 11 and 12
      when both have visibility > 0.5; otherwise (or when that distance is
      below 1e-6) the scale is 1.0.

    Visibility values are left as they are, and the input is never modified.

    NOTE: inference must preprocess exactly like training. The fallback-origin
    branch here translates the hands as well as the pose; make sure the
    training pipeline does the same.

    Args:
        sequence_data: iterable of 1-D arrays of length 183.

    Returns:
        ``np.ndarray`` holding the normalized frames, one row per input frame.
    """
    pose_len = 33 * 3
    hand_len = 21 * 2

    normalized_sequence = []
    for frame_kps in sequence_data:
        if np.all(frame_kps == 0):
            normalized_sequence.append(frame_kps)
            continue

        # Work on a private copy: the reshapes below are views, so the
        # in-place arithmetic would otherwise write into the caller's data.
        frame = np.array(frame_kps)
        pose = frame[:pose_len].reshape(33, 3)
        lh = frame[pose_len:pose_len + hand_len].reshape(21, 2)
        rh = frame[pose_len + hand_len:].reshape(21, 2)

        hip_l, hip_r = pose[23], pose[24]
        if hip_l[2] > 0.5 and hip_r[2] > 0.5:
            origin = (hip_l[:2] + hip_r[:2]) / 2.0
        else:
            # Copy is essential: pose[0, :2] is a view, and once the pose has
            # been shifted it would read (0, 0), leaving the hands untranslated.
            origin = pose[0, :2].copy()
        pose[:, :2] -= origin
        lh -= origin
        rh -= origin

        shoulder_l, shoulder_r = pose[11], pose[12]
        if shoulder_l[2] > 0.5 and shoulder_r[2] > 0.5:
            scale = np.linalg.norm(shoulder_l[:2] - shoulder_r[:2])
        else:
            scale = 1.0
        if scale < 1e-6:
            # Degenerate shoulder distance: skip scaling rather than blow up.
            scale = 1.0
        pose[:, :2] /= scale
        lh /= scale
        rh /= scale

        normalized_sequence.append(np.concatenate([pose.flatten(), lh.flatten(), rh.flatten()]))
    return np.array(normalized_sequence)

def prob_viz(res, actions, input_frame, colors):
    """Overlay one horizontal probability bar plus its label per entry of ``res``.

    Drawing happens on a copy of ``input_frame``; that copy is returned and
    the original frame is left untouched. Colours are cycled when there are
    more rows than entries in ``colors``.
    """
    canvas = input_frame.copy()
    row_height = 40
    for row, probability in enumerate(res):
        top = 60 + row * row_height
        # Bar length in pixels is the probability scaled to 0..100.
        bar_end = (int(probability * 100), top + 30)
        bar_color = colors[row % len(colors)]
        cv2.rectangle(canvas, (0, top), bar_end, bar_color, -1)
        cv2.putText(
            canvas,
            actions[row],
            (0, top + 25),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (255, 255, 255),
            2,
            cv2.LINE_AA,
        )
    return canvas


def main():
    """Run real-time sign recognition on the default webcam.

    Loads the Keras model and the label map from the ``models`` directory,
    then processes camera frames in a loop: detect landmarks, extract and (for
    a left-hand signer) mirror the keypoints, keep a sliding window of the last
    SEQUENCE_LENGTH frames, classify the normalized window, and append a label
    to the on-screen sentence once the prediction has been stable and
    confident. Press ``q`` in the preview window to quit.

    Returns early (after printing an error) when the model file, the label map
    or the camera is unavailable. The camera and all windows are released even
    if the loop raises.
    """
    # --- 2. Configuration and Setup ---
    SEQUENCE_LENGTH = 30
    MAX_FAILED_READS = 100  # consecutive failed frame grabs tolerated before giving up
    MODEL_PATH = os.path.join('models', 'best_action_model.h5')
    LABEL_MAP_PATH = os.path.join('models', 'label_map.json')

    print(f"Loading model from: {MODEL_PATH}")
    if not os.path.exists(MODEL_PATH):
        print(f"Error: Model file not found at {MODEL_PATH}")
        return
    model = load_model(MODEL_PATH)

    print(f"Loading label map from: {LABEL_MAP_PATH}")
    if not os.path.exists(LABEL_MAP_PATH):
        print(f"Error: Label map file not found at {LABEL_MAP_PATH}")
        return
    with open(LABEL_MAP_PATH, 'r', encoding='utf-8') as f:
        label_map = json.load(f)

    # Create a reverse mapping from index to label string
    actions = {int(v): k for k, v in label_map.items()}
    print(f"Model loaded. Actions: {actions}")

    # --- 3. Real-time Prediction Loop ---
    sequence = deque(maxlen=SEQUENCE_LENGTH)  # sliding window of keypoint frames
    sentence = []
    predictions = deque(maxlen=10)  # recent class indices, for debouncing
    threshold = 0.95
    dominant_hand = 'RIGHT'

    colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245), (200, 100, 50), (50, 200, 100)]

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open camera.")
        cap.release()
        return

    mp_holistic = mp.solutions.holistic
    failed_reads = 0
    try:
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # Tolerate transient glitches, but do not spin forever on
                    # a camera that has stopped delivering frames.
                    failed_reads += 1
                    if failed_reads >= MAX_FAILED_READS:
                        print("Error: Camera stopped delivering frames.")
                        break
                    print("Ignoring empty camera frame.")
                    continue
                failed_reads = 0

                image, results = mediapipe_detection(frame, holistic)
                draw_styled_landmarks(image, results)

                # --- Prediction Logic ---
                # Only the left hand visible -> treat the signer as left-handed;
                # any frame with the right hand visible switches back to RIGHT.
                # With no hand visible the previous choice is kept.
                left_hand_detected = results.left_hand_landmarks is not None
                right_hand_detected = results.right_hand_landmarks is not None
                if left_hand_detected and not right_hand_detected:
                    dominant_hand = 'LEFT'
                elif right_hand_detected:
                    dominant_hand = 'RIGHT'

                keypoints = extract_keypoints_no_z(results)
                if dominant_hand == 'LEFT':
                    keypoints = mirror_keypoints_frame_no_z(keypoints)
                sequence.append(keypoints)

                if len(sequence) == SEQUENCE_LENGTH:
                    # Preprocess the sequence exactly as in training
                    normalized_data = normalize_sequence_no_z(np.array(list(sequence)))

                    # verbose=0 suppresses the per-call progress bar, which
                    # would otherwise be printed for every frame.
                    res = model.predict(np.expand_dims(normalized_data, axis=0), verbose=0)[0]

                    prediction_idx = int(np.argmax(res))
                    predictions.append(prediction_idx)

                    # Debouncing: the window must be full and every entry must
                    # agree with the current prediction. (Comparing against
                    # np.unique(...)[-1] would only test for the largest class
                    # index in the window, not for stability.)
                    window_full = len(predictions) == predictions.maxlen
                    is_stable = window_full and all(p == prediction_idx for p in predictions)
                    if is_stable and res[prediction_idx] > threshold:
                        # Skip indices absent from the label map instead of crashing.
                        current_action = actions.get(prediction_idx)
                        if current_action is not None and (not sentence or sentence[-1] != current_action):
                            sentence.append(current_action)

                    # Keep only the five most recent words on screen.
                    if len(sentence) > 5:
                        sentence = sentence[-5:]

                    # Visualize the five most probable classes, best first
                    top_indices = np.argsort(res)[-5:][::-1]
                    top_actions = [actions.get(int(i), 'N/A') for i in top_indices]
                    top_probs = res[top_indices]
                    image = prob_viz(top_probs, top_actions, image, colors)

                # --- Display UI Elements ---
                cv2.rectangle(image, (0, 0), (280, 40), (245, 117, 16), -1)
                cv2.putText(image, f'HAND: {dominant_hand}', (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)

                cv2.rectangle(image, (0, image.shape[0] - 40), (image.shape[1], image.shape[0]), (0, 0, 0), -1)
                cv2.putText(image, ' '.join(sentence), (10, image.shape[0] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                cv2.imshow('ASL Tutor - Real-time Prediction', image)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
    finally:
        # Always free the camera and close windows, even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()

# Start the recognition loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()