In [None]:
import cv2
import numpy as np
import mediapipe as mp
from keras.models import load_model
from sklearn.metrics import accuracy_score, confusion_matrix
from collections import Counter
import time
import sys
import matplotlib.pyplot as plt # For plotting
import seaborn as sns          # For heatmap visualization

In [None]:
# --- A. CONFIGURATION ---
# Everything a reader may need to tune lives in this cell.
MODEL_PATH = 'asl_cnn_model.h5' # Ensure this points to your trained model
# Side length (pixels) of the square image fed to the model; preprocess_image
# resizes every hand crop to IMAGE_SIZE x IMAGE_SIZE.
IMAGE_SIZE = 32 
THRESHOLD_VALUE = 161 # OpenCV threshold (TUNE THIS for your specific lighting)
# 24 class labels (the alphabet without J and Z). predict_asl_letter indexes this
# string with the argmax of the model output.
# NOTE(review): assumes this order matches the class order used at training time - confirm.
ASL_LETTERS = 'ABCDEFGHIKLMNOPQRSTUVWXY'
SESSION_DURATION = 5.0 # Seconds to capture data for each sign

In [None]:
# --- B. MODEL & MEDIAPIPE SETUP ---
# Load the trained classifier once; every later cell uses the module-level `model`.
try:
    model = load_model(MODEL_PATH)
except Exception as e:
    print(f"❌ Error loading model '{MODEL_PATH}'. Ensure training was successful.")
    # Surface the underlying cause (missing file, incompatible format, ...) instead of hiding it.
    print(f"   Details: {type(e).__name__}: {e}")
    # Non-zero status: this is a failure, not a normal exit.
    sys.exit(1)

In [None]:
# Alias for MediaPipe's hand-landmark solution module.
mp_hands = mp.solutions.hands
# Single shared tracker used by run_predictions: configured for a video stream
# (static_image_mode=False), at most one hand per frame, and a minimum detection
# confidence of 0.7.
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7)

--- C. PREPROCESSING FUNCTIONS ---

In [None]:
def preprocess_image(image):
    """Convert a BGR hand crop into the tensor the classifier is fed.

    Steps, in order: grayscale conversion, horizontal flip, 15x15 Gaussian
    blur, fixed binary threshold at THRESHOLD_VALUE, resize to
    IMAGE_SIZE x IMAGE_SIZE, scale to the 0..1 range.

    Args:
        image: Cropped hand region in BGR channel order.

    Returns:
        tuple: ``(model_input, preview)``. ``model_input`` is the scaled image
        with shape ``(1, IMAGE_SIZE, IMAGE_SIZE, 1)``; ``preview`` is the
        resized thresholded image before scaling, suitable for display.
    """
    mirrored_gray = cv2.flip(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 1)
    smoothed = cv2.GaussianBlur(mirrored_gray, (15, 15), 0)

    # THRESH_BINARY turns pixels brighter than THRESHOLD_VALUE white, i.e. a
    # white hand on black when the hand is brighter than the background.
    # Switch to cv2.THRESH_BINARY_INV if the hand is darker than the background.
    mask = cv2.threshold(smoothed, THRESHOLD_VALUE, 255, cv2.THRESH_BINARY)[1]

    side = IMAGE_SIZE
    preview = cv2.resize(mask, (side, side))
    model_input = (preview / 255.0).reshape((1, side, side, 1))

    return model_input, preview

In [None]:
def predict_asl_letter(prediction, letters=None):
    """Map a model prediction to its ASL character.

    Args:
        prediction: Class scores from the model, either a flat vector or a
            single-sample batch such as the one produced in run_predictions.
        letters: Optional sequence of class labels indexed by class id.
            Defaults to the module-level ``ASL_LETTERS``.

    Returns:
        The label stored at the position of the highest score.

    Raises:
        IndexError: If the winning index has no entry in ``letters``.
    """
    if letters is None:
        letters = ASL_LETTERS
    # np.argmax works on the flattened scores, so a (1, n_classes) batch and a
    # flat (n_classes,) vector give the same class index.
    best_index = int(np.argmax(prediction))
    return letters[best_index]

In [None]:
# --- D. PREDICTION AND EVALUATION LOOP ---
def _hand_bounding_box(hand_landmarks, frame_width, frame_height, margin=30):
    """Return the margin-padded pixel box (x_min, y_min, x_max, y_max) of one hand, clamped to the frame."""
    x_min, y_min = frame_width, frame_height
    x_max, y_max = 0, 0

    # Landmark coordinates are scaled by the frame size to obtain pixel positions.
    for landmark in hand_landmarks.landmark:
        x, y = int(landmark.x * frame_width), int(landmark.y * frame_height)
        x_min, y_min = min(x_min, x), min(y_min, y)
        x_max, y_max = max(x_max, x), max(y_max, y)

    return (
        max(0, x_min - margin),
        max(0, y_min - margin),
        min(frame_width, x_max + margin),
        min(frame_height, y_max + margin),
    )


def run_predictions(true_label):
    """Capture predictions for SESSION_DURATION seconds while the user holds one sign.

    Reads frames from the module-level ``cap``, locates the first detected hand
    with ``hands``, classifies the cropped hand with ``model`` and records one
    (true, predicted) pair per frame in which a hand was classified.

    Args:
        true_label: The letter the user is asked to hold during this session.

    Returns:
        tuple: ``(y_true, y_pred)`` - two equally long lists. Both are empty if
        no hand was classified during the session.

    Notes:
        The session ends early if a frame cannot be read or 'q' is pressed.
        Pressing 'q' here only ends the current session.
    """
    start_time = time.time()
    y_true = []
    y_pred = []

    while time.time() - start_time < SESSION_DURATION:
        success, frame = cap.read()
        if not success:
            break

        # Mirror the frame so the preview behaves like a mirror for the user.
        frame = cv2.flip(frame, 1)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(frame_rgb)

        if result.multi_hand_landmarks:
            # Only the first detected hand is used.
            h, w = frame.shape[:2]
            x_min, y_min, x_max, y_max = _hand_bounding_box(
                result.multi_hand_landmarks[0], w, h
            )
            hand_image = frame[y_min:y_max, x_min:x_max]

            # The crop can be empty when the clamped box collapses; skip such frames.
            if hand_image.size > 0:
                # NOTE(review): preprocess_image flips the crop horizontally again,
                # undoing the mirror above for the model input - confirm this is intended.
                preprocessed_image, resized_image = preprocess_image(hand_image)

                prediction = model.predict(preprocessed_image, verbose=0)
                asl_letter = predict_asl_letter(prediction)
                confidence = np.max(prediction) * 100

                # Store data points for metric calculation
                y_true.append(true_label)
                y_pred.append(asl_letter)

                # Draw UI feedback. When the box is near the top of the frame the
                # label would be clipped or collide with the HOLD banner, so it is
                # drawn below the box instead.
                label_y = y_min - 10
                if label_y < 60:
                    label_y = min(h - 10, y_max + 30)
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                cv2.putText(frame, f'PRED: {asl_letter} ({confidence:.1f}%)',
                            (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
                cv2.imshow('Preprocessed Image', resized_image)

        # The deadline is checked before the per-frame work, so clamp the
        # countdown to avoid showing a negative time on the last frame.
        remaining = max(0.0, SESSION_DURATION - (time.time() - start_time))
        cv2.putText(frame, f"HOLD: {true_label} | TIME: {remaining:.1f}s",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.imshow('ASL Recognition', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    return y_true, y_pred

In [None]:
# --- E. MAIN EXECUTION ---
# Open the default webcam (device index 0); run_predictions and the main loop read from `cap`.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail here with a clear message instead of exiting silently on the first failed read.
    print("❌ Error opening webcam (device 0). Check that a camera is connected and not in use.")
    cap.release()
    sys.exit(1)

# Per-letter summary keyed by the true letter, filled in by the main loop.
metrics_per_letter = {}
# Flat lists of every (true, predicted) pair across all sessions, used for the
# overall accuracy and the confusion matrix.
y_true_total = []
y_pred_total = []

In [None]:
# Start-of-run banner: rule, title, rule - one line each.
banner_rule = "============================================="
print(banner_rule, "ASL REAL-TIME EVALUATION MODE", banner_rule, sep="\n")

In [None]:
# Walk through every letter: wait for the user to get ready, capture one timed
# session, then summarise that session.
for true_label in ASL_LETTERS:
    print(f"\n👉 Ready for letter: {true_label}. Press 'C' to begin capture.")

    # Idle preview loop: show the mirrored camera feed until the user presses C (start) or Q (quit).
    while True:
        ret, frame = cap.read()
        if not ret:
            # The camera stopped delivering frames: free the device and windows before exiting.
            print("❌ Could not read a frame from the camera. Aborting evaluation.")
            cap.release()
            cv2.destroyAllWindows()
            sys.exit(1)

        frame = cv2.flip(frame, 1)
        # The duration is taken from SESSION_DURATION so the prompt stays correct when it is tuned.
        cv2.putText(frame, f"Ready: {true_label}. Press 'C' to start {SESSION_DURATION:g}s test.", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.imshow('ASL Recognition', frame)

        # Accept both cases: the prompt says 'C', and Caps Lock or Shift may be active.
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('c'), ord('C')):
            print(f"   Capturing {true_label} for {SESSION_DURATION} seconds.")
            break
        elif key in (ord('q'), ord('Q')):
            cap.release()
            cv2.destroyAllWindows()
            sys.exit()

    # Run the prediction session
    y_true, y_pred = run_predictions(true_label)

    # Calculate metrics after prediction session (skipped when no hand was classified)
    if y_pred:
        accuracy = accuracy_score(y_true, y_pred)
        most_common_label, count = Counter(y_pred).most_common(1)[0]

        metrics_per_letter[true_label] = {
            'Accuracy': accuracy,
            'Most Predicted Label': most_common_label,
            'Count': count,
            'Total Frames': len(y_pred)
        }

        y_true_total.extend(y_true)
        y_pred_total.extend(y_pred)

        print(f'   Result: Accuracy: {accuracy:.2f}, Most Predicted: {most_common_label} ({count} frames)')
        cv2.destroyAllWindows()

    else:
        print("   No hand detected during the session.")

In [None]:
# --- F. FINAL REPORTING ---
# Only report when at least one session produced predictions.
if y_true_total:
    final_accuracy = accuracy_score(y_true_total, y_pred_total)

    # Headline figure: accuracy over every classified frame of every session.
    print(
        "\n\n=============================================",
        f"     FINAL OVERALL REAL-TIME ACCURACY: {final_accuracy:.2f}",
        "=============================================",
        sep="\n",
    )

    # One summary line per letter that had a successful session.
    print("\nDetailed Per-Letter Performance:")
    for letter in metrics_per_letter:
        metrics = metrics_per_letter[letter]
        print(f'Sign {letter}: ACC={metrics["Accuracy"]:.2f} | PRED={metrics["Most Predicted Label"]} ({metrics["Count"]}/{metrics["Total Frames"]} frames)')

    # --- G. CONFUSION MATRIX GENERATION AND DISPLAY ---
    print("\n--- Generating Confusion Matrix Plot ---")

    # Fix the row/column order to the full label set so letters without
    # samples still get a row and a column.
    labels_list = [*ASL_LETTERS]
    cm = confusion_matrix(y_true_total, y_pred_total, labels=labels_list)

    # Draw the matrix as an annotated heatmap on an explicit axes.
    fig, ax = plt.subplots(figsize=(18, 15))
    heatmap_options = dict(
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=labels_list,
        yticklabels=labels_list,
    )
    sns.heatmap(cm, ax=ax, **heatmap_options)
    ax.set_title('Real-Time ASL Detection Confusion Matrix')
    ax.set_ylabel('True Label (Actual Sign Held)')
    ax.set_xlabel('Predicted Label (Model Output)')
    plt.show()

In [None]:
# A cell cannot begin with a bare `else:` (SyntaxError), so the "no data"
# case is tested explicitly here.
if not y_true_total:
    print("\nNo full evaluation cycle was completed.")

In [None]:
# Final cleanup: release the webcam handle opened with cv2.VideoCapture and
# close every OpenCV window created with cv2.imshow.
cap.release()
cv2.destroyAllWindows()