In [None]:
import cv2
import torch
import torch.nn as nn
import numpy as np
from collections import deque
from torchvision import transforms

# --------------------------
# CONFIGURATION
# --------------------------
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Size the ROI is resized to before it is fed to the network.
IMG_SIZE = (32, 32)

# Predictions at or below this softmax confidence are reported as "Unknown".
CONFIDENCE_THRESHOLD = 0.4   # lowered for real-time

# Sliding window holding the most recent accepted labels (at most 8).
prediction_queue = deque([], 8)

# Class names in output-index order: the 26 letters, then the special signs.
labels = [chr(code) for code in range(ord("A"), ord("Z") + 1)]
labels.extend(["del", "nothing", "space"])

# --------------------------
# MODEL DEFINITION (UNCHANGED)
# --------------------------
class CNN(nn.Module):
    """Small convolutional classifier: three conv/ReLU/max-pool stages and an MLP head.

    The first linear layer expects 128 * 4 * 4 features, which is what a
    3-channel 32x32 input yields after the three 2x poolings.
    """

    def __init__(self, num_classes):
        """Build the layers; ``num_classes`` sets the width of the output layer."""
        super().__init__()

        # The position of every sub-module inside the two Sequential containers
        # determines the state-dict key names (e.g. ``features.3.weight``), so
        # the layer order here must not change.
        stages = []
        in_channels = 3
        for out_channels in (32, 64, 128):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
            )
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(kernel_size=2))
            in_channels = out_channels
        self.features = nn.Sequential(*stages)

        head = [
            nn.Flatten(),
            nn.Linear(in_features=128 * 4 * 4, out_features=256),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256, out_features=num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``."""
        feature_maps = self.features(x)
        return self.classifier(feature_maps)

# --------------------------
# LOAD MODEL
# --------------------------
# Build the network with one output per label and restore the saved weights.
# map_location remaps the checkpoint's tensors onto the selected device, so a
# file saved on a GPU machine still loads on a CPU-only one.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source (or pass weights_only=True where the installed PyTorch
# version supports it).
model = CNN(len(labels)).to(device)
model.load_state_dict(torch.load("asl_cnn_model.pth", map_location=device))
model.eval()  # inference mode: disables the dropout layer
print("✅ Model loaded")

# --------------------------
# TRANSFORMS (MATCH TRAINING)
# --------------------------
# Convert the image to a tensor, then normalise each of the three channels
# with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# --------------------------
# WEBCAM
# --------------------------
# Open capture device 0 and fail fast when it cannot be opened, instead of
# entering the main loop with a dead capture handle.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open webcam")

print("📷 Press 'q' to quit")

# --------------------------
# MAIN LOOP
# --------------------------
# Fixed region of interest, in pixel coordinates of the mirrored frame.
# It never changes, so it is defined once outside the loop.
x1, y1, x2, y2 = 100, 100, 400, 400

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Flip horizontally so the preview behaves like a mirror.
        frame = cv2.flip(frame, 1)

        # ROI
        # Copy the region BEFORE drawing the guide rectangle: a plain slice is
        # a view into `frame`, so the rectangle's border pixels would otherwise
        # end up inside the image handed to the model.
        roi = frame[y1:y2, x1:x2].copy()
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        # --------------------------
        # PREPROCESS (CRITICAL)
        # --------------------------
        # NOTE(review): OpenCV delivers frames in BGR channel order. If the
        # model was trained on RGB images (e.g. loaded through PIL), add
        # cv2.cvtColor(roi, cv2.COLOR_BGR2RGB) here -- confirm against the
        # training pipeline before changing.
        roi = cv2.GaussianBlur(roi, (5, 5), 0)
        roi = cv2.resize(roi, IMG_SIZE)

        # Add the batch dimension and move the tensor to the model's device.
        roi_tensor = transform(roi).unsqueeze(0).to(device)

        # --------------------------
        # INFERENCE
        # --------------------------
        with torch.no_grad():
            output = model(roi_tensor)
            probs = torch.softmax(output, dim=1)
            conf, pred = torch.max(probs, dim=1)

        confidence = conf.item()
        label = labels[pred.item()]

        # --------------------------
        # SMOOTHING
        # --------------------------
        if confidence > CONFIDENCE_THRESHOLD:
            prediction_queue.append(label)
            # Majority vote over the recent window. Iterating the deque itself
            # (rather than a set of it) makes ties resolve deterministically,
            # in favour of the label that entered the window first.
            final_pred = max(prediction_queue, key=prediction_queue.count)
        else:
            final_pred = "Unknown"

        # --------------------------
        # DISPLAY
        # --------------------------
        cv2.putText(frame, f"Prediction: {final_pred}",
                    (30, 50), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 255, 0), 2)

        cv2.putText(frame, f"Confidence: {confidence:.2f}",
                    (30, 90), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (0, 255, 255), 2)

        cv2.imshow("ASL Real-Time Recognition", frame)

        # Mask to the low byte so the key comparison ignores modifier bits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even when the loop
    # exits through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


✅ Model loaded
📷 Press 'q' to quit
