In [15]:
import torch
import torch.nn as nn
from torchvision import transforms
import cv2
from PIL import Image
import numpy as np

# Device configuration: use CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Class mapping: restore the table stored in class_mapping.pth.
# Both names below are bound to the very same loaded object.
try:
    idx_to_class = class_mapping = torch.load('class_mapping.pth', weights_only=True)
    # The classifier gets one output unit per entry in the mapping.
    num_classes = len(class_mapping)
    print("Loaded class mapping.")
except FileNotFoundError:
    print("Error: class_mapping.pth not found.  Make sure the file exists and the path is correct.")
    # Everything that follows needs the mapping, so stop here.
    exit()


# Model Definition (MUST MATCH the training model EXACTLY)
class ArSLNet(nn.Module):
    """Four-stage convolutional classifier with a two-layer dense head.

    Every stage is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, stride 2),
    with channel widths 3 -> 32 -> 64 -> 128 -> 256. The head's first Linear
    layer takes 256 * 14 * 14 inputs, which is the flattened size produced
    by four 2x poolings of a 224 x 224 image.

    The position of each layer inside ``features`` and ``classifier`` fixes
    its state-dict key, so the ordering must stay as it is for saved
    checkpoints to load.
    """

    def __init__(self, num_classes=32):
        """Build the layers.

        Args:
            num_classes: Number of outputs of the final Linear layer.
        """
        super().__init__()

        # (in_channels, out_channels) of each convolutional stage, in order.
        stage_channels = ((3, 32), (32, 64), (64, 128), (128, 256))
        conv_layers = []
        for c_in, c_out in stage_channels:
            conv_layers.append(nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
            conv_layers.append(nn.ReLU(inplace=True))
            conv_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.features = nn.Sequential(*conv_layers)

        # Flattened feature size the dense head is wired for.
        flat_features = 256 * 14 * 14
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(flat_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``."""
        feature_maps = self.features(x)
        # Keep the batch axis, collapse everything else.
        flat = feature_maps.flatten(start_dim=1)
        return self.classifier(flat)



# Load the trained model.
# The network has to exist (with the right number of outputs) before the saved
# weights can be copied into it.
model = ArSLNet(num_classes=num_classes)
model = model.to(device)
try:
    # map_location places the stored tensors on the device selected earlier.
    model.load_state_dict(
        torch.load('best_arsl_model.pth', map_location=device, weights_only=True)
    )
    # Evaluation mode: dropout layers become pass-through.
    model.eval()
    print("Model loaded successfully.")
except FileNotFoundError:
    print("Error: best_arsl_model.pth not found.  Ensure the file exists and the path is correct.")
    exit()
except RuntimeError as load_error:
    # Raised by load_state_dict when keys or tensor shapes do not line up.
    print(f"RuntimeError while loading the model: {load_error}")
    print("Check that the model architecture in this script EXACTLY matches the architecture used during training.")
    exit()


# Image preprocessing applied to every PIL image before it reaches the model:
#   1. resize to the 224 x 224 input the dense head is sized for,
#   2. convert to a float tensor,
#   3. normalise each channel with the mean/std values listed below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def predict_image(image_path):
    """Classify a single image file.

    Args:
        image_path: Location of the image; it is opened with PIL and
            converted to RGB.

    Returns:
        ``(label, confidence)`` on success, where ``label`` is looked up in
        ``idx_to_class`` and ``confidence`` is the softmax probability of
        the top class as a percentage. If the image cannot be opened, a
        single error-message string is returned instead of a tuple, so
        callers should check the result type before unpacking.
    """
    try:
        rgb_image = Image.open(image_path).convert('RGB')
    except FileNotFoundError:
        return "Error: Image not found."
    except Exception as load_error:
        return f"Error loading image: {load_error}"

    # Preprocess, add the leading batch axis, and move to the model's device.
    batch = transform(rgb_image)
    batch = batch.unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)
        # Probabilities over classes for the single sample in the batch.
        probs = torch.nn.functional.softmax(logits[0], dim=0)
        top_idx = torch.max(logits, 1)[1].item()

    return idx_to_class[top_idx], probs[top_idx].item() * 100


# OpenCV Integration (Real-time prediction from webcam)
def opencv_realtime():
    """Open the default webcam and overlay live sign predictions on each frame.

    Every captured frame is converted from BGR to RGB, run through
    ``transform`` and ``model``, and annotated with the predicted label and
    its softmax confidence. A failure while processing one frame is printed
    and drawn on that frame; the loop then continues with the next frame.

    The loop ends when 'q' is pressed in the preview window or a frame cannot
    be read. The capture device and the preview window are released in a
    ``finally`` clause, so they are also freed when the loop is left through
    an exception (for example a kernel interrupt).

    Returns:
        None.
    """
    cap = cv2.VideoCapture(0)  # 0 selects the default webcam

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame.")
                break

            try:
                # OpenCV delivers BGR; the preprocessing pipeline expects RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                pil_img = Image.fromarray(frame_rgb)
                # Add the batch dimension and move to the model's device.
                img_tensor = transform(pil_img).unsqueeze(0).to(device)

                with torch.no_grad():
                    output = model(img_tensor)
                    probabilities = torch.nn.functional.softmax(output[0], dim=0)
                    _, predicted_idx = torch.max(output, 1)
                    # Convert to a Python int once and reuse it for both lookups.
                    top_idx = predicted_idx.item()
                    predicted_class = idx_to_class[top_idx]
                    confidence = probabilities[top_idx].item() * 100

                label = f"Prediction: {predicted_class} (Confidence: {confidence:.2f}%)"
                cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            except Exception as e:
                # Best effort per frame: report the problem and keep the stream alive.
                print(f"Error processing frame: {type(e).__name__} - {e}")
                cv2.putText(frame, f"Error: {type(e).__name__}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            cv2.imshow('Arabic Sign Language Prediction', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
                break
    finally:
        # Always give the camera back and close the window, even on an
        # interrupt or an unexpected display error.
        cap.release()
        cv2.destroyAllWindows()


# Run real-time prediction from webcam.
# The call returns when the webcam cannot be opened, when a frame cannot be
# read, or when 'q' is pressed in the preview window.
opencv_realtime()

Using device: cpu
Loaded class mapping.
Model loaded successfully.


In [9]:
# Print both lookup tables for a quick sanity check.
# NOTE(review): the recorded output below comes from execution In [9], which
# predates the In [15] cell above. That cell binds idx_to_class to class_mapping
# itself, so a fresh top-to-bottom run would presumably print the same dict
# twice rather than the inverted mapping shown here -- re-run to confirm.
print("Class mapping:", class_mapping)
print("Idx to class mapping:", idx_to_class)

Class mapping: {0: 'ain', 1: 'al', 2: 'aleff', 3: 'bb', 4: 'dal', 5: 'dha', 6: 'dhad', 7: 'fa', 8: 'gaaf', 9: 'ghain', 10: 'ha', 11: 'haa', 12: 'jeem', 13: 'kaaf', 14: 'khaa', 15: 'la', 16: 'laam', 17: 'meem', 18: 'nun', 19: 'ra', 20: 'saad', 21: 'seen', 22: 'sheen', 23: 'ta', 24: 'taa', 25: 'thaa', 26: 'thal', 27: 'toot', 28: 'waw', 29: 'ya', 30: 'yaa', 31: 'zay'}
Idx to class mapping: {'ain': 0, 'al': 1, 'aleff': 2, 'bb': 3, 'dal': 4, 'dha': 5, 'dhad': 6, 'fa': 7, 'gaaf': 8, 'ghain': 9, 'ha': 10, 'haa': 11, 'jeem': 12, 'kaaf': 13, 'khaa': 14, 'la': 15, 'laam': 16, 'meem': 17, 'nun': 18, 'ra': 19, 'saad': 20, 'seen': 21, 'sheen': 22, 'ta': 23, 'taa': 24, 'thaa': 25, 'thal': 26, 'toot': 27, 'waw': 28, 'ya': 29, 'yaa': 30, 'zay': 31}


In [13]:
# Example Usage (Single image prediction)
image_path = 'ba.png'  # Replace with the path to your image
result = predict_image(image_path)
if isinstance(result, str):
    # predict_image reports a load failure as a plain message string rather
    # than a (label, confidence) pair, so it must not be tuple-unpacked.
    print(result)
else:
    prediction, confidence = result
    print(f"Predicted class: {prediction}, Confidence: {confidence:.2f}%")

Predicted class: laam, Confidence: 42.32%
