In [14]:
import torch
print("--- PYTORCH CHECK ---")
print(f"Version: {torch.__version__}")
is_available = torch.cuda.is_available()
print(f"GPU Available: {is_available}")

if is_available:
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
# Expected: GPU Available: True and the name of your GPU.

OSError: [WinError 127] The specified procedure could not be found. Error loading "c:\Users\adelg\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\lib\shm.dll" or one of its dependencies.

In [13]:
import cv2
print("--- OPENCV CHECK (cv2) ---")
print(f"Version: {cv2.__version__}")

# Check if a core component loads successfully
try:
    # A simple check to see if the library's features are accessible
    print(f"Module Status: Loaded successfully.")
except Exception as e:
    print(f"Status: FAIL - Crash: {e}")

# EXPECTED SUCCESS: Version 4.8.0.74

--- OPENCV CHECK (cv2) ---
Version: 4.11.0
Module Status: Loaded successfully.


In [12]:
import tensorflow as tf
print("--- TENSORFLOW CHECK ---")
print(f"Version: {tf.__version__}")
gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs Detected: {gpus}")

# Expected: Version 2.10.0 and a list showing your physical device.

--- TENSORFLOW CHECK ---
Version: 2.10.0
GPUs Detected: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [11]:
pip list show

Package                      Version
---------------------------- ------------
absl-py                      2.3.1
anyio                        4.11.0
argon2-cffi                  25.1.0
argon2-cffi-bindings         25.1.0
arrow                        1.4.0
asttokens                    3.0.0
astunparse                   1.6.3
async-lru                    2.0.5
attrs                        25.4.0
babel                        2.17.0
beautifulsoup4               4.14.2
bleach                       6.2.0
cachetools                   6.2.2
certifi                      2025.11.12
cffi                         2.0.0
charset-normalizer           3.4.4
colorama                     0.4.6
comm                         0.2.3
contourpy                    1.3.0
cycler                       0.12.1
debugpy                      1.8.17
decorator                    5.2.1
defusedxml                   0.7.1
exceptiongroup               1.2.2
executing                    2.2.1
fastjsonschema               2.21

In [9]:
import tensorflow as tf

# Check if the device is found and accessible
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    try:
        # Access the properties of the first detected device (GPU:0)
        details = tf.config.experimental.get_device_details(gpus[0])
        # The friendly name is usually stored here
        name = details.get('device_name', 'Name retrieval failed.') 
        print(f"--- DETAILED TENSORFLOW GPU CHECK ---")
        print(f"✅ Device Found: {gpus[0].name}")
        print(f"✅ **GPU NAME**: {name}")
    except Exception:
        # Fallback if the experimental API is not stable
        print("✅ GPU Detected. Name retrieval failed (API instability).")
        print(f"Path: {gpus[0].name}")
else:
    print("❌ No GPU found by TensorFlow.")

--- DETAILED TENSORFLOW GPU CHECK ---
✅ Device Found: /physical_device:GPU:0
✅ **GPU NAME**: NVIDIA GeForce MX150


Removing the .DS_Store(hidden files) files in the training dataset

In [10]:
import mediapipe as mp
print("--- MEDIAPIPE CHECK ---")

try:
    # Attempt to initialize a core module (Hands) that relies on Protobuf
    mp_hands = mp.solutions.hands
    print("Status: SUCCESS - Module loaded and initialized.")
except Exception as e:
    print(f"Status: FAIL - Crash: {e}")
# EXPECTED SUCCESS: Status: SUCCESS - Module loaded and initialized.

--- MEDIAPIPE CHECK ---
Status: SUCCESS - Module loaded and initialized.


In [5]:
import tensorflow as tf
print("--- TENSORFLOW CHECK ---")
print(f"Version: {tf.__version__}")

gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs Detected: {gpus}")

# EXPECTED SUCCESS: Version 2.10.0 and a list of physical devices (e.g., '[PhysicalDevice...]')

--- TENSORFLOW CHECK ---
Version: 2.10.0
GPUs Detected: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [1]:
import tensorflow as tf
import os

print("Running DLL Check...")

# 1. Check if CUDA is in Path
cuda_path = os.environ.get('CUDA_PATH')
print(f"CUDA_PATH: {cuda_path}")

# 2. Try to load the GPU
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 0:
    print(f"✅ SUCCESS! Detected: {gpus[0].name}")
else:
    print("❌ FAILURE: No GPU detected.")
    print("Possible missing files in 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2\\bin':")
    print("- cudart64_110.dll (From CUDA Toolkit)")
    print("- cudnn64_8.dll    (From cuDNN)")
    print("- zlibwapi.dll     (The secret file)")

Running DLL Check...
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2
✅ SUCCESS! Detected: /physical_device:GPU:0


In [None]:
import os

dataset_dir = '/Users/js/Desktop/Sign Recognition Application/Sign_to_Sentence Project/Asl_Sign_Data/asl_alphabet_train/asl_alphabet_train'

# Traverse all subdirectories and remove .DS_Store files
for root, dirs, files in os.walk(dataset_dir):
    for file in files:
        if file == ".DS_Store":
            file_path = os.path.join(root, file)
            os.remove(file_path)
            print(f"Removed: {file_path}")

print("All .DS_Store files removed successfully!")


In [8]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import cv2


Data Visualisation

In [None]:
dataset_dir = '/Users/js/Desktop/Sign Recognition Application/Sign_to_Sentence Project/Asl_Sign_Data/asl_alphabet_train/asl_alphabet_train'

classes = os.listdir(dataset_dir)
print(f"Classes: {classes}")

# Visualize some images
fig, axes = plt.subplots(3, 5, figsize=(12, 8))

for i, label in enumerate(classes[:5]):  # Show first 5 classes
    class_dir = os.path.join(dataset_dir, label)
    img_files = os.listdir(class_dir)[:3]  # Show 3 images per class

    for j, img_name in enumerate(img_files):
        img_path = os.path.join(class_dir, img_name)
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
        img = cv2.resize(img, (128, 128))
        
        axes[j, i].imshow(img)
        axes[j, i].axis("off")
        axes[j, i].set_title(label)

plt.show()

Data Pre-processing

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define image size and batch size
IMG_SIZE = 128 
BATCH_SIZE = 32

# Data generators
# We are using this approach to make it less computaionally extensive as the data consists of 87,000 images appx,
# loading all the images as generally done and then label encoding them will be CPU extensive task. 

train_datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # Splitting data into train (80%) and val (20%)
)

# Train & validation generators (load images directly from disk)
train_generator = train_datagen.flow_from_directory(
    dataset_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="training"
)

val_generator = train_datagen.flow_from_directory(
    dataset_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="validation"
)

print("Class labels:", train_generator.class_indices)

Defining the MobileNet V2 Model

In [None]:
base_model = MobileNetV2(
    weights="imagenet", 
    include_top=False, 
    input_shape=(128, 128, 3)
)

base_model.trainable = False  # Freezing all layers initially as this is an intial training

x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation="relu")(x)
x = Dropout(0.3)(x)
output_layer = Dense(len(train_generator.class_indices), activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=output_layer)

# Compile model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

Initial training on the dataset

In [None]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5 
)

Fine Tuning the model further

In [None]:
for layer in base_model.layers[-20:]:  # Unfreeze last 20 layers
    layer.trainable = True

# Recompile with a lower learning rate to avoid overfitting
model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-4),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

history_finetune = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10
)


In [None]:
model.save("sign_language_model_MobileNetV2.h5")
print("Model saved successfully!")

Model Testing

In [None]:
# Load the trained model
model = tf.keras.models.load_model("sign_language_model_MobileNetV2.h5")

print("Model loaded successfully!")

In [None]:
class_labels = {
    0: "A", 1: "B", 2: "C", 3: "D", 4: "E", 5: "F", 6: "G", 7: "H",
    8: "I", 9: "J", 10: "K", 11: "L", 12: "M", 13: "N", 14: "O",
    15: "P", 16: "Q", 17: "R", 18: "S", 19: "T", 20: "U", 21: "V",
    22: "W", 23: "X", 24: "Y", 25: "Z", 26: "del", 27: "nothing", 28: "space"
}


In [None]:
IMG_SIZE = 128 
test_folder = '/Users/js/Desktop/Sign Recognition Application/Sign_to_Sentence Project/Asl_Sign_Data/asl_alphabet_test/asl_alphabet_test'
test_images = []
image_names = []

for img_name in os.listdir(test_folder):
    if img_name.endswith((".jpg", ".png", ".jpeg")):
        img_path = os.path.join(test_folder, img_name)

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        img = img / 255.0

        test_images.append(img)
        image_names.append(img_name)

# Convert to NumPy array
test_images = np.array(test_images)

print(f"Loaded {len(test_images)} test images.")


In [None]:
# Get model predictions
predictions = model.predict(test_images)

# Convert probabilities to class labels
predicted_classes = np.argmax(predictions, axis=1)

for i, img_name in enumerate(image_names):
    pred_label = class_labels[predicted_classes[i]]
    print(f"Image: {img_name} → Predicted as: {pred_label}")


Test Accuracy

In [None]:
true_labels = [img_name.split("_")[0] for img_name in image_names]

correct = sum([1 if class_labels[predicted_classes[i]] == true_labels[i] else 0 for i in range(len(true_labels))])
accuracy = correct / len(true_labels) * 100

print(f"Test Accuracy: {accuracy:.2f}%")

Results Visualisation

In [None]:
history_dict = history.history

# Plot Accuracy Graph
plt.figure(figsize=(10, 5))
plt.plot(history_dict["accuracy"], label="Training Accuracy", marker="o", color="orange")
plt.plot(history_dict["val_accuracy"], label="Validation Accuracy", marker="o", color="red")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Model Accuracy Over Epochs")
plt.legend()
plt.grid(True)
plt.show()

# Plot Loss Graph
plt.figure(figsize=(10, 5))
plt.plot(history_dict["loss"], label="Training Loss", marker="o", color="red")
plt.plot(history_dict["val_loss"], label="Validation Loss", marker="o", color="blue")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Model Loss Over Epochs")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(true_labels, [class_labels[i] for i in predicted_classes], labels=list(class_labels.values()))

# Plot confusion matrix
plt.figure(figsize=(12, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels.values(), yticklabels=class_labels.values())
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix for ASL Sign Recognition")
plt.show()

Predicting the Signs from camera feed

- The approach is to use mediapipe to find the center of the hand, make a box/ frame around it, pass the image inside of frame to the trained model.

- Since MobileNet V2 is convolutional neural network, it would not give accurate results on real life data

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf

# Load trained MobileNetV2 model
mobilenet_model = tf.keras.models.load_model("sign_language_model_MobileNetV2.h5")

# Correct class labels (matching `train_generator.class_indices`)
class_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
                'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
                'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

cap = cv2.VideoCapture(0)

#Bounding Box Adjustments
HEIGHT_EXPAND = 220
WIDTH_EXPAND = 150

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_frame)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Get bounding box coordinates
            x_min = min([lm.x for lm in hand_landmarks.landmark]) * frame.shape[1]
            y_min = min([lm.y for lm in hand_landmarks.landmark]) * frame.shape[0]
            x_max = max([lm.x for lm in hand_landmarks.landmark]) * frame.shape[1]
            y_max = max([lm.y for lm in hand_landmarks.landmark]) * frame.shape[0]

            x_min = max(0, int(x_min - WIDTH_EXPAND))   
            y_min = max(0, int(y_min - HEIGHT_EXPAND)) 
            x_max = min(frame.shape[1], int(x_max + WIDTH_EXPAND))  
            y_max = min(frame.shape[0], int(y_max + HEIGHT_EXPAND)) 

            # Crop the hand region
            hand_crop = frame[y_min:y_max, x_min:x_max]

            # Resize to (128x128) for MobileNetV2
            if hand_crop.shape[0] > 0 and hand_crop.shape[1] > 0:
                hand_resized = cv2.resize(hand_crop, (128, 128))
                hand_resized = np.expand_dims(hand_resized, axis=0) / 255.0  # Normalize

                # Predict using MobileNetV2
                prediction = mobilenet_model.predict(hand_resized)
                
                # Get top 3 predictions
                top_3_indices = np.argsort(prediction[0])[-3:][::-1]
                top_3_labels = [class_labels[i] for i in top_3_indices]
                top_3_scores = [prediction[0][i] for i in top_3_indices]

                # Debugging: Print top 3 predictions
                print(f"Top 3 Predictions: {list(zip(top_3_labels, top_3_scores))}")

                # Get the best prediction
                predicted_label = top_3_labels[0]

                # Display Prediction
                cv2.putText(frame, f"Predicted: {predicted_label}", (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

    cv2.imshow("MobileNetV2 Sign Prediction (Fixed Bounding Box Height)", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


After testing it is seen that this model is useful and clasifies the signs correctly which has distinct unique shapes like - letter A, L, etc but fails in complex signs like - letter K, X etc.