In [3]:
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model


In [8]:
import os
import shutil
import random

# Split every class folder under `dataset_path` into train/test sub-folders.
#
# NOTE: files are *moved* (not copied) out of `dataset_path`, so after a
# successful run the class folders there are empty and re-running this cell
# reports 0 images per class.

# Define dataset path
dataset_path = './Dataset/'  # Replace with actual dataset path
train_dir = './train'
test_dir = './test'

# Fraction of each class assigned to training; the remainder goes to testing.
TRAIN_FRACTION = 0.8
# Fixed seed so a fresh run on the same dataset produces the same split.
SPLIT_SEED = 42

# A private generator keeps the split independent of whatever else has drawn
# from the global `random` state earlier in the kernel session.
split_rng = random.Random(SPLIT_SEED)

# Create train and test directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Loop through each subdirectory (class label).
# os.listdir() returns entries in arbitrary order, so sort for a stable sequence.
for class_name in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_name)

    # Skip plain files and hidden folders (e.g. `.ipynb_checkpoints`), which
    # would otherwise be turned into bogus class labels.
    if class_name.startswith('.') or not os.path.isdir(class_path):
        continue

    # Create corresponding class folders in train and test directories
    train_class_dir = os.path.join(train_dir, class_name)
    test_class_dir = os.path.join(test_dir, class_name)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)

    # Collect regular, non-hidden files (this drops e.g. macOS `.DS_Store`).
    # Sorting first makes the seeded shuffle below deterministic.
    images = sorted(
        f for f in os.listdir(class_path)
        if not f.startswith('.') and os.path.isfile(os.path.join(class_path, f))
    )

    # Shuffle images to ensure randomness (reproducibly, via the seeded generator)
    split_rng.shuffle(images)

    # Split index: the first TRAIN_FRACTION of the shuffled list is for training
    split_index = int(len(images) * TRAIN_FRACTION)
    train_images = images[:split_index]
    test_images = images[split_index:]

    # Move images to respective train/test subdirectories
    for destination_dir, selected_images in (
        (train_class_dir, train_images),
        (test_class_dir, test_images),
    ):
        for image in selected_images:
            shutil.move(os.path.join(class_path, image), os.path.join(destination_dir, image))

    print(f"Class '{class_name}': {len(train_images)} images for training, {len(test_images)} images for testing.")

print("Dataset split complete!")


Class 'I': 2400 images for training, 600 images for testing.
Class 'G': 2400 images for training, 600 images for testing.
Class 'A': 2400 images for training, 600 images for testing.
Class 'F': 2400 images for training, 600 images for testing.
Class 'H': 2400 images for training, 600 images for testing.
Class 'J': 2400 images for training, 600 images for testing.
Class 'C': 2400 images for training, 600 images for testing.
Class 'D': 2400 images for training, 600 images for testing.
Class 'E': 2400 images for training, 600 images for testing.
Class 'B': 2400 images for training, 600 images for testing.
Dataset split complete!


In [10]:
# Shared input configuration for both generators
IMG_SIZE = (224, 224)  # every image is resized to this (height, width) on load
BATCH_SIZE = 32  # number of images yielded per generator step

# Random augmentations applied to training images only
augmentation_settings = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Training pipeline: multiply pixel values by 1/255, then augment
train_datagen = ImageDataGenerator(rescale=1.0 / 255, **augmentation_settings)

# Test pipeline: same rescaling, no augmentation
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Options common to both directory iterators; labels are one-hot encoded
flow_settings = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

# Build the iterators (one sub-folder per class under each root directory)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_settings)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_settings)

# Class labels in the order of the generator's class -> index mapping
class_names = list(train_generator.class_indices)
print("Classes:", class_names)


Found 24000 images belonging to 10 classes.
Found 6000 images belonging to 10 classes.
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']


In [11]:
# Load MobileNetV2 pre-trained on ImageNet, without its classification head.
# The input shape is derived from IMG_SIZE so the network always matches the
# target_size the data generators resize images to (previously the shape was
# hard-coded here and could silently drift from IMG_SIZE).
base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=(*IMG_SIZE, 3))

# Freeze the backbone: setting `trainable` on the model applies to all of its
# layers, so only the new classification head below is trained.
base_model.trainable = False

# Add new layers for classification:
# pooled backbone features -> 128-unit ReLU layer -> dropout -> softmax over classes
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu")(x)
x = Dropout(0.3)(x)
x = Dense(len(class_names), activation="softmax")(x)

# Define final model
model = Model(inputs=base_model.input, outputs=x)

# Compile the model (Use legacy Adam optimizer for M1/M2 Macs)
# NOTE(review): the `legacy` optimizer namespace is tied to Keras 2 based
# TensorFlow releases — confirm availability before upgrading TensorFlow.
model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(),
    loss="categorical_crossentropy",  # matches class_mode="categorical" (one-hot labels)
    metrics=["accuracy"]
)

# Show model summary
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 Conv1 (Conv2D)              (None, 112, 112, 32)         864       ['input_1[0][0]']             
                                                                                                  
 bn_Conv1 (BatchNormalizati  (None, 112, 112, 32)         128       ['Conv1[0][0]']               
 on)                                                                                              
                                                                                                  
 Conv1_relu (ReLU)           (None, 112, 112, 32)         0         ['bn_Conv1[0][0]']        

 block_3_expand_relu (ReLU)  (None, 56, 56, 144)          0         ['block_3_expand_BN[0][0]']   
                                                                                                  
 block_3_pad (ZeroPadding2D  (None, 57, 57, 144)          0         ['block_3_expand_relu[0][0]'] 
 )                                                                                                
                                                                                                  
 block_3_depthwise (Depthwi  (None, 28, 28, 144)          1296      ['block_3_pad[0][0]']         
 seConv2D)                                                                                        
                                                                                                  
 block_3_depthwise_BN (Batc  (None, 28, 28, 144)          576       ['block_3_depthwise[0][0]']   
 hNormalization)                                                                                  
          

 block_6_depthwise_BN (Batc  (None, 14, 14, 192)          768       ['block_6_depthwise[0][0]']   
 hNormalization)                                                                                  
                                                                                                  
 block_6_depthwise_relu (Re  (None, 14, 14, 192)          0         ['block_6_depthwise_BN[0][0]']
 LU)                                                                                              
                                                                                                  
 block_6_project (Conv2D)    (None, 14, 14, 64)           12288     ['block_6_depthwise_relu[0][0]
                                                                    ']                            
                                                                                                  
 block_6_project_BN (BatchN  (None, 14, 14, 64)           256       ['block_6_project[0][0]']     
 ormalizat

                                                                                                  
 block_9_add (Add)           (None, 14, 14, 64)           0         ['block_8_add[0][0]',         
                                                                     'block_9_project_BN[0][0]']  
                                                                                                  
 block_10_expand (Conv2D)    (None, 14, 14, 384)          24576     ['block_9_add[0][0]']         
                                                                                                  
 block_10_expand_BN (BatchN  (None, 14, 14, 384)          1536      ['block_10_expand[0][0]']     
 ormalization)                                                                                    
                                                                                                  
 block_10_expand_relu (ReLU  (None, 14, 14, 384)          0         ['block_10_expand_BN[0][0]']  
 )        

                                                                                                  
 block_13_expand_relu (ReLU  (None, 14, 14, 576)          0         ['block_13_expand_BN[0][0]']  
 )                                                                                                
                                                                                                  
 block_13_pad (ZeroPadding2  (None, 15, 15, 576)          0         ['block_13_expand_relu[0][0]']
 D)                                                                                               
                                                                                                  
 block_13_depthwise (Depthw  (None, 7, 7, 576)            5184      ['block_13_pad[0][0]']        
 iseConv2D)                                                                                       
                                                                                                  
 block_13_

 iseConv2D)                                                                                       
                                                                                                  
 block_16_depthwise_BN (Bat  (None, 7, 7, 960)            3840      ['block_16_depthwise[0][0]']  
 chNormalization)                                                                                 
                                                                                                  
 block_16_depthwise_relu (R  (None, 7, 7, 960)            0         ['block_16_depthwise_BN[0][0]'
 eLU)                                                               ]                             
                                                                                                  
 block_16_project (Conv2D)   (None, 7, 7, 320)            307200    ['block_16_depthwise_relu[0][0
                                                                    ]']                           
          

In [12]:
# Number of full passes over the training generator
EPOCHS = 10

# Fit the model. `test_generator` is passed as validation data, so its loss and
# accuracy are reported alongside the training metrics; the returned History
# object is kept in `history`.
fit_options = {
    "epochs": EPOCHS,
    "validation_data": test_generator,
}
history = model.fit(train_generator, **fit_options)

# Write the trained network to disk so it can be reloaded later
model.save("sign_language_mobilenet.h5")
print("Model saved successfully!")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


Model saved successfully!


In [13]:
# Score the trained model on the test generator. With the single "accuracy"
# metric compiled in, evaluate() yields the loss followed by the accuracy.
evaluation = model.evaluate(test_generator)
test_loss, test_acc = evaluation
print(f"Test Accuracy: {test_acc * 100:.2f}%")


Test Accuracy: 98.25%


In [15]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
import time

# Real-time sign classification from a webcam feed.
#
# Each frame is mirrored, MediaPipe Hands locates the hand(s), a padded bounding
# box around the landmarks is cropped and classified by the trained network, and
# the prediction is drawn on the displayed frame. Press 'q' in the window to quit.

# --- Configuration -----------------------------------------------------------
MODEL_PATH = "sign_language_mobilenet.h5"
CAMERA_INDEX = 1                      # device index passed to cv2.VideoCapture
CAMERA_BACKEND = cv2.CAP_AVFOUNDATION  # capture backend passed to cv2.VideoCapture
MODEL_INPUT_SIZE = (224, 224)         # (width, height) the crop is resized to
BOX_PADDING = 20                      # pixels added around the landmark extent
WARMUP_SECONDS = 2                    # pause before the first read


def _hand_bounding_box(hand_landmarks, frame_w, frame_h, padding):
    """Return (x_min, y_min, x_max, y_max) of the padded box around a hand.

    Landmark coordinates are normalised, so they are scaled by the frame size.
    Every coordinate is clamped to [0, frame size] on both sides: landmarks of a
    hand that is partly off-frame may fall outside the image, and an unclamped
    negative bound would be interpreted by NumPy slicing as "from the end".
    """
    xs = [int(lm.x * frame_w) for lm in hand_landmarks.landmark]
    ys = [int(lm.y * frame_h) for lm in hand_landmarks.landmark]

    x_min = min(max(min(xs) - padding, 0), frame_w)
    y_min = min(max(min(ys) - padding, 0), frame_h)
    x_max = min(max(max(xs) + padding, 0), frame_w)
    y_max = min(max(max(ys) + padding, 0), frame_h)
    return x_min, y_min, x_max, y_max


# Load trained model
model = tf.keras.models.load_model(MODEL_PATH)

# Define class names (Update according to your dataset).
# The order must match the class -> index mapping used during training.
class_names = ["A", "B", "C", "D", "E","F","G","H","I","J"]

# Initialize Mediapipe hand tracking
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Start webcam feed
cap = cv2.VideoCapture(CAMERA_INDEX, CAMERA_BACKEND)

# try/finally + `with` guarantee that the camera, the preview window and the
# MediaPipe graph are released even when the loop is stopped by interrupting
# the kernel instead of pressing 'q'.
try:
    with mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7) as hands:
        if cap.isOpened():
            # Allow webcam to warm up
            time.sleep(WARMUP_SECONDS)
        else:
            print(f"Error: Could not open camera {CAMERA_INDEX}.")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame.")
                break

            # Flip the frame for natural view
            frame = cv2.flip(frame, 1)

            # Convert BGR to RGB. cvtColor returns a new array, so `rgb_frame`
            # stays free of anything drawn onto `frame` later.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Process frame with Mediapipe Hands
            results = hands.process(rgb_frame)

            # If hands are detected
            if results.multi_hand_landmarks:
                h, w, _ = frame.shape

                for hand_landmarks in results.multi_hand_landmarks:
                    # Draw hand landmarks on the frame that is displayed
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Get padded bounding box around hand
                    x_min, y_min, x_max, y_max = _hand_bounding_box(
                        hand_landmarks, w, h, BOX_PADDING
                    )

                    # Extract the hand region from the clean RGB frame: the
                    # classifier was trained on RGB images, and the BGR display
                    # frame additionally carries the landmark overlay.
                    hand_img = rgb_frame[y_min:y_max, x_min:x_max]

                    # An empty crop means the box collapsed (hand off-frame)
                    if hand_img.shape[0] == 0 or hand_img.shape[1] == 0:
                        continue

                    # Resize and preprocess hand image (same 1/255 scaling as training)
                    hand_img = cv2.resize(hand_img, MODEL_INPUT_SIZE)
                    hand_img = np.expand_dims(hand_img, axis=0) / 255.0

                    # Predict sign language gesture; verbose=0 suppresses the
                    # per-call progress output that otherwise floods the cell.
                    prediction = model.predict(hand_img, verbose=0)
                    predicted_class = class_names[np.argmax(prediction)]
                    confidence = np.max(prediction) * 100

                    # Draw bounding box
                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

                    # Add label with background; place it above the box unless
                    # that would push it off the top of the frame.
                    label = f"{predicted_class} ({confidence:.2f}%)"
                    label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
                    label_x = x_min
                    label_y = y_min - 10 if y_min - 10 > 10 else y_min + 20

                    cv2.rectangle(frame, (label_x, label_y - label_size[1] - 5),
                                  (label_x + label_size[0], label_y + 5), (0, 255, 0), -1)
                    cv2.putText(frame, label, (label_x, label_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

            # Display results
            cv2.imshow("Sign Language Detection", frame)

            # Press 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release camera & close windows
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1741661100.380455  246605 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2
W0000 00:00:1741661100.405980  327510 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741661100.415481  327515 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


























KeyboardInterrupt: 