In [8]:
import cv2 as cv
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import math
import time
import os
import random

# Initialize Webcam
cap = cv.VideoCapture(0)
cap.set(3, 640)  # Set width
cap.set(4, 480)  # Set height

# Initialize Hand Detector
detector = HandDetector(maxHands=1)


# Image Processing Variables
offset = 20
imgSize = 300
counter = 0

# Define Hand Signs and Folder Structure
sign_labels = ["Hello", "ThankYou", "Yes", "No", "Please"]  # Add more as needed
current_sign_index = 0  # Track which sign we are collecting
folder_base = "C:/Users/vithustennysan/Desktop/Sign Language/inputs"

# Create folders for each sign
for label in sign_labels:
    os.makedirs(os.path.join(folder_base, label), exist_ok=True)

print(f"Collecting images for sign: {sign_labels[current_sign_index]}")
while True:
    success, img = cap.read()
    if not success:
        print("Error: Could not access the webcam!")
        break


    # img = cv.flip(img, 1)  # Flip image horizontally for easier viewing
    hands, img = detector.findHands(img)

    if hands:
        hand = hands[0]
        x, y, w, h = hand["bbox"]

        imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255

        # Clamp the crop coordinates
        h_img, w_img, c_img = img.shape
        y1, y2 = max(0, y - offset), min(h_img, y + h + offset)
        x1, x2 = max(0, x - offset), min(w_img, x + w + offset)

        imgCrop = img[y1:y2, x1:x2]

        if imgCrop.size == 0:
            print("Error: Cropped image is empty!")
            continue

        aspectRatio = h / w

        if aspectRatio > 1:
            # Height > Width (Tall Image)
            k = imgSize / h
            wCal = math.ceil(k * w)
            imgResize = cv.resize(imgCrop, (wCal, imgSize))
            wGap = math.ceil((imgSize - wCal) / 2)
            imgWhite[:, wGap:wGap+imgResize.shape[1]] = imgResize
        else:
            # Width > Height (Wide Image)
            k = imgSize / w
            hCal = math.ceil(k * h)
            imgResize = cv.resize(imgCrop, (imgSize, hCal))
            hGap = math.ceil((imgSize - hCal) / 2)
            imgWhite[hGap:hGap+imgResize.shape[0], :] = imgResize

        # Data Augmentation
        imgFlipped = cv.flip(imgWhite, 1)  # Flip image horizontally
        brightness = random.randint(-30, 30)  # Random brightness adjustment
        imgBright = cv.convertScaleAbs(imgWhite, alpha=1, beta=brightness)

        # Display Images
        cv.imshow("ImageCrop", imgCrop)
        cv.imshow("ImageWhite", imgWhite)
        cv.imshow("Augmented Flip", imgFlipped)
        cv.imshow("Augmented Bright", imgBright)

    # Display Current Sign Label
    cv.putText(img, f"Sign: {sign_labels[current_sign_index]}", (20, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv.LINE_AA)
    cv.putText(img, f"Images: {counter}", (20, 90), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2, cv.LINE_AA)
    
    cv.imshow("Image", img)

    key = cv.waitKey(1)

    if key == ord('s'):  # Save Image
        counter += 1
        timestamp = time.strftime("%Y%m%d-%H%M%S")  # Format: YYYYMMDD-HHMMSS
        save_folder = os.path.join(folder_base, sign_labels[current_sign_index])
        
        filename1 = os.path.join(save_folder, f"Image_{timestamp}.jpg")
        filename2 = os.path.join(save_folder, f"Image_{timestamp}_flip.jpg")
        filename3 = os.path.join(save_folder, f"Image_{timestamp}_bright.jpg")

        cv.imwrite(filename1, imgWhite)
        cv.imwrite(filename2, imgFlipped)
        cv.imwrite(filename3, imgBright)

        print(f"Images saved: {filename1}, {filename2}, {filename3}")

    if key == ord('n'):  # Switch to Next Sign
        current_sign_index = (current_sign_index + 1) % len(sign_labels)
        counter = 0
        print(f"Switched to sign: {sign_labels[current_sign_index]}")

    if key == ord('q'):  # Quit
        break

cap.release()
cv.destroyAllWindows()

Collecting images for sign: Hello


In [10]:
import tensorflow as tf

# Define dataset path
dataset_path = "C:/Users/vithustennysan/Desktop/Sign Language/inputs"

# Preprocess images
img_height, img_width = 300, 300
batch_size = 32

# Automatically label images based on folder names
datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_data = datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

val_data = datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Print class labels
print("Labels:", train_data.class_indices)


Found 364 images belonging to 5 classes.
Found 89 images belonging to 5 classes.
Labels: {'Hello': 0, 'No': 1, 'Please': 2, 'ThankYou': 3, 'Yes': 4}


mobilenet

In [None]:
import tensorflow as tf

# Define dataset path
dataset_path = "C:/Users/vithustennysan/Desktop/Sign Language/inputs"

# Define input size (MobileNet requires 224x224)
img_height, img_width = 224, 224  
batch_size = 32

# Data Augmentation for better generalization
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 80% training, 20% validation
)

# Load training and validation datasets
train_data = datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

val_data = datagen.flow_from_directory(
    dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Print class labels
print("Labels:", train_data.class_indices)


In [None]:
# Load MobileNetV2 with pre-trained weights and remove the top classification layer
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(img_height, img_width, 3),
    include_top=False,  # Remove final classification layers
    weights='imagenet'  # Load pretrained weights
)

# Freeze the base model (so we don’t lose pre-trained features)
base_model.trainable = False


In [None]:

# Define the model
model = tf.keras.models.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),  # Reduce feature maps to a vector
    tf.keras.layers.Dense(128, activation='relu'),  # Fully connected layer
    tf.keras.layers.Dropout(0.5),  # Reduce overfitting
    tf.keras.layers.Dense(len(train_data.class_indices), activation='softmax')  # Output layer
])

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Summary of the model
model.summary()


In [None]:

# Define callbacks
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

# Train the model
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=20,  # You can increase this
    callbacks=[early_stop, reduce_lr]
)

model.save("sign_language_mobilenet.h5")


In [None]:
import cv2 as cv
import numpy as np
import tensorflow as tf

# Load trained model
model = tf.keras.models.load_model("sign_language_mobilenet.h5")

# Define class labels
class_labels = {0: "Hello", 1: "No", 2: "Please", 3: "ThankYou", 4: "Yes"}

# Define image size (should match training size)
img_size = 224


In [None]:
from cvzone.HandTrackingModule import HandDetector

# Initialize webcam and hand detector
cap = cv.VideoCapture(0)  
detector = HandDetector(maxHands=2)  

offset = 20
imgSize = 224  # Match with MobileNetV2

while True:
    success, img = cap.read()
    if not success:
        print("Error: Could not access the webcam!")
        break

    img = cv.flip(img, 1)  # Mirror the image (fixes left/right hand issue)
    hands, img = detector.findHands(img)

    if hands:
        hand = hands[0]
        x, y, w, h = hand["bbox"]

        imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255

        # Crop and resize hand region
        y1, y2 = max(0, y - offset), min(img.shape[0], y + h + offset)
        x1, x2 = max(0, x - offset), min(img.shape[1], x + w + offset)
        imgCrop = img[y1:y2, x1:x2]

        if imgCrop.size == 0:
            continue

        aspectRatio = h / w

        if aspectRatio > 1:
            k = imgSize / h
            wCal = int(k * w)
            imgResize = cv.resize(imgCrop, (wCal, imgSize))
            wGap = (imgSize - wCal) // 2
            imgWhite[:, wGap:wGap + wCal] = imgResize
        else:
            k = imgSize / w
            hCal = int(k * h)
            imgResize = cv.resize(imgCrop, (imgSize, hCal))
            hGap = (imgSize - hCal) // 2
            imgWhite[hGap:hGap + hCal, :] = imgResize

        # Preprocess for prediction
        imgArray = imgWhite.astype("float32") / 255.0  # Normalize
        imgArray = np.expand_dims(imgArray, axis=0)  # Add batch dimension

        # Predict the sign language
        predictions = model.predict(imgArray)
        predicted_class = np.argmax(predictions)
        sign_text = class_labels[predicted_class]

        # Display prediction on the webcam
        cv.putText(img, sign_text, (x1, y1 - 10), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    cv.imshow("Sign Language Detection", img)
    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()


xxxxxxxxxxxxx

In [11]:
import tensorflow as tf

# Define CNN Model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(img_height, img_width, 3)),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),

    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),

    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),  # Prevent overfitting
    tf.keras.layers.Dense(len(train_data.class_indices), activation='softmax')  # Output layer
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


In [12]:
# Train the model
epochs = 20  # Adjust based on accuracy
history = model.fit(train_data, validation_data=val_data, epochs=epochs)

# Save the trained model
model.save("sign_language_model.h5")

Epoch 1/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step - accuracy: 0.4302 - loss: 1.9101 - val_accuracy: 0.8090 - val_loss: 0.6970
Epoch 2/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.9106 - loss: 0.2086 - val_accuracy: 0.8427 - val_loss: 0.4990
Epoch 3/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.9809 - loss: 0.0746 - val_accuracy: 0.9775 - val_loss: 0.0890
Epoch 4/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 1.0000 - loss: 0.0025 - val_accuracy: 0.9775 - val_loss: 0.0940
Epoch 5/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 1.0000 - loss: 0.0020 - val_accuracy: 0.9551 - val_loss: 0.1208
Epoch 6/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.9962 - loss: 0.0125 - val_accuracy: 0.9326 - val_loss: 0.1686
Epoch 7/20
[1m12/12[0m [32m━━━━━━━━━━



In [9]:
import cv2 as cv
import numpy as np
import tensorflow as tf

# Load trained model
model = tf.keras.models.load_model("sign_language_model.h5")

# Load class labels
labels = list(train_data.class_indices.keys())

# Initialize webcam
cap = cv.VideoCapture(0)
detector = HandDetector(maxHands=1)
imgSize = 300
offset = 20

while True:
    success, img = cap.read()
    if not success:
        print("Error: Could not access webcam")
        break

    img = cv.flip(img, 1)  # Flip image horizontally for easier viewing

    hands, img = detector.findHands(img)
    if hands:
        hand = hands[0]
        x, y, w, h = hand["bbox"]

        imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255

        y1, y2 = max(0, y - offset), min(img.shape[0], y + h + offset)
        x1, x2 = max(0, x - offset), min(img.shape[1], x + w + offset)
        imgCrop = img[y1:y2, x1:x2]

        if imgCrop.size != 0:
            imgResize = cv.resize(imgCrop, (imgSize, imgSize))
            imgResize = imgResize / 255.0  # Normalize pixel values
            imgResize = np.expand_dims(imgResize, axis=0)

            prediction = model.predict(imgResize)
            index = np.argmax(prediction)
            sign_label = labels[index]

            # Display prediction
            cv.putText(img, sign_label, (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 
                       1, (0, 255, 0), 2, cv.LINE_AA)

    cv.imshow("Sign Language Detection", img)

    key = cv.waitKey(1)
    if key == ord('q'):
        break

cap.release()
cv.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6