CNN for static classification

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset locations. Raw string literals keep the Windows backslashes literal:
# in a normal string, sequences such as "\P", "\S" or "\d" are invalid escapes
# that newer Python versions warn about. The resulting path values are unchanged.
train_data_dir = r'E:\Projects\Sign Language Project\SignSpeak\data/raw'
val_data_dir = r'E:\Projects\Sign Language Project\SignSpeak\data\ASL/train_reduced10'

# Both generators only multiply pixel values by 1/255; no augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Loading options shared by both splits, defined once so they cannot drift apart.
flow_options = dict(
    target_size=(300, 300),    # images are resized to 300x300 on load
    color_mode='grayscale',    # single-channel input
    batch_size=32,
    class_mode='categorical',  # one-hot labels, one class per sub-directory
)

# Training generator: class names are inferred from the sub-directories of train_data_dir.
train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)

# Validation generator: reuse the training class list (in index order) so that
# label indices mean the same thing in both splits, even if the validation
# directory contains a different set of sub-directories.
val_generator = val_datagen.flow_from_directory(
    val_data_dir,
    classes=list(train_generator.class_indices),
    **flow_options
)

# Number of output units the classifier needs (one per training class).
num_classes = len(train_generator.class_indices)

In [None]:

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

# Input shape for 300x300 grayscale images (1 channel); must match the
# target_size / color_mode used by the data generators.
input_shape = (300, 300, 1)

# CNN classifier: three Conv -> MaxPool -> BatchNorm stages followed by a
# dense head. `num_classes` comes from the data-loading cell.
model = Sequential([
    # Explicit input layer. Declaring the shape here instead of passing
    # `input_shape=` to the first Conv2D avoids the warning newer Keras
    # versions emit for that argument inside Sequential models.
    tf.keras.Input(shape=input_shape),

    # Stage 1: 32 filters
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # Stage 2: 64 filters
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # Stage 3: 128 filters
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    # Flatten the feature maps into a vector for the dense head
    Flatten(),

    # Fully connected layer with dropout for regularisation
    Dense(128, activation='relu'),
    Dropout(0.5),

    # Output layer: one softmax probability per class
    Dense(num_classes, activation='softmax')
])

# categorical_crossentropy pairs with the one-hot labels produced by
# class_mode='categorical' in the generators.
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print the layer-by-layer summary
model.summary()

Model training

In [None]:
# Train on the training generator and validate on the validation generator.
#
# steps_per_epoch / validation_steps are intentionally omitted: the directory
# iterators have a defined length (number of batches, including the final
# partial one), and Keras runs each of them to exhaustion per epoch when no
# step count is given. The previous `samples // batch_size` value evaluated to
# 0 for a split smaller than one batch and skipped the trailing partial batch.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20  # Adjust based on performance
)

Evaluation

In [None]:
# Build the test-set generator with the same preprocessing as validation
# (val_datagen only rescales pixel values by 1/255).
# 'test_images' is resolved relative to the notebook's working directory.
test_generator = val_datagen.flow_from_directory(
    'test_images',
    target_size=(300, 300),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    # Pin the class -> index mapping to the one used for training so the
    # one-hot labels line up with the model's output units.
    classes=list(train_generator.class_indices)
)

# Persist the trained model before evaluating.
# NOTE(review): newer Keras releases treat .h5 as a legacy format; the file
# name is kept unchanged because other code may load it by this name.
model.save('sign_language_model.h5')

# Evaluate on the test set; returns the loss followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc}")

CNN-LSTM for Gesture Sequences

In [None]:
# NOTE(review): everything below is a disabled draft of a CNN + LSTM model for
# gesture sequences, kept for reference only; nothing in this cell executes.
# Before re-enabling it:
#   - `sequence_length` is not defined in any of the cells shown here.
#   - the draft rebinds `model`, replacing the CNN built earlier in the notebook.

# from tensorflow.keras.layers import TimeDistributed, LSTM

# # Sequential model with CNN + LSTM layers for gesture sequence recognition
# model = Sequential([
#     # TimeDistributed CNN layers
#     TimeDistributed(Conv2D(32, (3, 3), activation='relu'),
#                     input_shape=(sequence_length, 300, 300, 1)),
#     TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
#     TimeDistributed(Flatten()),

#     # LSTM layer
#     LSTM(128, return_sequences=False),

#     # Fully connected layer
#     Dense(128, activation='relu'),

#     # Output layer for classification
#     Dense(num_classes, activation='softmax')
# ])

# model.compile(optimizer='adam', loss='categorical_crossentropy',
#               metrics=['accuracy'])

# model.summary()

# # Save the model
# # model.save('sign_language_model.h5')