Image preprocessing

In [None]:
import tensorflow
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset roots handed to flow_from_directory below.
# NOTE(review): an earlier revision used '<train_dir>/test' as the validation
# folder; if a 'test' folder still exists under train_dir it would presumably
# be picked up as one more class -- confirm the folder layout.
train_dir = 'E:/Projects/Sign Language Project/SignSpeak/data/raw'
val_dir = 'E:/Projects/Sign Language Project/SignSpeak/data/ASL/train_reduced10'

# Shared sizing constants (adjust to the input requirements of the model).
IMAGE_SIZE = (200, 200)  # passed as target_size when images are loaded
BATCH_SIZE = 32
NUM_FRAMES = 30          # frames per sequence

# Random augmentations applied to training images only.
_augmentation = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Both pipelines rescale pixel values to [0, 1]; validation is not augmented.
train_datagen = ImageDataGenerator(rescale=1.0/255, **_augmentation)
val_datagen = ImageDataGenerator(rescale=1.0/255)

# Identical loading options for both directories: batches of resized images
# with categorical labels.
_flow_options = dict(
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)
train_generator = train_datagen.flow_from_directory(train_dir, **_flow_options)
val_generator = val_datagen.flow_from_directory(val_dir, **_flow_options)

# The training directory defines how many classes there are.
num_classes = len(train_generator.class_indices)

print(f"Number of classes: {num_classes}")

Model design and image sequence generator

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM, Dense, TimeDistributed, Dropout

# Define CNN-LSTM model
def build_cnn_lstm_model(input_shape, num_classes):
    """Assemble and compile the CNN-LSTM sequence classifier.

    Every frame of a sequence goes through three TimeDistributed
    Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with 32, 64 and 128 filters
    and is then flattened. The per-frame vectors feed an LSTM(64) that returns
    only its final output, followed by Dense(64, relu), Dropout(0.5) and a
    softmax layer with `num_classes` units.

    Args:
        input_shape: Shape given to the first TimeDistributed layer as
            `input_shape`; this notebook passes
            (frames, height, width, channels).
        num_classes: Number of units in the final softmax layer.

    Returns:
        A Sequential model compiled with the 'adam' optimizer,
        'categorical_crossentropy' loss and the 'accuracy' metric.
    """
    # Per-frame convolutional feature extractor. Only the first layer carries
    # the input shape.
    layers = [
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'),
                        input_shape=input_shape),
        TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    ]
    for filters in (64, 128):
        layers.append(TimeDistributed(Conv2D(filters, (3, 3), activation='relu')))
        layers.append(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
    layers.append(TimeDistributed(Flatten()))

    # Temporal model over the frame features, then the classification head.
    layers += [
        LSTM(64, return_sequences=False),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


# # Define the necessary variables
# NUM_FRAMES = 30  # Example value, set according to your dataset
# IMAGE_SIZE = (64, 64)  # Example value, set according to your dataset
# num_classes = 10  # Example value, set according to your dataset


def image_sequence_generator(directory, batch_size, target_size, num_frames):
    """
    Custom data generator to yield batches of image sequences, forever.

    Each sub-directory of `directory` is treated as one class. Class indices
    follow the sorted sub-directory names, so two generators over directories
    with the same class folders agree on the label of every class (this is
    also the alphanumeric order Keras' flow_from_directory documents).
    Non-directory entries in `directory` are ignored.

    For every sample a class is drawn uniformly at random, then `num_frames`
    distinct files are drawn from that class folder (random order, without
    replacement), loaded at `target_size` and divided by 255.

    Args:
        directory: Root folder containing one sub-folder per class.
        batch_size: Number of sequences per yielded batch.
        target_size: Size passed to `load_img` as `target_size`.
        num_frames: Number of images per sequence.

    Yields:
        (X_batch, y_batch): X_batch stacks `batch_size` sequences of
        `num_frames` image arrays; y_batch holds the matching one-hot labels
        with one column per class folder.

    Raises:
        ValueError: On first iteration, if `directory` has no class
            sub-directories or a class folder holds fewer than `num_frames`
            files.
    """
    # Only sub-directories are classes. os.listdir order is arbitrary, so sort
    # to get a stable class -> index mapping.
    class_folders = sorted(
        entry for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )
    if not class_folders:
        raise ValueError(f"No class sub-directories found in {directory!r}")
    num_classes = len(class_folders)

    # List every class folder once up front instead of once per sample, and
    # fail early with a clear message if a class cannot supply a full sequence.
    # Files added after the generator starts are therefore not seen.
    class_paths = [os.path.join(directory, name) for name in class_folders]
    class_files = []
    for class_folder, class_path in zip(class_folders, class_paths):
        image_files = [
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        ]
        if len(image_files) < num_frames:
            raise ValueError(
                f"Class folder {class_folder!r} has {len(image_files)} file(s), "
                f"but num_frames={num_frames} distinct frames are required"
            )
        class_files.append(image_files)

    while True:
        X_batch = []
        y_batch = []

        for _ in range(batch_size):
            # Randomly select a class
            class_index = np.random.choice(num_classes)
            class_path = class_paths[class_index]

            # Randomly select the images that make up the sequence
            selected_images = np.random.choice(
                class_files[class_index], num_frames, replace=False)

            # Load each image and normalize its pixel values
            image_sequence = [
                img_to_array(
                    load_img(os.path.join(class_path, image_file),
                             target_size=target_size)
                ) / 255.0
                for image_file in selected_images
            ]

            # Stack sequence and add to batch
            X_batch.append(np.stack(image_sequence))
            y_batch.append(class_index)

        # Convert to arrays and one-hot encode the labels
        yield (
            np.array(X_batch),
            tf.keras.utils.to_categorical(
                np.array(y_batch), num_classes=num_classes),
        )


# Parameters
num_frames = 10  # Number of frames in each sequence
batch_size = BATCH_SIZE  # Same batch size the step counts are derived from
target_size = IMAGE_SIZE  # Same image size as used for the directory generators

# Create generators for training and validation
train_seq_generator = image_sequence_generator(
    train_dir, batch_size, target_size, num_frames)
val_seq_generator = image_sequence_generator(
    val_dir, batch_size, target_size, num_frames)


# Define input shape: (num_frames, image_height, image_width, num_channels).
# The frame count must be the same `num_frames` the generators above were
# given; building the model with a different sequence length makes every
# batch mismatch the model input.
input_shape = (num_frames, IMAGE_SIZE[0], IMAGE_SIZE[1], 3)  # 3 channels (RGB)

# Build the model
model = build_cnn_lstm_model(input_shape, num_classes)

# Model summary
model.summary()

In [None]:
# Training the CNN-LSTM model.
# The sequence generators never terminate, so the length of an epoch and of a
# validation pass is set explicitly from the image counts of the directory
# generators. Clamp to at least one step: integer division yields 0 when a
# directory holds fewer images than one batch, which would leave the pass
# without any batches.
steps_per_epoch = max(1, train_generator.samples // BATCH_SIZE)
validation_steps = max(1, val_generator.samples // BATCH_SIZE)

history = model.fit(
    train_seq_generator,
    validation_data=val_seq_generator,
    epochs=30,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)

# Save the model after training
model.save('models/sign_language_cnn_lstm.h5')

In [None]:
# Evaluate the model on the validation set.
# The validation generator is endless, so the number of batches is given
# explicitly; at least one batch is always evaluated so that metrics exist
# even when the validation directory holds fewer images than one batch.
eval_steps = max(1, val_generator.samples // BATCH_SIZE)
val_loss, val_accuracy = model.evaluate(val_seq_generator, steps=eval_steps)

print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")