In [None]:
# Import libraries
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import splitfolders
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Step 1: Dataset locations
# --------------------------------------------
# NOTE: no split is performed in this cell. The data generators further down
# read the 'train', 'val' and 'test' sub-folders of `output_folder`, so that
# directory is presumably produced beforehand (e.g. with splitfolders) from
# `input_folder` -- confirm before running on a fresh machine.

# Folder holding the already-split dataset consumed by the generators.
output_folder = "preprocessed_alphaDigi_dataset"

# Location of the raw, unsplit dataset; adjust to your own folder structure.
input_folder = "C:/Users/crite/Downloads/Final_Sign_Dataset"


# Step 2: Create Data Generators with Preprocessing
# ------------------------------------------------

# Every generator maps raw pixel values through the same scale factor.
pixel_scale = 1./255

# Augmentation applied to training images only. The transforms are kept mild
# and horizontal flipping stays off.
augmentation_settings = {
    'rescale': pixel_scale,
    'rotation_range': 15,
    'width_shift_range': 0.05,       # horizontal shift
    'height_shift_range': 0.05,      # vertical shift
    'zoom_range': 0.05,
    'shear_range': 0.03,
    'horizontal_flip': False,
    'brightness_range': [0.6, 1.4],  # brightness variation
    'fill_mode': 'constant',
    'cval': 0,                       # pixels exposed by a transform become black
}
train_datagen = ImageDataGenerator(**augmentation_settings)

# Validation and test images are only rescaled, never augmented.
val_test_datagen = ImageDataGenerator(rescale=pixel_scale)

# Settings shared by all three directory iterators
target_size = (128, 128)
color_mode = 'rgb'
batch_size = 32
class_mode = 'categorical'

# Bundled once so the three iterators cannot drift apart.
shared_flow_kwargs = {
    'target_size': target_size,
    'color_mode': color_mode,
    'batch_size': batch_size,
    'class_mode': class_mode,
}

# Training data: augmented and reshuffled.
train_generator = train_datagen.flow_from_directory(
    os.path.join(output_folder, 'train'),
    shuffle=True,
    **shared_flow_kwargs,
)

# Validation data: fixed order, rescaling only.
val_generator = val_test_datagen.flow_from_directory(
    os.path.join(output_folder, 'val'),
    shuffle=False,
    **shared_flow_kwargs,
)

# Test data: fixed order, rescaling only.
test_generator = val_test_datagen.flow_from_directory(
    os.path.join(output_folder, 'test'),
    shuffle=False,
    **shared_flow_kwargs,
)

# Step 3: Improved Model Building
# ----------------------

print("training")

# Read the input shape from the training iterator instead of hard-coding
# (128, 128, 3), so the network always matches the `target_size` and
# `color_mode` the images were loaded with.
input_shape = train_generator.image_shape

# Three convolutional stages (32 -> 64 -> 128 filters), each followed by
# pooling, batch normalization and dropout, then a dense classifier head.
model = Sequential([
    # Explicit Input layer: passing `input_shape=` to the first Conv2D makes
    # Keras emit a UserWarning for Sequential models.
    Input(shape=input_shape),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    Dropout(0.3),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    Dropout(0.4),

    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # One softmax output per class folder found by the training iterator.
    Dense(train_generator.num_classes, activation='softmax')
])

# Start Adam at 1e-3 (its default rate); the ReduceLROnPlateau callback used
# during training lowers it when the validation loss plateaus.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Step 4: Training with Enhanced Callbacks
# ------------------------------------
# Stop once val_loss has not improved for 7 epochs and roll back to the
# best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 5 epochs without val_loss improvement,
# never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001,
    verbose=1,
)

training_callbacks = [early_stop, reduce_lr]

# 25 epochs is an upper bound; early stopping may end training sooner.
history = model.fit(
    train_generator,
    epochs=25,
    validation_data=val_generator,
    callbacks=training_callbacks,
)

# Step 5: Final Evaluation
# ------------------------
print("\nTest Set Evaluation:")

# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(test_generator)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc:.2%}")

# Persist the trained network to an HDF5 file.
model.save('sign_language_alphaDigit_model.h5')

# Optional: Visualize Preprocessing
# ---------------------------------
def visualize_augmentations(generator, num_images=5):
    """Plot the first images of the next batch drawn from *generator*.

    Draws one batch, shows up to ``num_images`` of its images side by side
    and titles each with its class name.

    Args:
        generator: Iterator yielding ``(images, labels)`` batches and exposing
            a ``class_indices`` mapping of class name to index. Labels are
            assumed to be one-hot rows, as produced with
            ``class_mode='categorical'``.
        num_images: Maximum number of images to display. It is capped at the
            size of the batch actually returned.

    Note:
        Calling this advances *generator* by one batch.
    """
    # The batch must be captured: discarding the result of next() left the
    # image and label arrays undefined and the function raised NameError.
    images, labels = next(generator)

    # class_indices maps name -> index; invert it once for the lookups below.
    index_to_name = {
        index: name for name, index in generator.class_indices.items()
    }

    # A batch can hold fewer images than requested; never index past its end.
    shown = min(num_images, len(images))

    plt.figure(figsize=(15, 5))
    for position in range(shown):
        plt.subplot(1, shown, position + 1)
        plt.imshow(images[position].squeeze())
        class_name = index_to_name[int(np.argmax(labels[position]))]
        plt.title(f"Label: {class_name}")
        plt.axis('off')
    plt.show()

# Preview one batch from the training and validation iterators, in that order.
for heading, batches in (
    ("\nTraining samples (with augmentation):", train_generator),
    ("\nValidation samples (no augmentation):", val_generator),
):
    print(heading)
    visualize_augmentations(batches)

Found 8996 images belonging to 36 classes.
Found 2989 images belonging to 36 classes.
Found 3031 images belonging to 36 classes.
training


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/25
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m307s[0m 1s/step - accuracy: 0.5075 - loss: 1.9517 - val_accuracy: 0.3757 - val_loss: 2.2107 - learning_rate: 0.0010
Epoch 2/25
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3127s[0m 11s/step - accuracy: 0.8647 - loss: 0.4503 - val_accuracy: 0.9575 - val_loss: 0.1590 - learning_rate: 0.0010
Epoch 3/25
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 423ms/step - accuracy: 0.9200 - loss: 0.3052 - val_accuracy: 0.9913 - val_loss: 0.0488 - learning_rate: 0.0010
Epoch 4/25
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 407ms/step - accuracy: 0.9457 - loss: 0.2038 - val_accuracy: 0.9920 - val_loss: 0.0439 - learning_rate: 0.0010
Epoch 5/25
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 423ms/step - accuracy: 0.9541 - loss: 0.1698 - val_accuracy: 0.9358 - val_loss: 0.2088 - learning_rate: 0.0010
Epoch 6/25
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[



Test Accuracy: 99.74%

Training samples (with augmentation):


AttributeError: 'DirectoryIterator' object has no attribute 'next'