Introduction

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

Data preparation

In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Paths
data_dir = '../dataset/asl_alphabet_train'

# Data generators
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,  # 20% for validation
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Training generator
train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(200, 200),
    batch_size=32,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# Validation generator
val_generator = val_datagen.flow_from_directory(
    data_dir,
    target_size=(200, 200),
    batch_size=32,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)


Found 69600 images belonging to 29 classes.
Found 17400 images belonging to 29 classes.


Building model

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras import Input

model = Sequential([
    # Block 1
    Input(shape=(200, 200, 3)),  # Explicit Input layer
    Conv2D(32, (5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    # Block 2
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    # Block 3
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    # Flatten and Dense layers
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),

    # Output layer
    Dense(29, activation='softmax')
])

model.summary()

Compile the model

In [None]:
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

Define Early Stopping Callback

In [3]:
early_stopping = EarlyStopping(
    monitor='val_accuracy',  # Monitor validation accuracy
    patience=3,              # Stop after 3 epochs without improvement
    restore_best_weights=True  # Keep the best model weights
)

Training the model:

1. Fit the model

Initial run:

In [11]:
# Fit the model to the training data with validation and early stopping
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)


Epoch 1/30
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9s/step - accuracy: 0.2288 - loss: 2.6480

[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19101s[0m 9s/step - accuracy: 0.2289 - loss: 2.6477 - val_accuracy: 0.5495 - val_loss: 1.3226
Epoch 2/30
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m762s[0m 350ms/step - accuracy: 0.6369 - loss: 1.0819 - val_accuracy: 0.6314 - val_loss: 1.0694
Epoch 3/30
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m760s[0m 350ms/step - accuracy: 0.7580 - loss: 0.7090 - val_accuracy: 0.7266 - val_loss: 0.8369
Epoch 4/30
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1251s[0m 575ms/step - accuracy: 0.8114 - loss: 0.5464 - val_accuracy: 0.7637 - val_loss: 0.7704
Epoch 5/30
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3264s[0m 2s/step - accuracy: 0.8499 - loss: 0.4441 - val_accuracy: 0.7723 - val_loss: 0.7978
Epoch 6/30
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3729s[0m 2s/step - accuracy: 0.8664 - loss: 0.3907 - val_accuracy: 0.7832 - val_loss: 0.7248
Epoch 7

Added BatchNormalization after each convolutional and dense layer to stabilize learning and improve validation accuracy.

Added L2 Regularization (0.001) to prevent overfitting by penalizing large weights.

Replaced Flatten with GlobalAveragePooling2D to drastically reduce parameters (will drop from 17M to under 500K).

Enhanced data augmentation with wider rotation range and brightness variations to improve generalization.

Added learning rate scheduler (ReduceLROnPlateau) to automatically reduce learning rate when validation loss plateaus.

Slightly reduced dropout from 0.5 to 0.4 as we're now using multiple regularization techniques.



In [4]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2

# Input layer
inputs = Input(shape=(200, 200, 3))

# Block 1
x = Conv2D(32, (5, 5), activation='relu', kernel_regularizer=l2(0.001))(inputs)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)

# Block 2
x = Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001))(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)

# Block 3
x = Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.001))(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)

# Global Average Pooling instead of Flatten
x = GlobalAveragePooling2D()(x)

# Dense layer with regularization
x = Dense(256, activation='relu', kernel_regularizer=l2(0.001))(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)  # Slightly reduced dropout

# Output layer
outputs = Dense(29, activation='softmax')(x)

# Create model
model = Model(inputs, outputs)


In [5]:
# Enhanced data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,       # Increased rotation range
    width_shift_range=0.2,   # Increased shift range
    height_shift_range=0.2,
    zoom_range=0.2,          # Increased zoom range
    horizontal_flip=True,
    brightness_range=[0.8, 1.2]  # Add brightness variation
)

# Learning rate scheduler
from tensorflow.keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    min_lr=0.00001
)
model.summary()

In [None]:
# Compile with the same optimizer but add metrics
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train with both callbacks
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=50,  # Increased epochs since we have early stopping
    callbacks=[
        early_stopping,
        reduce_lr
    ],
    verbose=1
)

  self._warn_if_super_not_called()


Epoch 1/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.3273 - loss: 2.6823

  self._warn_if_super_not_called()


[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8726s[0m 4s/step - accuracy: 0.3274 - loss: 2.6819 - val_accuracy: 0.4804 - val_loss: 2.0926 - learning_rate: 0.0010
Epoch 2/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2700s[0m 1s/step - accuracy: 0.8114 - loss: 0.8406 - val_accuracy: 0.5552 - val_loss: 2.1284 - learning_rate: 0.0010
Epoch 3/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2429s[0m 1s/step - accuracy: 0.8923 - loss: 0.5699 - val_accuracy: 0.5525 - val_loss: 2.1868 - learning_rate: 0.0010
Epoch 4/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1441s[0m 662ms/step - accuracy: 0.9605 - loss: 0.3628 - val_accuracy: 0.8349 - val_loss: 0.8209 - learning_rate: 2.0000e-04
Epoch 5/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2707s[0m 1s/step - accuracy: 0.9744 - loss: 0.2758 - val_accuracy: 0.8337 - val_loss: 0.8827 - learning_rate: 2.0000e-04
Epoch 6/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━