In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, CSVLogger
import cv2
import numpy as np
import os
import warnings

# Suppress specific warnings
warnings.filterwarnings('ignore', category=UserWarning, module='keras')

# Parameters
img_height, img_width = 128, 128
batch_size = 64

# Directory to save the model
save_dir = 'models'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Data augmentation and normalization
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,  # 20% of the data for validation
    rotation_range=15,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_data = datagen.flow_from_directory(
    'asl_alphabet_train',  # Update this path
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

val_data = datagen.flow_from_directory(
    'asl_alphabet_train',  # Update this path
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Verify the class indices
class_indices = train_data.class_indices
labels = {v: k for k, v in class_indices.items()}
print("Class indices:", class_indices)

# Print the number of classes
num_classes = train_data.num_classes
print("Number of classes:", num_classes)

# Verify the shape of the training data
data_batch, labels_batch = next(train_data)
print(f'Data batch shape: {data_batch.shape}')
print(f'Labels batch shape: {labels_batch.shape}')

# Model definition using Functional API
input_tensor = Input(shape=(img_height, img_width, 3))
base_model = EfficientNetB0(input_tensor=input_tensor, include_top=False, weights='imagenet')

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
output_tensor = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=input_tensor, outputs=output_tensor)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

# Print the model summary
model.summary()

Found 178472 images belonging to 29 classes.
Found 44602 images belonging to 29 classes.
Class indices: {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25, 'del': 26, 'nothing': 27, 'space': 28}
Number of classes: 29
Data batch shape: (64, 128, 128, 3)
Labels batch shape: (64, 29)


In [None]:

# Callbacks for training with increased verbosity
early_stopping = EarlyStopping(patience=10, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(patience=5, factor=0.2, verbose=1)
checkpoint = ModelCheckpoint(os.path.join(save_dir, 'best_model.keras'), save_best_only=True, verbose=1)
csv_logger = CSVLogger(os.path.join(save_dir, 'training_log.csv'), append=True)

callbacks = [early_stopping, reduce_lr, checkpoint, csv_logger]

# Training the model with verbose output
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=50,
    callbacks=callbacks,
    verbose=1
)

# Save the model manually if needed
model.save(os.path.join(save_dir, 'best_model.keras'))

In [None]:
# Load the saved model
model = tf.keras.models.load_model(os.path.join(save_dir, 'best_model.keras'))

# Callbacks for training with increased verbosity
early_stopping = EarlyStopping(patience=10, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(patience=5, factor=0.2, verbose=1)
checkpoint = ModelCheckpoint(os.path.join(save_dir, 'best_model.keras'), save_best_only=True, verbose=1)
csv_logger = CSVLogger(os.path.join(save_dir, 'training_log.csv'), append=True)

callbacks = [early_stopping, reduce_lr, checkpoint, csv_logger]

# Continue training with additional epochs
additional_epochs = 20  # Define how many more epochs you want to train

# Continue training the model
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=additional_epochs,
    callbacks=callbacks,
    verbose=1
)

# Save the model manually if needed
model.save(os.path.join(save_dir, 'best_model.keras'))


Epoch 1/20
[1m2789/2789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.9951 - loss: 0.0156
Epoch 1: val_loss improved from inf to 6.79619, saving model to models\best_model.keras
[1m2789/2789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10422s[0m 4s/step - accuracy: 0.9951 - loss: 0.0156 - val_accuracy: 0.0282 - val_loss: 6.7962 - learning_rate: 1.0000e-04
Epoch 2/20
[1m2789/2789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14s/step - accuracy: 0.9954 - loss: 0.0153 
Epoch 2: val_loss improved from 6.79619 to 4.27340, saving model to models\best_model.keras
[1m2789/2789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40704s[0m 15s/step - accuracy: 0.9954 - loss: 0.0153 - val_accuracy: 0.1007 - val_loss: 4.2734 - learning_rate: 1.0000e-04
Epoch 3/20
[1m2789/2789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.9959 - loss: 0.0130
Epoch 3: val_loss improved from 4.27340 to 1.41525, saving model to models\best_model.

In [8]:
# Load the best saved model
model = tf.keras.models.load_model(os.path.join(save_dir, 'best_model.keras'))  # Ensure this path is correct

# Function to predict sign language gesture
def predict_sign(image):
    image = cv2.resize(image, (img_height, img_width))
    image = np.expand_dims(image, axis=0) / 255.0
    prediction = model.predict(image)
    predicted_class = np.argmax(prediction, axis=1)
    return labels[predicted_class[0]]

# Example usage
image_path = 'asl_alphabet_test/K_test.jpg'  # Update this path
image = cv2.imread(image_path)
predicted_label = predict_sign(image)
print(f'Predicted label: {predicted_label}')

# Real-time video capture and prediction
# cap = cv2.VideoCapture(0)

# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break
    
#     predicted_label = predict_sign(frame)
#     cv2.putText(frame, f'Prediction: {predicted_label}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    
#     cv2.imshow('Sign Language Recognition', frame)
    
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Predicted label: K


In [None]:
# Save the model as TensorFlow Lite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model
with open(os.path.join(save_dir, 'model.tflite'), 'wb') as f:
    f.write(tflite_model)