In [2]:
!pip install mediapipe tensorflow opencv-python pandas

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.6-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp310-cp310-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m51.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading protobuf-4.25.6-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: protobuf, sounddevice, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling prot

In [3]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Define dataset path (adjust based on Kaggle dataset mount location)
DATASET_PATH = "/kaggle/input/aslamerican-sign-language-aplhabet-dataset/ASL_Alphabet_Dataset/asl_alphabet_train"

# Image parameters
IMG_SIZE = 64  # Resize images to 64x64
BATCH_SIZE = 32

# Data augmentation & normalization
datagen = ImageDataGenerator(
    rescale=1./255,    # Normalize pixel values
    validation_split=0.2  # 20% for validation
)

# Load training data
train_data = datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

# Load validation data
val_data = datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# Get class names
class_names = list(train_data.class_indices.keys())
print("Classes:", class_names)


Found 178472 images belonging to 29 classes.
Found 44602 images belonging to 29 classes.
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']


In [9]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Define dataset path (adjust based on Kaggle dataset location)
DATASET_PATH = "/kaggle/input/aslamerican-sign-language-aplhabet-dataset/ASL_Alphabet_Dataset/asl_alphabet_train"

# Image parameters
IMG_SIZE = 64  # Resize images
BATCH_SIZE = 32

# Define Image Data Generator with train-validation split
datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize pixel values
    validation_split=0.2,  # 20% for validation
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Load training data
train_generator = datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

# Load validation data
val_generator = datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# Get class names
class_names = list(train_generator.class_indices.keys())
print("Classes:", class_names)

# Load MobileNetV2 as base model
base_model = MobileNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights="imagenet")
base_model.trainable = False  # Freeze base model layers

# Add classification layers
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Reduce dimensions
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output_layer = Dense(len(class_names), activation='softmax')(x)  # Output layer for ASL classes

# Define the final model
model = Model(inputs=base_model.input, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Define Callbacks
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
checkpoint = ModelCheckpoint("best_model.keras", monitor="val_accuracy", save_best_only=True, mode="max")

# Train the model
history = model.fit(
    train_generator,
    epochs=10,  # Reduce to 10 epochs to save time
    validation_data=val_generator,
    callbacks=[early_stopping, reduce_lr, checkpoint]
)

# Save final model
model.save("asl_trained_model.keras")


Found 178472 images belonging to 29 classes.
Found 44602 images belonging to 29 classes.
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']


  base_model = MobileNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights="imagenet")


Epoch 1/10
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m636s[0m 113ms/step - accuracy: 0.2695 - loss: 2.5207 - val_accuracy: 0.3979 - val_loss: 2.1130 - learning_rate: 0.0010
Epoch 2/10
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m640s[0m 115ms/step - accuracy: 0.4009 - loss: 1.9790 - val_accuracy: 0.4174 - val_loss: 2.0702 - learning_rate: 0.0010
Epoch 3/10
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m642s[0m 115ms/step - accuracy: 0.4251 - loss: 1.8914 - val_accuracy: 0.4266 - val_loss: 2.0274 - learning_rate: 0.0010
Epoch 4/10
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m727s[0m 130ms/step - accuracy: 0.4326 - loss: 1.8626 - val_accuracy: 0.4379 - val_loss: 2.0088 - learning_rate: 0.0010
Epoch 5/10
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m771s[0m 138ms/step - accuracy: 0.4409 - loss: 1.8359 - val_accuracy: 0.4408 - val_loss: 2.0019 - learning_rate: 0.0010
Epoch 6/10
[1m5578/5578[0m [32m━━━━━━

In [10]:
# Import libraries
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Set dataset path (Kaggle dataset location)
DATASET_PATH = "/kaggle/input/aslamerican-sign-language-aplhabet-dataset/ASL_Alphabet_Dataset/asl_alphabet_train"

# Image parameters
IMG_SIZE = 64  # Resize images to 64x64
BATCH_SIZE = 32

# Data Augmentation & Normalization
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,  # 20% data for validation
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Load training data
train_data = datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

# Load validation data
val_data = datagen.flow_from_directory(
    DATASET_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# Get class names (labels)
class_names = list(train_data.class_indices.keys())
print("Classes:", class_names)

# Load Pre-trained MobileNetV2 Model (Feature Extractor)
base_model = MobileNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights="imagenet")
base_model.trainable = False  # Freeze the whole MobileNetV2 initially

# Build Custom Model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)  # Normalize activations
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
output_layer = Dense(len(class_names), activation='softmax')(x)

# Create Final Model
model = Model(inputs=base_model.input, outputs=output_layer)

# Compile Model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

# Model Summary
model.summary()

# Define Callbacks
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6
)

checkpoint = ModelCheckpoint(
    "best_model.keras",
    monitor="val_accuracy",
    save_best_only=True,
    mode="max"
)

# Train Model (Phase 1 - Feature Extraction)
history = model.fit(
    train_data,
    epochs=5,  # Initial 5 epochs
    validation_data=val_data,
    callbacks=[early_stopping, reduce_lr, checkpoint]
)

# **Fine-Tune Model (Unfreeze Last 20 Layers)**
for layer in base_model.layers[-20:]:  
    layer.trainable = True

# Recompile model with lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

# Train Model Again (Phase 2 - Fine-Tuning)
history_finetune = model.fit(
    train_data,
    epochs=10,  # Train 10 more epochs after unfreezing
    validation_data=val_data,
    callbacks=[early_stopping, reduce_lr, checkpoint]
)

# Save Final Model
model.save("asl_model_final.keras")

print("🎉 Training Complete! Model Saved as 'asl_model_final.keras'")


Found 178472 images belonging to 29 classes.
Found 44602 images belonging to 29 classes.
Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']


  base_model = MobileNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights="imagenet")


Epoch 1/5
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m671s[0m 119ms/step - accuracy: 0.3035 - loss: 2.4698 - val_accuracy: 0.4171 - val_loss: 2.0495 - learning_rate: 0.0010
Epoch 2/5
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m784s[0m 141ms/step - accuracy: 0.4468 - loss: 1.8232 - val_accuracy: 0.4428 - val_loss: 1.9706 - learning_rate: 0.0010
Epoch 3/5
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m815s[0m 146ms/step - accuracy: 0.4783 - loss: 1.7085 - val_accuracy: 0.4598 - val_loss: 1.9252 - learning_rate: 0.0010
Epoch 4/5
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m819s[0m 147ms/step - accuracy: 0.4997 - loss: 1.6335 - val_accuracy: 0.4786 - val_loss: 1.8671 - learning_rate: 0.0010
Epoch 5/5
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m823s[0m 147ms/step - accuracy: 0.5129 - loss: 1.5893 - val_accuracy: 0.4838 - val_loss: 1.8640 - learning_rate: 0.0010
Epoch 1/10
[1m5578/5578[0m [32m━━━━━━━━━━━

In [15]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

# Define constants
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 10  # Increase as needed
LEARNING_RATE = 1e-5  # Lower LR for fine-tuning
DATA_DIR = "/kaggle/input/aslamerican-sign-language-aplhabet-dataset/ASL_Alphabet_Dataset/asl_alphabet_train"  # Update path

# Data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 80% train, 20% validation
)

# Load train & validation data
train_generator = train_datagen.flow_from_directory(
    DATA_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_generator = train_datagen.flow_from_directory(
    DATA_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# Load MobileNetV2 with pre-trained weights
base_model = MobileNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights="imagenet")
base_model.trainable = False  # Freeze base layers

# Add custom layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.4)(x)  # Reduce overfitting
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(29, activation='softmax')(x)  # 27 classes (excluding 'space' and 'del')

model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE), loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
checkpoint = ModelCheckpoint("asl_model_finetuned.keras", monitor='val_accuracy', save_best_only=True, verbose=1)

# Initial Training (Before Unfreezing Layers)
model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,  # Initial training
    callbacks=[reduce_lr, checkpoint]
)

# Unfreeze some layers for fine-tuning
for layer in base_model.layers[-30:]:  # Unfreezing last 30 layers
    layer.trainable = True

# Compile again with lower learning rate
model.compile(optimizer=Adam(learning_rate=1e-6), loss='categorical_crossentropy', metrics=['accuracy'])

# Train again with fine-tuning
model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,  # Fine-tuning epochs
    callbacks=[reduce_lr, checkpoint]
)

# Save final model
model.save("asl_model_final.keras")

print("🎉 Fine-tuning complete! Model saved.")


Found 178472 images belonging to 29 classes.
Found 44602 images belonging to 29 classes.
Epoch 1/5
[1m5577/5578[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 368ms/step - accuracy: 0.1349 - loss: 3.1406
Epoch 1: val_accuracy improved from -inf to 0.52590, saving model to asl_model_finetuned.keras
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2632s[0m 470ms/step - accuracy: 0.1349 - loss: 3.1405 - val_accuracy: 0.5259 - val_loss: 1.8883 - learning_rate: 1.0000e-05
Epoch 2/5
[1m5577/5578[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 367ms/step - accuracy: 0.4685 - loss: 1.8144
Epoch 2: val_accuracy improved from 0.52590 to 0.61881, saving model to asl_model_finetuned.keras
[1m5578/5578[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2612s[0m 468ms/step - accuracy: 0.4685 - loss: 1.8143 - val_accuracy: 0.6188 - val_loss: 1.4451 - learning_rate: 1.0000e-05
Epoch 3/5
[1m5577/5578[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 367ms/step - accuracy:

KeyboardInterrupt: 

In [11]:
from tensorflow.keras.models import load_model
import numpy as np
from tensorflow.keras.preprocessing import image

# Load Model
model = load_model("asl_model_final.keras")

# Load & Preprocess Test Image
img_path = "/kaggle/input/aslamerican-sign-language-aplhabet-dataset/ASL_Alphabet_Dataset/asl_alphabet_test/H_test.jpg"
img = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
img_array = image.img_to_array(img) / 255.0  # Normalize
img_array = np.expand_dims(img_array, axis=0)

# Predict
predictions = model.predict(img_array)
predicted_class = class_names[np.argmax(predictions)]
print("Predicted ASL Letter:", predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
Predicted ASL Letter: H
