In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Rescaling
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.data import Dataset

# Load the dataset
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_train/asl_alphabet_train',
    image_size=(64, 64),
    batch_size=32,
    label_mode='categorical'  # Ensures one-hot encoded labels
)

# Split dataset into training and validation sets (80% train, 20% validation)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size).take(val_size)

# Preprocess the datasets by rescaling images
preprocess_layer = Rescaling(1./255)
train_dataset = train_dataset.map(lambda x, y: (preprocess_layer(x), y))
val_dataset = val_dataset.map(lambda x, y: (preprocess_layer(x), y))

# Verify dataset shapes for debugging
for images, labels in train_dataset.take(1):
    print(f"Train images shape: {images.shape}")
    print(f"Train labels shape: {labels.shape}")  # Should be (batch_size, 29)

for val_images, val_labels in val_dataset.take(1):
    print(f"Validation images shape: {val_images.shape}")
    print(f"Validation labels shape: {val_labels.shape}")  # Should be (batch_size, 29)

# Create the CNN model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(29, activation='softmax')  # 29 output classes
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model with validation
history = model.fit(train_dataset, validation_data=val_dataset, epochs=10)

# Load test images without labels
test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_test',
    image_size=(64, 64),
    batch_size=32,
    label_mode=None  # No labels provided
)

# Preprocess the test dataset (rescale)
test_dataset = test_dataset.map(lambda x: preprocess_layer(x))
test_dataset = test_dataset.cache().prefetch(buffer_size=tf.data.experimental.AUTOTUNE)


# Evaluate the model on the test dataset
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test Accuracy: {test_acc}")

# Save the trained model
model.save('Users/mariaadelinemaharaniwidyatmoko/Downloads/my_trained_model.h5')


In [None]:

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Rescaling, Dropout
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Data augmentation and rescaling
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
])

rescaling = Rescaling(1./255)

def preprocess_image(image, label):
    image = rescaling(image)
    image = data_augmentation(image)
    return image, label

# Load the training dataset without preprocessing
raw_train_dataset = image_dataset_from_directory(
    '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_train/asl_alphabet_train',
    image_size=(64, 64),
    batch_size=32,
    validation_split=0.2,
    subset="training",
    seed=123,
    label_mode='categorical'
)

# Retrieve class names from the raw training dataset
class_labels = raw_train_dataset.class_names

# Apply preprocessing to the training dataset
train_dataset = raw_train_dataset.map(preprocess_image)
train_dataset = train_dataset.cache().prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Load and preprocess the validation dataset
raw_validation_dataset = image_dataset_from_directory(
    '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_train/asl_alphabet_train',
    image_size=(64, 64),
    batch_size=32,
    validation_split=0.2,
    subset="validation",
    seed=123,
    label_mode='categorical'
)
validation_dataset = raw_validation_dataset.map(lambda x, y: (rescaling(x), y))
validation_dataset = validation_dataset.cache().prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Define and compile the model
model = tf.keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),
    Dropout(0.25),  # Add dropout after first pooling layer

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    Dropout(0.25),  # Add dropout after second pooling layer

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    Dropout(0.25),  # Add dropout after third pooling layer

    layers.Flatten(),
    layers.Dense(256, activation='relu'),  # Increased the number of neurons
    Dropout(0.5),  # Add dropout before the final dense layer

    layers.Dense(29, activation='softmax')
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(train_dataset, validation_data=validation_dataset, epochs=10)  # Increase epochs for better learning

# Load and preprocess the test dataset
test_dataset = image_dataset_from_directory(
    '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_test',
    image_size=(64, 64),
    batch_size=32,
    label_mode=None
)
test_dataset = test_dataset.map(lambda x: rescaling(x))

# Make predictions
predictions = model.predict(test_dataset)

# Get the predicted class for each image
predicted_classes = tf.argmax(predictions, axis=1)

# Map the predicted classes to their corresponding labels
predicted_labels = [class_labels[i] for i in predicted_classes.numpy()]

print(f'Predicted labels: {predicted_labels}')

# Save the trained model
model.save('Users/mariaadelinemaharaniwidyatmoko/Downloads/my_trained_model_9.h5')


In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import os
import numpy as np

# Load the model
model = load_model('/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_test/Users/mariaadelinemaharaniwidyatmoko/Downloads/my_trained_model_9.h5')

# Load the class labels
train_folders = '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_train/asl_alphabet_train'
labels = sorted(os.listdir(train_folders))  # Ensure labels are in the correct order

# Preprocess the image to test
img_path = '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_test/asl_alphabet_test/C.jpg'
img = image.load_img(img_path, target_size=(64, 64))
img_array = image.img_to_array(img)  # Convert the image to a numpy array
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

# Apply the same normalization used during training
img_array = img_array / 255.0  # Normalize image

# Make the prediction
prediction = model.predict(img_array)

# Assuming the model outputs class probabilities
predicted_class = np.argmax(prediction, axis=1)

# Get the predicted label
predicted_label = labels[predicted_class[0]]
print(f'Predicted class: {predicted_label}')




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
Predicted class: B


In [17]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import os

# Load the trained model
model_path = '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_train/my_trained_model-3.h5'
model = load_model(model_path)

# Load the class labels
train_folders = '/Users/mariaadelinemaharaniwidyatmoko/Downloads/asl_alphabet_train/asl_alphabet_train'
labels = sorted(os.listdir(train_folders))

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.6, min_tracking_confidence=0.6)

# Initialize MediaPipe Drawing
mp_drawing = mp.solutions.drawing_utils

# Initialize webcam
cap = cv2.VideoCapture(0)

# Set confidence threshold
confidence_threshold = 0.7

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    
    if not ret:
        print("Failed to grab frame")
        break
    
    # Convert the frame to RGB
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Process the frame with MediaPipe Hands
    results = hands.process(rgb_frame)
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get bounding box for the hand
            h, w, _ = frame.shape
            x_min = int(min([landmark.x for landmark in hand_landmarks.landmark]) * w)
            x_max = int(max([landmark.x for landmark in hand_landmarks.landmark]) * w)
            y_min = int(min([landmark.y for landmark in hand_landmarks.landmark]) * h)
            y_max = int(max([landmark.y for landmark in hand_landmarks.landmark]) * h)
            
            # Apply padding to the bounding box
            padding = 50
            x_min = max(x_min - padding, 0)
            x_max = min(x_max + padding, w)
            y_min = max(y_min - padding, 0)
            y_max = min(y_max + padding, h)
            
            # Define the ROI based on the bounding box
            roi = frame[y_min:y_max, x_min:x_max]
            
            if roi.size > 0:
                try:
                    # Preprocess the frame (resize and normalize)
                    img = cv2.resize(roi, (64, 64))  # Resize to match input size of the model
                    img_array = image.img_to_array(img)  # Convert to array
                    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
                    img_array = img_array / 255.0  # Normalize the image
                    
                    # Predict the label
                    prediction = model.predict(img_array)
                    predicted_class = np.argmax(prediction, axis=1)
                    confidence = np.max(prediction)
                    
                    # Apply confidence threshold
                    if confidence > confidence_threshold:
                        predicted_label = labels[predicted_class[0]]
                    else:
                        predicted_label = "Unknown"
                    
                    # Display the prediction on the frame
                    cv2.putText(frame, f"{predicted_label} ({confidence:.2f})", (x_min, y_min - 10), 
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    
                except Exception as e:
                    print(f"Error during image processing: {e}")
            
            # Draw landmarks and connections
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
    
    else:
        # If no hands are detected, display a blue frame
        frame[:] = [255, 0, 0]  # Set frame to blue
    
    # Display the resulting frame
    cv2.imshow('ASL Sign Language Detection', frame)
    
    # Break the loop if 'q' is pr


I0000 00:00:1725617804.363256 1495146 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 83.1), renderer: Apple M2 Pro
W0000 00:00:1725617804.380113 1759771 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725617804.399520 1759771 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13