# Collect Images 

In [35]:
# Collect Images

import os
import cv2

DATA_DIR = './data/static/images'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

# Set the number of classes and dataset size
number_of_classes = 1
dataset_size = 150

cap = cv2.VideoCapture(0)

# Loop for the specified number of classes
for j in range(number_of_classes):
    # Prompt the user to enter a new class name
    class_name = input(f'Enter the class name for dataset {j + 1} (will be stored in folder "): ').strip()
    class_dir = os.path.join(DATA_DIR, class_name)

    if not os.path.exists(class_dir):
        os.makedirs(class_dir)

    print(f'Collecting data for class: {class_name}')

    # Wait for the user to be ready before starting collection
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame.")
            break

        cv2.putText(frame, f'Press SPACE to start collecting data for class: {class_name}', 
                    (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)
        cv2.imshow('frame', frame)

        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            print("Exiting program.")
            cap.release()
            cv2.destroyAllWindows()
            exit()
        elif key & 0xFF == ord(' '):  # Spacebar to start collecting
            break

    # Collect dataset_size images for the current class
    counter = 0
    while counter < dataset_size:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame.")
            break

        cv2.imshow('frame', frame)
        cv2.waitKey(25)

        # Save the image with a default naming convention
        image_filename = f"{counter}.jpg"
        cv2.imwrite(os.path.join(class_dir, image_filename), frame)
        counter += 1

        # Check for 'q' key press to quit during image collection
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            print("Data collection interrupted.")
            break

cap.release()
cv2.destroyAllWindows()


Enter the class name for dataset 1 (will be stored in folder "):  Z


Collecting data for class: Z


# Create Npz dataset

In [52]:
# Collecting hand landmarks base on mediapipe x,y only

import os
import numpy as np
import mediapipe as mp
import cv2

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.5)

DATA_DIR = r'C:\Users\puter\final\data\static\images'

static_landmarks = []
static_labels = []

# Count total images for progress tracking
total_images = sum(len(files) for _, _, files in os.walk(DATA_DIR))
processed_images = 0  # Initialize a counter for processed images

for dir_ in os.listdir(DATA_DIR):
    for img_path in os.listdir(os.path.join(DATA_DIR, dir_)):
        data_aux = []
        x_ = []
        y_ = []

        img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y

                    x_.append(x)
                    y_.append(y)

                for i in range(len(hand_landmarks.landmark)):
                    x = hand_landmarks.landmark[i].x
                    y = hand_landmarks.landmark[i].y
                    data_aux.append(x - min(x_))
                    data_aux.append(y - min(y_))

            static_landmarks.append(data_aux)
            static_labels.append(dir_)

        # Update and print progress
        processed_images += 1
        if processed_images % 10 == 0 or processed_images == total_images:  # Print every 10 images or last one
            print(f'Processed {processed_images}/{total_images} images.')

# print(static_landmarks) 
# print(static_labels.shape[1]) 

# Convert data and labels to NumPy arrays
static_landmarks = np.array(static_landmarks)
static_labels = np.array(static_labels)

# Save data and labels using NumPy
np.savez(r".\data\npz\img_compile.npz", static_landmarks=static_landmarks, static_labels=static_labels)
print('Done')

Processed 10/3600 images.
Processed 20/3600 images.
Processed 30/3600 images.
Processed 40/3600 images.
Processed 50/3600 images.
Processed 60/3600 images.
Processed 70/3600 images.
Processed 80/3600 images.
Processed 90/3600 images.
Processed 100/3600 images.
Processed 110/3600 images.
Processed 120/3600 images.
Processed 130/3600 images.
Processed 140/3600 images.
Processed 150/3600 images.
Processed 160/3600 images.
Processed 170/3600 images.
Processed 180/3600 images.
Processed 190/3600 images.
Processed 200/3600 images.
Processed 210/3600 images.
Processed 220/3600 images.
Processed 230/3600 images.
Processed 240/3600 images.
Processed 250/3600 images.
Processed 260/3600 images.
Processed 270/3600 images.
Processed 280/3600 images.
Processed 290/3600 images.
Processed 300/3600 images.
Processed 310/3600 images.
Processed 320/3600 images.
Processed 330/3600 images.
Processed 340/3600 images.
Processed 350/3600 images.
Processed 360/3600 images.
Processed 370/3600 images.
Processed 

# Create labels into json

In [56]:
import os
import json

# Get unique labels
unique_labels = np.unique(static_labels)

# Create a dictionary with indices as keys and labels as values
label_dict = {str(index): label for index, label in enumerate(unique_labels)}

# Convert the dictionary to JSON format and save to a file
with open(r".\data\labels\compile.json", 'w') as json_file:
    json.dump(label_dict, json_file, indent=4)

print("Labels saved to compile.json")

Labels saved to compile.json


# Training the model base on the dataset in NPZ ([x,y], labels)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Load your data from .npz file
data_dict = np.load(r".\data\npz\img_compile.npz")  # Ensure you have a .npz file
static_landmarks = data_dict['static_landmarks']  # Assuming 'data' is the key for features
static_labels = data_dict['static_labels']  # Assuming 'labels' is the key for labels

# Check the shape of the loaded data
print("Data shape:", static_landmarks.shape)
print("Labels shape before processing:", static_labels.shape)

# Ensure labels is a 1D array of class labels
# If your labels are originally strings like ['A', 'B', 'C', ...], convert them to integers
unique_labels, labels_indices = np.unique(static_labels, return_inverse=True)  # Convert to numerical indices
print("Unique labels:", unique_labels)
print("Labels indices shape:", labels_indices.shape)  # Should match the number of samples in data

# Convert labels to categorical (one-hot encoding)
num_classes = len(unique_labels)  # Number of unique classes
labels_categorical = to_categorical(labels_indices, num_classes=num_classes)  # Shape: (num_samples, num_classes)

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(static_landmarks, labels_categorical, test_size=0.2, shuffle=True)

# Define the Keras model
model = Sequential([
    Dense(128, activation='relu', input_shape=(static_landmarks.shape[1],)),  # Adjust input shape as needed
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')  # Output layer for number of classes
])



# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])




from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define the callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitor validation loss for early stopping
    patience=10,         # Stop training if no improvement after 10 epochs
    restore_best_weights=True,  # Restore weights from the epoch with the best value of monitored metric
    mode='min'
)

model_checkpoint = ModelCheckpoint(
    filepath='./data/keras/AisyahSignX{epoch:02d}.h5',  # Path to save the best model
    monitor='val_loss',        # Monitor validation loss for checkpointing
    save_best_only=True,       # Save only the best model
    # vebose=1
)

# Train the model with callbacks
history = model.fit(
    x_train, y_train,
    epochs=100,
    batch_size=32,
    validation_data=(x_test, y_test),
    callbacks=[early_stopping, model_checkpoint]
)




Data shape: (3572, 42)
Labels shape before processing: (3572,)
Unique labels: ['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S'
 'T' 'U' 'V' 'W' 'X' 'Y']
Labels indices shape: (3572,)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.1614 - loss: 3.0840 - val_accuracy: 0.3720 - val_loss: 2.5184
Epoch 2/100
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4756 - loss: 2.1543 - val_accuracy: 0.8000 - val_loss: 1.3109
Epoch 3/100
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7762 - loss: 1.1582 - val_accuracy: 0.8182 - val_loss: 0.7845
Epoch 4/100
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8509 - loss: 0.7184 - val_accuracy: 0.8434 - val_loss: 0.5418
Epoch 5/100
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9009 - loss: 0.4966 - val_accuracy: 0.9469 - val_loss: 0.3836
Epoch 6/100
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9420 - loss: 0.3771 - val_accuracy: 0.9622 - val_loss: 0.2928
Epoch 7/100
[1m90/90[0m [32m━━━

In [None]:
model.save('./data/keras/aisyahhand.h5')

# Live Test

## Read labels from json or npz

In [3]:
# Read json to use in live test

import json

# Load the JSON file
with open(r".\data\labels\compile.json", 'r') as json_file:
    label_dict = json.load(json_file)

# Now label_dict contains your data
print(label_dict)  # This will print the loaded dictionary


JSONDecodeError: Expecting value: line 2 column 10 (char 11)

### or

In [16]:
# Read labels in the npz folders


import numpy as np

# Load your data from .npz file
data_dict = np.load(r".\data\npz\img_compile.npz")  # Ensure you have a .npz file
# static_landmarks = data_dict['static_landmarks']  # Assuming 'data' is the key for features not needed currently
static_labels = data_dict['static_labels']  # Assuming 'labels' is the key for labels

# Assuming 'static_labels' is your array of labels from the npz file
# static_labels = np.array([...])  # Replace with the actual array

# Get unique sorted labels
unique_labels = sorted(np.unique(static_labels))

# Create dictionary with index as key and label as value
label_dict = {str(index): label for index, label in enumerate(unique_labels)}

print(label_dict)

{'0': 'A', '1': 'B', '2': 'C', '3': 'D', '4': 'E', '5': 'F', '6': 'G', '7': 'H', '8': 'I', '9': 'K', '10': 'L', '11': 'M', '12': 'N', '13': 'O', '14': 'P', '15': 'Q', '16': 'R', '17': 'S', '18': 'T', '19': 'U', '20': 'V', '21': 'W', '22': 'X', '23': 'Y'}


## Live test using opencv

In [28]:
# Import required libraries
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

# Load the Keras model
model = load_model(r"C:\Users\puter\final\data\keras\AisyahSignX59.keras")

# Check the model's input shape to determine the expected input size
expected_input_size = model.input_shape[1]  # Adjust based on your model's input shape

# Initialize video capture
cap = cv2.VideoCapture(0)

# Check if the camera opened successfully
if not cap.isOpened():
    print("Error: Could not open the camera.")
    exit()

# Set up MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(
    static_image_mode=False,  # Set to False for continuous detection
    max_num_hands=1,  # Detect one hand at a time for simplicity
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Label dictionary for mapping predicted indices to characters, excluding 'J' and 'Z'
label_dict = {
    '0': 'A',
    '1': 'B',
    '2': 'C',
    '3': 'D',
    '4': 'E',
    '5': 'F',
    '6': 'G',
    '7': 'H',
    '8': 'I',
    '9': 'K',
    '10': 'L',
    '11': 'M',
    '12': 'N',
    '13': 'O',
    '14': 'P',
    '15': 'Q',
    '16': 'R',
    '17': 'S',
    '18': 'T',
    '19': 'U',
    '20': 'V',
    '21': 'W',
    '22': 'X',
    '23': 'Y',
}

try:
    while True:
        # Capture each frame
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            break

        # Convert frame to RGB for MediaPipe processing
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        # If hand landmarks are detected
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Draw landmarks on the frame
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()
                )

                # Extract normalized landmark coordinates
                data_aux = []
                x_ = []
                y_ = []
                for landmark in hand_landmarks.landmark:
                    x_.append(landmark.x)
                    y_.append(landmark.y)

                # Create feature vector
                min_x, min_y = min(x_), min(y_)
                for landmark in hand_landmarks.landmark:
                    data_aux.append(landmark.x - min_x)
                    data_aux.append(landmark.y - min_y)

                # Ensure correct data format for model prediction
                if len(data_aux) == expected_input_size:
                    data_aux = np.asarray(data_aux).reshape(1, -1)
                    prediction = model.predict(data_aux)

                    # Get predicted class and probability
                    predicted_class_index = np.argmax(prediction, axis=1)[0]
                    predicted_probability = prediction[0][predicted_class_index]

                    if predicted_probability >= 0.3:
                        predicted_character = label_dict.get(str(predicted_class_index), 'Unknown')
                    else:
                        predicted_character = 'Unknown'

                    # Draw prediction on the frame
                    x1 = int(min(x_) * frame.shape[1]) - 10
                    y1 = int(min(y_) * frame.shape[0]) - 10
                    x2 = int(max(x_) * frame.shape[1]) + 10
                    y2 = int(max(y_) * frame.shape[0]) + 10

                    cv2.rectangle(frame, (x1, y1), (x2, y2), (72, 61, 139), 4)
                    cv2.putText(frame, f'{predicted_character} ({predicted_probability * 100:.2f}%)',
                                (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (72, 61, 139), 3, cv2.LINE_AA)

        # Display the frame with annotations
        cv2.imshow('Sign Language Recognition', frame)

        # Exit the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23

In [7]:
import cv2

def list_available_cameras(max_index=10):
    available_cameras = []
    for index in range(max_index):
        cap = cv2.VideoCapture(index)
        if cap.isOpened():
            available_cameras.append(index)
            cap.release()  # Don't forget to release the camera
    return available_cameras

# Set max_index higher if you have more than 10 cameras
cameras = list_available_cameras(max_index=10)
print("Available cameras:", cameras)


Available cameras: [0]
