In [5]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.6-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp310-cp310-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m51.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading protobuf-4.25.6-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: protobuf, sounddevice, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling prot

In [76]:
import os
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras import layers, models, Input, Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [77]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode = True, max_num_hands = 1, min_detection_confidence = 0.4)

In [78]:
def extract_hand_landmarks(image_path):
    image = cv2.imread(image_path)
    if image is None:
        return np.zeros(42)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            landmarks = []
            for lm in hand_landmarks.landmark:
                landmarks.append(lm.x)
                landmarks.append(lm.y)
            return np.array(landmarks)
    return np.zeros(42)

In [79]:
def load_and_preprocess_image(image_path, img_size = (224, 224)):
    image = cv2.imread(image_path)
    if image is None:
        image = np.zeros((img_size[0], img_size[1], 3), dtype = np.uint8)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, img_size)
    image = preprocess_input(image) 
    return image

In [80]:
data_dir = '/kaggle/input/asl-dataset/asl_dataset'
images = []
landmarks = []
labels = []

In [81]:
for label_dir in os.listdir(data_dir):
    if label_dir.isdigit(): 
        continue

    label_path = os.path.join(data_dir, label_dir)
    if os.path.isdir(label_path):
        for file in os.listdir(label_path):
            file_path = os.path.join(label_path, file)
            if os.path.isfile(file_path):
                img = load_and_preprocess_image(file_path)
                lm = extract_hand_landmarks(file_path)
                images.append(img)
                landmarks.append(lm)
                labels.append(label_dir)

In [82]:
images = np.array(images)
landmarks = np.array(landmarks)
labels = np.array(labels)

In [83]:
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
num_classes = len(label_encoder.classes_)
labels_categorical = to_categorical(labels_encoded, num_classes)

In [84]:
np.save('classes.npy', label_encoder.classes_)

In [85]:
X_img_train, X_img_test, X_lm_train, X_lm_test, y_train, y_test = train_test_split(images, landmarks, labels_categorical, test_size = 0.2, random_state = 42)

In [86]:
base_model = EfficientNetB0(include_top = False, input_shape = (224,224,3), pooling = 'avg')
base_model.trainable = False

image_input = Input(shape = (224,224,3), name = 'image_input')
x1 = base_model(image_input)
x1 = layers.BatchNormalization()(x1)

landmark_input = Input(shape = (42,), name = 'landmark_input')
x2 = layers.Dense(64, activation = 'relu')(landmark_input)
x2 = layers.BatchNormalization()(x2)
x2 = layers.Dropout(0.3)(x2)

combined = layers.concatenate([x1, x2])
combined = layers.Dense(128, activation = 'relu')(combined)
combined = layers.BatchNormalization()(combined)
combined = layers.Dropout(0.4)(combined)
output = layers.Dense(num_classes, activation = 'softmax')(combined)

model = Model(inputs = [image_input, landmark_input], outputs = output)
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [87]:
model.summary()

In [88]:
history = model.fit(
    {'image_input': X_img_train, 'landmark_input': X_lm_train},
    y_train,
    epochs = 10,
    batch_size = 32,
    validation_data = (
        {'image_input': X_img_test, 'landmark_input': X_lm_test},
        y_test
    )
)

Epoch 1/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 408ms/step - accuracy: 0.3433 - loss: 2.4497 - val_accuracy: 0.5262 - val_loss: 2.2809
Epoch 2/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - accuracy: 0.8016 - loss: 0.6580 - val_accuracy: 0.7135 - val_loss: 1.7945
Epoch 3/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - accuracy: 0.9033 - loss: 0.3616 - val_accuracy: 0.8457 - val_loss: 1.3421
Epoch 4/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - accuracy: 0.9575 - loss: 0.2230 - val_accuracy: 0.8926 - val_loss: 0.9917
Epoch 5/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - accuracy: 0.9639 - loss: 0.1757 - val_accuracy: 0.9311 - val_loss: 0.6946
Epoch 6/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - accuracy: 0.9750 - loss: 0.1327 - val_accuracy: 0.9449 - val_loss: 0.4515
Epoch 7/10
[1m46/46[0m [32m━━

In [68]:
import sklearn
print(sklearn.__version__)

1.2.2


In [89]:
model.save('Sign_language_detector.h5')