<a href="https://colab.research.google.com/github/datajcthemax/playdata/blob/main/hand.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image size fed to the network and number of images per batch.
img_height, img_width = 256, 256
batch_size = 64

# A single generator serves both subsets: it multiplies pixel values by
# 1/255 and reserves 20% of the images for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2, fill_mode="nearest")

# Options shared by the training and validation iterators; both read from
# the same 'hle' directory and differ only in the subset they select.
flow_options = dict(
    directory='hle',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

# 80% split used to fit the model.
train_generator = datagen.flow_from_directory(subset='training', **flow_options)

# 20% split used to monitor the model during training.
validation_generator = datagen.flow_from_directory(subset='validation', **flow_options)

from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# Load InceptionV3 with ImageNet weights but without its top
# (final classification) layers.
base_model = InceptionV3(weights='imagenet', include_top=False)

# Collapse the convolutional feature maps with global average pooling.
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Fully-connected layer on top of the pooled features.
x = Dense(1024, activation='relu')(x)

# Softmax output with one unit per class discovered by the training
# generator (7 for the letters e, h, l, o, w, d, r). Reading the count from
# the generator keeps the head in sync with the data directory instead of
# relying on a hard-coded number.
predictions = Dense(train_generator.num_classes, activation='softmax')(x)

# This is the model we will train.
model = Model(inputs=base_model.input, outputs=predictions)

# First train only the newly added (randomly initialized) top layers:
# freeze every InceptionV3 layer.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model (must be done *after* setting layers to non-trainable).
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the new head on the data for a few epochs.
model.fit(train_generator, validation_data=validation_generator, epochs=10)

# At this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from InceptionV3. We will freeze the bottom N layers
# and train the remaining top layers.

# Print layer indices and names to see how many layers we should freeze:
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# We chose to train the top 2 inception blocks, i.e., we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

# We need to recompile the model for these modifications to take effect.
# We use SGD with a low learning rate. The keyword must be `learning_rate`:
# the old `lr` alias is deprecated and newer Keras releases either ignore it
# (falling back to the default rate) or reject it outright.
from tensorflow.keras.optimizers import SGD
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# We train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers).
model.fit(train_generator, validation_data=validation_generator, epochs=50)

# Persist the fine-tuned model so the inference code can load it from
# 'sign_language_model.h5'.
model.save('sign_language_model.h5')

import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

# Load the saved model
model = load_model('sign_language_model.h5')

# Initialize MediaPipe Hands module
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7, min_tracking_confidence=0.5)

# Class names indexed by label id. flow_from_directory assigns label indices
# in alphanumeric order of the class sub-directory names, so the list is
# sorted to match.
# NOTE(review): assumes the sub-directories of the training folder are named
# exactly with these letters — confirm against train_generator.class_indices.
class_names = sorted(['e', 'h', 'l', 'o', 'w', 'r', 'd'])

# Open the webcam
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged / stream ended);
            # stop instead of passing None to OpenCV.
            break

        # Flip the image horizontally for a later selfie-view display, and convert the BGR image to RGB.
        frame = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)

        # Run hand detection
        results = hands.process(frame)

        # Convert the image color back so it can be displayed
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Draw hand landmarks
                mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Preprocess the image the same way the training pipeline did:
                # RGB channel order and pixel values multiplied by 1/255.
                # NOTE(review): the classifier still sees the frame with the
                # landmarks drawn on it, as before — confirm this matches the
                # training images.
                img = cv2.resize(frame, (256, 256))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = image.img_to_array(img) / 255.0
                img = np.expand_dims(img, axis=0)

                # Predict the class of the hand sign
                predictions = model.predict(img)
                class_id = int(np.argmax(predictions))
                sign_language = class_names[class_id]

                # Display the detected hand sign on the frame
                cv2.putText(frame, sign_language, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

        # Display the frame
        cv2.imshow('Sign Language Recognition', frame)

        # Quit when the user presses 'q'
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera, the MediaPipe graph and the window, even if
    # an error interrupts the loop.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()