In [3]:
import os

# Dataset layout: one top-level 'data' directory containing a subdirectory
# per gesture class. Re-running this cell is safe because existing folders
# are left alone (exist_ok=True).
gestures = ['palm', 'fist', 'thumbs_up', 'peace', 'ok']
for gesture_name in gestures:
    gesture_dir = f'data/{gesture_name}'
    os.makedirs(gesture_dir, exist_ok=True)

print("Folders created successfully!")

Folders created successfully!


In [8]:
# Create your own dataset: change the target_gesture variable to collect images for a different sign.
import cv2
import time

# --- Capture settings ---
target_gesture = 'thumbs_up'  # Change this to collect images for different signs
counts = 0                    # images written so far (also used as the file name)
max_images = 200              # the loop stops once this many images are saved
collecting = False            # switched on when the user presses 's'

# Region of interest, in pixel coordinates of the mirrored frame. Only this
# crop is written to disk. It is constant, so it is defined once up front.
x1, y1, x2, y2 = 150, 150, 500, 500

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # cap.read() below will report failure and end the loop immediately;
    # this message just explains why nothing was captured.
    print("Warning: could not open the webcam (device 0).")

print(f"Collecting the images:{target_gesture.upper()}")

try:
    while counts < max_images:
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame, then draw the guide box. The crop is taken after
        # the box is drawn, so the saved image includes the inner edge of the
        # box outline (the inference cell crops the same way).
        frame = cv2.flip(frame, 1)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
        roi_frame = frame[y1:y2, x1:x2]

        if collecting:
            file_path = f"data/{target_gesture}/{counts}.jpg"
            # cv2.imwrite reports failure through its return value; without
            # this check the counter would advance with nothing on disk.
            if not cv2.imwrite(file_path, roi_frame):
                print(f"Could not write {file_path} - does the folder data/{target_gesture} exist?")
                break
            counts += 1
            time.sleep(0.05)  # Slight delay to avoid rapid captures

        # On-screen feedback: progress line plus a colour-coded recording state
        # (red while recording, green while idle).
        status = "RECORDING" if collecting else "NOT RECORDING"
        color = (0, 0, 255) if collecting else (0, 255, 0)

        cv2.putText(frame, f"Sign:{target_gesture} | Saved: {counts}/{max_images}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(frame, f"{status} ('s' = start, 'q' = quit)",
                    (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

        cv2.imshow("Frame", frame)

        # Mask to the low byte so the comparison with ord(...) also works on
        # platforms where waitKey returns extra high bits.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('s'):
            collecting = True
        elif key == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised;
    # otherwise the device stays locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

print(f"Done! Captured {counts} images for {target_gesture}.")





Collecting the images:THUMBS_UP
Done! Captured 200 images for thumbs_up.


In [9]:
# Preprocess an image for the model
import cv2
import numpy as np

def preprocess_image(img):
    """Convert an image into a single-sample batch for the gesture model.

    Steps: grayscale -> 5x5 Gaussian blur -> resize to 64x64 -> scale pixel
    values by 1/255 -> add a leading batch axis and a trailing channel axis.

    Args:
        img: Image array. A 3-D array is converted with cv2.COLOR_BGR2GRAY
            (i.e. it is treated as BGR); a 2-D array is used as-is as an
            already-grayscale image.

    Returns:
        float32 array of shape (1, 64, 64, 1).
    """
    # 2-D input is already single-channel; cvtColor would reject it.
    if img.ndim == 2:
        gray = img
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    resized = cv2.resize(blurred, (64, 64))
    normalized = resized.astype('float32') / 255.0

    # (64, 64) -> (1, 64, 64, 1): batch axis in front, channel axis at the end.
    final_img = np.expand_dims(normalized, axis=(0, -1))

    return final_img



In [12]:

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data preparation
# Training images are rescaled to [0, 1] and randomly augmented
# (horizontal flip, rotation up to 10 degrees).
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True,
    rotation_range=10,
)

# Validation images are only rescaled. Using the augmenting generator here
# would measure accuracy on randomly flipped/rotated images, which makes the
# validation metrics noisy. validation_split must match the training
# generator so both refer to the same 80/20 partition.
# NOTE(review): this relies on Keras choosing the subset from the directory
# contents and validation_split only (not from the augmentation settings) -
# confirm against the Keras docs for your version.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
)

train_generator = datagen.flow_from_directory(
    'data',
    target_size=(64, 64),
    batch_size=32,
    color_mode='grayscale',
    class_mode='categorical',
    subset='training',
    shuffle=True
)

validation_generator = val_datagen.flow_from_directory(
    'data',
    target_size=(64, 64),
    batch_size=32,
    color_mode='grayscale',
    class_mode='categorical',
    subset='validation',
    shuffle=False  # fixed order keeps evaluation repeatable
)





Found 800 images belonging to 5 classes.
Found 200 images belonging to 5 classes.


In [13]:
import os
# List the class folders under 'data' in sorted order, ignoring hidden
# entries (names starting with a dot).
class_dirs = [entry for entry in os.listdir('data') if not entry.startswith('.')]
class_dirs.sort()
print(class_dirs)

['fist', 'ok', 'palm', 'peace', 'thumbs_up']


In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

from tensorflow.keras import Input

# Size the output layer from the data instead of hard-coding it, so adding or
# removing a gesture folder does not silently break the label shape.
num_classes = train_generator.num_classes

# The inference cell maps predicted indices back to names by hand; print the
# mapping used for training so the two can be compared.
print("Class indices:", train_generator.class_indices)

# Small CNN: three conv/pool stages, then a dense classifier head.
model = Sequential([
    # Explicit Input layer instead of input_shape= on the first Conv2D, which
    # newer Keras versions warn about.
    Input(shape=(64, 64, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')  # one unit per gesture class
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(
    train_generator,
    epochs=15,
    validation_data=validation_generator
)

# The inference cell loads the model from this exact path.
model.save('sign_language_model.h5')
print("Model trained and saved as sign_language_model.h5")



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 62ms/step - accuracy: 0.2150 - loss: 1.6156 - val_accuracy: 0.2000 - val_loss: 1.5989
Epoch 2/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.4150 - loss: 1.4244 - val_accuracy: 0.8350 - val_loss: 1.1049
Epoch 3/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.8200 - loss: 0.6094 - val_accuracy: 0.7400 - val_loss: 0.7797
Epoch 4/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.9212 - loss: 0.2462 - val_accuracy: 0.8550 - val_loss: 0.8970
Epoch 5/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.9575 - loss: 0.1864 - val_accuracy: 0.7550 - val_loss: 1.0141
Epoch 6/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.9550 - loss: 0.1565 - val_accuracy: 0.8350 - val_loss: 0.8577
Epoch 7/15
[1m25/25[0m [32m━━━━



Model trained and saved as sign_language_model.h5


In [26]:
import cv2
import numpy as np
import time
from tensorflow.keras.models import load_model
from collections import Counter

# --- 1. SETUP ---
model = load_model('sign_language_model.h5')

# IMPORTANT: If your emojis are wrong, re-order this list!
# Match it to the alphabetical order of your folders in 'data/'
# (the predicted class index is used directly as an index into this list).
gestures = ['fist', 'ok', 'palm', 'peace', 'thumbs_up']

# Load one emoji sprite per gesture. IMREAD_UNCHANGED keeps the alpha channel,
# which the overlay needs for blending.
emoji_assets = {}
for g in gestures:
    emoji_path = f'emojis/{g}.png'
    img = cv2.imread(emoji_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable; say so rather than silently showing nothing later.
        print(f"Warning: could not load {emoji_path}; no emoji will be shown for '{g}'.")
        continue
    if img.ndim != 3 or img.shape[2] < 4:
        # Without an alpha channel the sprite can never be blended, so do not
        # keep it around.
        print(f"Warning: {emoji_path} has no alpha channel; no emoji will be shown for '{g}'.")
        continue
    emoji_assets[g] = img

# --- 2. STABILITY VARIABLES ---
history = []          # recent confident class ids, oldest first
HISTORY_SIZE = 15     # AI must be consistent for 15 frames
active_emoji = None   # name of the gesture whose emoji is currently animating
start_time = 0        # time.time() at which the current animation started

# --- 3. REPAIRING THE OVERLAY (No-Crash Version) ---
def overlay_transparent(background, overlay, x, y, scale):
    """Alpha-blend a scaled sprite onto an image, centred at (x, y).

    Args:
        background: Image array with at least 3 channels; it is not modified.
        overlay: Sprite array with 4 channels, the 4th being alpha in 0..255,
            or None.
        x, y: Pixel coordinates in `background` where the sprite is centred.
        scale: Resize factor applied to the sprite (floored at 0.01).

    Returns:
        A new uint8 array with the sprite blended in. `background` itself is
        returned unchanged when there is nothing to draw: overlay is None,
        has no alpha channel, scales down to 1 pixel or less, or lies
        entirely outside the image.
    """
    if overlay is None:
        return background
    # Check the channel count before indexing shape[2]; a 2-D (grayscale)
    # sprite has no third dimension at all.
    if overlay.ndim != 3 or overlay.shape[2] < 4:
        return background

    scale = max(0.01, scale)  # Safety floor
    new_w = int(overlay.shape[1] * scale)
    new_h = int(overlay.shape[0] * scale)
    if new_w <= 1 or new_h <= 1:
        return background

    # Top-left corner of the unclipped sprite in background coordinates.
    left = x - new_w // 2
    top = y - new_h // 2

    # Intersect the sprite rectangle with the image bounds.
    bg_h, bg_w = background.shape[:2]
    x1, y1 = max(0, left), max(0, top)
    x2, y2 = min(bg_w, left + new_w), min(bg_h, top + new_h)
    if x1 >= x2 or y1 >= y2:
        return background  # completely off-screen

    img = cv2.resize(overlay, (new_w, new_h))

    # Take the part of the sprite that is actually visible. The offsets are
    # non-zero when the sprite is clipped at the left or top edge.
    overlay_slice = img[y1 - top:y2 - top, x1 - left:x2 - left]

    # Blend only the affected region; alpha keeps a trailing axis so it
    # broadcasts across the three colour channels.
    alpha = overlay_slice[:, :, 3:4].astype(float) / 255.0
    region = background[y1:y2, x1:x2, :3].astype(float)
    blended = alpha * overlay_slice[:, :, :3] + (1.0 - alpha) * region

    result = background.copy()
    result[y1:y2, x1:x2, :3] = blended.astype(np.uint8)
    return result

# --- 4. THE LOOP ---
# Region of interest in the mirrored frame; same box as the one used when the
# training images were collected. Constant, so defined once outside the loop.
x1, y1, x2, y2 = 150, 150, 500, 500

cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        # PREDICTION (paused while an emoji animation is playing, so the
        # preprocessing is skipped on those frames too)
        if active_emoji is None:
            roi = frame[y1:y2, x1:x2]
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            processed = cv2.GaussianBlur(gray, (5, 5), 0)
            resized = cv2.resize(processed, (64, 64)) / 255.0
            reshaped = resized.reshape(1, 64, 64, 1)

            pred = model.predict(reshaped, verbose=0)
            class_id = int(np.argmax(pred))

            # Only confident predictions get a vote. Low-confidence frames are
            # skipped rather than recorded, so the votes in `history` need not
            # come from consecutive frames.
            if pred[0][class_id] > 0.85:
                history.append(class_id)

            if len(history) > HISTORY_SIZE:
                history.pop(0)  # Keep history fresh

            # VOTING: Only trigger if the most common guess appears > 80% of the time
            if len(history) == HISTORY_SIZE:
                most_common, count = Counter(history).most_common(1)[0]
                if count / HISTORY_SIZE > 0.8:  # high confidence over time
                    active_emoji = gestures[most_common]
                    start_time = time.time()
                    history = []  # Reset history after triggering
                    print(f"Triggered: {active_emoji}")

        # ANIMATION LOGIC (each emoji is shown for 1.0 second)
        if active_emoji:
            elapsed = time.time() - start_time

            if elapsed < 1.0:
                # Scale ramps from 0.1 up to 1.0 over roughly the first third
                # of a second (elapsed * 3), with a small sine wobble on top.
                scale = max(0.1, min(1.0, elapsed * 3)) + (0.05 * np.sin(elapsed * 15))

                # Vertical bounce of up to 25 px around the frame centre.
                bounce = int(25 * np.sin(elapsed * 12))

                # emoji_assets.get returns None for a gesture without a loaded
                # sprite; overlay_transparent then returns the frame unchanged.
                frame = overlay_transparent(frame, emoji_assets.get(active_emoji),
                                            frame.shape[1] // 2, frame.shape[0] // 2 + bounce, scale)
            else:
                active_emoji = None  # Reset after 1 second
                history = []         # Clear history to prepare for the next gesture

        cv2.imshow("Stable Gesture Action", frame)
        # Mask to the low byte so the comparison with ord('q') also works on
        # platforms where waitKey returns extra high bits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised;
    # otherwise the device stays locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)



Triggered: thumbs_up
Triggered: palm
Triggered: palm
Triggered: palm
Triggered: peace
Triggered: palm
Triggered: palm
Triggered: thumbs_up
Triggered: palm
Triggered: palm
Triggered: peace
Triggered: peace
Triggered: peace
Triggered: peace
Triggered: peace
Triggered: thumbs_up
Triggered: thumbs_up
Triggered: peace
Triggered: palm
Triggered: palm
Triggered: thumbs_up
Triggered: peace
Triggered: fist
Triggered: fist
Triggered: peace
Triggered: palm
Triggered: thumbs_up
Triggered: peace
Triggered: palm
Triggered: peace
Triggered: palm
Triggered: thumbs_up
Triggered: thumbs_up
Triggered: thumbs_up
Triggered: thumbs_up
Triggered: thumbs_up
Triggered: thumbs_up


-1