# Dependencies

In [None]:
import tensorflow as tf
import cv2
import numpy as np
import mediapipe as mp
import os
import time
import sklearn




# Mediapipe And OpenCV

In [None]:
# Short aliases for the MediaPipe modules used by the cells below.
mp_drawing = mp.solutions.drawing_utils  # landmark rendering helpers
mp_holistic = mp.solutions.holistic  # Holistic model and HAND_CONNECTIONS

In [None]:
def mediapipe_detector(image,model):
    """Run a MediaPipe model on one OpenCV frame.

    The frame is converted from BGR to RGB, flagged read-only while
    ``model.process`` runs, then flagged writeable again and converted
    back to BGR.

    Args:
        image: BGR frame as returned by ``cv2.VideoCapture.read``.
        model: object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(bgr_image, results)`` where ``results`` is whatever
        ``model.process`` returned.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    rgb.flags.writeable = False
    results = model.process(rgb)
    rgb.flags.writeable = True
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR), results

In [None]:
def draw_landmarks(image,results):
    """Draw left- and right-hand landmarks with their connections onto ``image``.

    Only the hands are rendered; face and pose landmarks are not drawn.
    """
    both_hands = (results.left_hand_landmarks, results.right_hand_landmarks)
    for hand_landmarks in both_hands:
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [None]:
# Live preview: run Holistic on webcam frames and show the hand landmarks
# until 'q' is pressed or the camera stops delivering frames.
cam = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cam.isOpened():
            ret, frame = cam.read()
            if not ret:
                # A failed read yields frame=None, which would crash cvtColor.
                break
            image, results = mediapipe_detector(frame, holistic)
            draw_landmarks(image, results)
            cv2.imshow('Python Camera', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and windows, even if a frame raised.
    cam.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

In [None]:
# Number of pose landmarks in the last processed frame; 0 when no pose was
# detected (pose_landmarks is falsy in that case, so guard before indexing).
len(results.pose_landmarks.landmark) if results.pose_landmarks else 0

# Convert Landmark Data to NumPy Arrays

In [None]:
# Flatten each detected hand into a vector of 21 * 3 values (x, y, z per
# landmark); a hand that was not detected becomes a zero vector of that length.
lh = np.zeros(21*3)
if results.left_hand_landmarks:
    lh = np.array([[pt.x, pt.y, pt.z] for pt in results.left_hand_landmarks.landmark]).flatten()

rh = np.zeros(21*3)
if results.right_hand_landmarks:
    rh = np.array([[pt.x, pt.y, pt.z] for pt in results.right_hand_landmarks.landmark]).flatten()

In [None]:
def extract_keypoints(results):
    """Build one flat feature vector from the hand landmarks in ``results``.

    Each hand contributes the x, y, z of every landmark, flattened; a hand
    that is missing (falsy) contributes ``21 * 3`` zeros instead.

    Returns:
        1-D array: left-hand values followed by right-hand values.
    """
    def hand_vector(hand):
        # Zero padding keeps the layout fixed when a hand is absent.
        if not hand:
            return np.zeros(21 * 3)
        coords = [[pt.x, pt.y, pt.z] for pt in hand.landmark]
        return np.array(coords).flatten()

    left = hand_vector(results.left_hand_landmarks)
    right = hand_vector(results.right_hand_landmarks)
    return np.concatenate([left, right])

In [None]:
# Display the feature vector extracted from the most recent `results`.
extract_keypoints(results)

# Save Data to File / Build the Dataset for the Model

In [None]:
# Root folder for the recorded keypoint files.
DATA_PATH = 'Final_Data'
# Recordings per action, and frames per recording.
no_of_sequences, sequence_length = 40, 45


In [None]:
# Gesture classes to record; the position in this array is the class id.
actions = np.array(['Hi', 'I Love You', 'How Are You', 'Good Morning',
                    'What Is Your Name', 'Sorry', 'Thank You', 'India'])

# Create one folder per (action, sequence). exist_ok=True tolerates re-running
# the cell while still surfacing real failures (e.g. permission errors) that
# a bare `except: pass` would hide.
for action in actions:
    for sequence in range(no_of_sequences):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [None]:
# Record `sequence_length` frames of hand keypoints for every
# (action, sequence) pair and save each frame as its own .npy file.
cam = cv2.VideoCapture(0)
stop_requested = False  # set when the user presses 'q'; unwinds all three loops

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for action in actions:
            for sequence in range(no_of_sequences):
                for frame_no in range(sequence_length):
                    ret, frame = cam.read()
                    if not ret:
                        # frame is None here; fail with a clear message instead
                        # of an opaque OpenCV error inside cvtColor.
                        raise RuntimeError(
                            f'Camera read failed while recording "{action}" '
                            f'(sequence {sequence}, frame {frame_no})')

                    image, results = mediapipe_detector(frame, holistic)
                    draw_landmarks(image, results)

                    # Convert 0-based index to 1-based for display
                    display_sequence = sequence + 1
                    display_frame = frame_no + 1

                    if frame_no == 0:
                        # Announce the new recording and give the user time to get ready.
                        cv2.putText(image, '--- STARTING COLLECTION ---', (50, 100),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                        cv2.putText(image, f'Collecting frames for "{action}" | Video #{display_sequence}', (50, 150),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
                        cv2.imshow('Python Camera', image)
                        cv2.waitKey(2000)  # Pause for 2 seconds
                    else:
                        cv2.putText(image, f'Collecting "{action}" | Video #{display_sequence} | Frame {display_frame}', (50, 100),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2, cv2.LINE_AA)
                        cv2.imshow('Python Camera', image)

                    # Save keypoints (np.save appends the .npy extension)
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_no))
                    np.save(npy_path, keypoints)

                    # 'q' aborts the whole collection, not just the current sequence
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break
                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Release the camera and windows even when recording is aborted or fails.
    cam.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)


In [None]:
# Manual cleanup cell: run this if a capture loop above was interrupted.
cam.release()
# destroyAllWindows takes no arguments; passing a window name raises an error.
cv2.destroyAllWindows()
cv2.waitKey(1)

# Labeling the Data Using scikit-learn

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# Map every action name to its position in `actions` (used as the class id).
label_map = dict(zip(actions, range(len(actions))))
print(label_map)

{'Hi': 0, 'I Love You': 1, 'How Are You': 2, 'Good Morning': 3, 'What Is Your Name': 4, 'Sorry': 5, 'Thank You': 6, 'India': 7}


In [None]:
# Read every saved frame back from disk: `sequences` gets one list of
# per-frame arrays per recording, `labels` the matching class id.
sequences, labels = [], []
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    for sequence in range(no_of_sequences):
        window = [
            np.load(os.path.join(action_dir, str(sequence), f"{frame_num}.npy"))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])

In [None]:
# Stack all recordings into a single array of model inputs.
X=np.array(sequences)

In [None]:
# Expected layout: (recordings, frames per recording, features per frame).
X.shape

(320, 45, 126)

In [None]:
# One-hot encode the integer class ids and store the result as ints.
y = to_categorical(labels).astype(int)

In [None]:
# Display the one-hot label matrix.
y

In [None]:
# Hold out 5% of the recordings for testing.
# random_state fixes the split so X_test is the same set on every run; this
# matters because a saved model is later reloaded and scored on X_test, and a
# fresh random split could place its training samples in the test set.
# stratify keeps the class proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=42, stratify=labels,
)

In [None]:
# Sanity check: the split returns NumPy arrays.
type(X_train)

numpy.ndarray

# Making the Actual model

In [None]:
import tensorflow as tf

# Report whether TensorFlow can see at least one GPU.
# tf.test.is_gpu_available() is deprecated; TensorFlow's own warning
# recommends tf.config.list_physical_devices('GPU') instead.
print(bool(tf.config.list_physical_devices('GPU')))

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True


In [None]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.callbacks import TensorBoard

In [None]:
# TensorBoard writes its training logs into this folder.
log_dir = 'Logs'
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization

# Stacked-LSTM classifier over keypoint sequences.
model = Sequential()

# Input is (frames, features). 126 = 2 hands x 21 landmarks x 3 coordinates,
# which is what extract_keypoints() produces; the previous (30, 258) did not
# match the recorded data.
model.add(LSTM(64, return_sequences=True, input_shape=(sequence_length, 126)))
model.add(BatchNormalization())
model.add(LSTM(128, return_sequences=True))  # pass the full sequence on to the next LSTM
model.add(BatchNormalization())
model.add(LSTM(64))  # last recurrent layer: only the final step is returned
model.add(BatchNormalization())

# Dense layers for classification
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
# One output per gesture class so the layer matches the one-hot labels.
model.add(Dense(len(actions), activation='softmax'))

# Compile with a small learning rate and gradient-norm clipping.
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-4, clipnorm=1.0)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, BatchNormalization, Bidirectional

# GRU classifier: two per-frame Dense layers, one bidirectional GRU, two
# plain GRUs, then a Dense head with an 8-way softmax.
model = Sequential([
    # Per-frame feature layers; input is 45 frames of 126 features each.
    Dense(128, activation='relu', input_shape=(45, 126)),
    Dense(64, activation='relu'),
    # Recurrent stack; only the last GRU collapses the time axis.
    Bidirectional(GRU(128, return_sequences=True)),
    GRU(64, return_sequences=True),
    GRU(32),
    # Classification head.
    Dense(32, activation='relu'),
    Dense(8, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


In [None]:
# Re-compile the current model with Adam at an explicit learning rate of 1e-4.
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-4)

model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

NameError: name 'model' is not defined

In [None]:
# Train for 50 epochs in mini-batches of 4, logging to TensorBoard.
model.fit(X_train, y_train, epochs=50,batch_size=4,callbacks=[tb_callback])

In [None]:
# Drop the in-memory model so the next cell can reload one from disk.
# NOTE(review): `model.save(...)` only appears in a later cell, so running the
# notebook top to bottom presumably discards freshly trained weights here —
# confirm the intended cell order.
del model

In [None]:
# Load the previously saved model from the HDF5 file.
model=tf.keras.models.load_model("UmaKeshav.h5")

In [None]:
# Print the layer-by-layer architecture of the loaded model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 45, 128)           16256     
                                                                 
 dense_3 (Dense)             (None, 45, 64)            8256      
                                                                 
 bidirectional (Bidirection  (None, 45, 256)           148992    
 al)                                                             
                                                                 
 gru_3 (GRU)                 (None, 45, 64)            61824     
                                                                 
 gru_4 (GRU)                 (None, 32)                9408      
                                                                 
 dense_4 (Dense)             (None, 32)                1056      
                                                      

In [None]:
# Predicted outputs for every held-out sample (one row per sample).
res = model.predict(X_test)



In [None]:

# Print "predicted-->actual" for every held-out sample. Iterating over the
# arrays themselves avoids hard-coding the test-set size (previously 16).
for predicted, actual in zip(res, y_test):
    print(actions[np.argmax(predicted)], end="-->")
    print(actions[np.argmax(actual)])

I Love You-->I Love You
How Are You-->How Are You
Hi-->Hi
Good Morning-->Good Morning
Sorry-->Sorry
I Love You-->I Love You
India-->India
India-->India
Hi-->I Love You
Sorry-->Sorry
Good Morning-->How Are You
How Are You-->How Are You
I Love You-->I Love You
India-->India
Sorry-->Sorry
Hi-->Hi


In [None]:
# Score the model on the held-out set.
# NOTE(review): presumably returns [loss, accuracy], as the commented-out
# print below assumes — confirm against how the loaded model was compiled.
eval_result = model.evaluate(X_test, y_test)
#print(f"Test Loss: {eval_result[0]}, Test Accuracy: {eval_result[1]}")



In [None]:
# Print the true action name of every held-out sample, whatever the size
# of the test set (previously hard-coded to 16).
for actual in y_test:
    print(actions[np.argmax(actual)])



In [None]:
# Persist the current model to an HDF5 file.
model.save("UmaKeshav.h5")

# Real Time


In [None]:
# One distinct bar colour per gesture class (8 classes).
# The names describe the tuples read as RGB; OpenCV applies them in the
# channel order of the image it draws on, which is BGR in this notebook.
colors = [
    (245, 117, 16),  # Orange
    (117, 245, 16),  # Green
    (16, 117, 245),  # Blue
    (245, 16, 117),  # Pink
    (117, 16, 245),  # Purple
    (16, 245, 117),  # Teal
    (245, 245, 16),  # Yellow
    (16, 245, 245),  # Cyan (was a second Purple, so two classes shared a colour)
]
def prob_viz_top(res, actions, input_frame, colors):
    """Overlay only the most probable action on a copy of ``input_frame``.

    Args:
        res: 1-D sequence of per-class probabilities.
        actions: class names, indexed like ``res``.
        input_frame: image to annotate; it is copied, not modified.
        colors: colour tuples; reused cyclically when there are more
            classes than colours.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    top_idx = np.argmax(res)
    top_prob = res[top_idx]
    # Cycle through the palette instead of raising IndexError.
    bar_color = colors[top_idx % len(colors)]

    # Bar length scales with confidence (300 px at probability 1.0).
    cv2.rectangle(output_frame, (0, 60), (int(top_prob * 300), 100), bar_color, -1)
    cv2.putText(output_frame, f"{actions[top_idx]}: {top_prob:.2f}", (10, 90),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    return output_frame


In [None]:
# One distinct bar colour per gesture class (8 classes).
# The names describe the tuples read as RGB; OpenCV applies them in the
# channel order of the image it draws on, which is BGR in this notebook.
colors = [
    (245, 117, 16),  # Orange
    (117, 245, 16),  # Green
    (16, 117, 245),  # Blue
    (245, 16, 117),  # Pink
    (117, 16, 245),  # Purple
    (16, 245, 117),  # Teal
    (245, 245, 16),  # Yellow
    (16, 245, 245),  # Cyan (was a second Purple, so two classes shared a colour)
]

def prob_viz(res, actions, input_frame, colors):
    """Draw one labelled probability bar per class on a copy of ``input_frame``.

    Args:
        res: 1-D sequence of per-class probabilities.
        actions: class names, indexed like ``res``.
        input_frame: image to annotate; it is copied, not modified.
        colors: colour tuples; reused cyclically when there are more
            classes than colours.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40  # rows are stacked 40 px apart
        # Cycle through the palette instead of raising IndexError.
        bar_color = colors[num % len(colors)]
        # Bar length scales with probability (100 px at probability 1.0).
        cv2.rectangle(output_frame, (0, top), (int(prob * 100), top + 30), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, top + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    return output_frame

In [None]:
# Real-time recognition: keep a sliding window of the latest 45 frames of
# keypoints, classify it on every frame, and append an action to the on-screen
# sentence once the prediction is stable and confident.
sequence = []      # sliding window of per-frame keypoint vectors
sentence = []      # recognised actions shown in the banner (last 5)
predictions = []   # class ids of the most recent predictions (last 10)
threshold = 0.5    # minimum probability for accepting a prediction

cam = cv2.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cam.isOpened():

            # Read feed; stop cleanly if the camera yields no frame
            ret, frame = cam.read()
            if not ret:
                break

            # Make detections and draw landmarks
            image, results = mediapipe_detector(frame, holistic)
            draw_landmarks(image, results)

            # Prediction logic: the window length must match the model input (45 frames)
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-45:]

            if len(sequence) == 45:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                top = int(np.argmax(res))
                predictions.append(top)
                # Only the last 10 predictions are ever inspected; cap the
                # list so it does not grow for the whole session.
                predictions = predictions[-10:]

                # Accept only when every recent prediction agrees with the
                # current one. np.unique(...)[0] returned the *smallest*
                # class id seen, which favoured low-numbered classes.
                stable = all(p == top for p in predictions)
                if stable and res[top] > threshold:
                    # Avoid repeating the same action back to back.
                    if not sentence or actions[top] != sentence[-1]:
                        sentence.append(actions[top])

                sentence = sentence[-5:]

                # Viz probabilities
                image = prob_viz(res, actions, image, colors)

            cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the camera and windows even if a frame raised.
    cam.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

In [None]:
# Manual cleanup cell: the capture object in this notebook is `cam`
# (`cap` was never defined and raised NameError).
cam.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

In [None]:
import tensorflow as tf

# Convert the saved Keras model to TensorFlow Lite.
# from_keras_model expects a model object, not a file path; passing the path
# string caused "'str' object has no attribute 'call'".
keras_model = tf.keras.models.load_model('UmaKeshav.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
# NOTE(review): recurrent (GRU) layers may need extra converter settings such
# as SELECT_TF_OPS — confirm if convert() reports unsupported ops.
tflite_model = converter.convert()

with open('gesture_model.tflite', 'wb') as f:
    f.write(tflite_model)


AttributeError: 'str' object has no attribute 'call'