<h1>sign laungauge interpertation</h1>



<h3>Import libraryes</h3>

In [8]:
import tensorflow as tf
import cv2
import matplotlib as plt
import numpy as np
import time
import mediapipe as mp

<h3>Recording frames & getting Landmarks</h3>


In [9]:
mp_hol=mp.solutions.holistic
mp_draw=mp.solutions.drawing_utils

In [10]:
def draw(frame,landmarks,mp_draw,mp_hol):
        mp_draw.draw_landmarks(frame,landmarks.face_landmarks,mp_hol.FACEMESH_CONTOURS,
                              mp_draw.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=1),
                              mp_draw.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=1))
        mp_draw.draw_landmarks(frame,landmarks.left_hand_landmarks,mp_hol.HAND_CONNECTIONS,
                              mp_draw.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=1),
                              mp_draw.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=1))
        mp_draw.draw_landmarks(frame,landmarks.right_hand_landmarks,mp_hol.HAND_CONNECTIONS,
                              mp_draw.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=1),
                              mp_draw.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=1))
        mp_draw.draw_landmarks(frame,landmarks.pose_landmarks,mp_hol.POSE_CONNECTIONS,
                              mp_draw.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=1),
                              mp_draw.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=1))

In [11]:
#just capture
cam=cv2.VideoCapture(0)

with mp_hol.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as hol:
    while cam.isOpened():
        ret,frame=cam.read()
        if not ret:
            break
        image=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
        landmarks=hol.process(image)
           
        #drawing on image
        draw(frame,landmarks,mp_draw,mp_hol)
        
        frame=cv2.flip(frame,1)
        cv2.imshow("capture",frame)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cam.release()
    cv2.destroyAllWindows()


<h3>Extracting landmarks</h3>

In [12]:

def extract_landmarks(landmarks):
    if landmarks.pose_landmarks:
        pose=np.array([[p.x,p.y,p.z,p.visibility] for p in landmarks.pose_landmarks.landmark]).flatten()
    else:
        pose=np.zeros(132,)
    if landmarks.left_hand_landmarks:
        left_hand=np.array([[p.x,p.y,p.z] for p in landmarks.left_hand_landmarks.landmark]).flatten()
    else:
        left_hand=np.zeros(63,)
    if landmarks.right_hand_landmarks:
        right_hand=np.array([[p.x,p.y,p.z] for p in landmarks.right_hand_landmarks.landmark]).flatten()
    else:
        right_hand=np.zeros(63,)
    if landmarks.face_landmarks:
        face=np.array([[p.x,p.y,p.z] for p in landmarks.face_landmarks.landmark]).flatten()
    else:
        face=np.zeros(1404,)

    return np.concatenate([face,pose,left_hand,right_hand])
        

In [32]:
print("pse shape: ",pose.shape,"r hand shape: ",right_hand.shape,"l hand shape: ",left_hand.shape,"face shape: ",face.shape)
extract_landmarks(landmarks).shape

pse shape:  (132,) r hand shape:  (63,) l hand shape:  (63,) face shape:  (1404,)


(1662,)

### collecting data

<h4>Making folders</h4>

In [13]:
import os

In [16]:
path_=os.path.join("Data")
no_frames=30
no_video=30
actions=["hello","thanks","-"]


In [17]:
for action in actions:
    for vid in range(no_video):
        try:
            os.makedirs(os.path.join(path_,action,str(vid)))
        except:
            pass

<h4>collecting frames</h4>

In [18]:
cam=cv2.VideoCapture(0)
flag=False
with mp_hol.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as hol:
    for action in actions:
        if flag:
                break
        for vid in range(no_video):
            if flag:
                break
            for frame_no in range(no_frames+1):
                
            
                ret,frame=cam.read()
                if not ret:
                    break
                image=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
                landmarks=hol.process(image)
                   
                #drawing on image
                draw(frame,landmarks,mp_draw,mp_hol)
                frame=cv2.flip(frame,1)
                
                if frame_no==0:
                    cv2.putText(frame,"Starting collection",(0,50),
                                cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),4,cv2.LINE_AA)
                    cv2.putText(frame,"collecting frame for {} video no {} Frame no {}".format(action,vid,frame_no),(120,20),
                                cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)
                    cv2.imshow("capture",frame)
                    if cv2.waitKey(50000) or 0xFF == ord('c'): #change capture technique
                        continue
                else:
                    cv2.putText(frame,"collecting frame for {} video no {} Frame no {}".format(action,vid,frame_no),(120,20),
                                cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)

                
                feature=extract_landmarks(landmarks)
                path=os.path.join(path_,action,str(vid),str(frame_no-1))
                np.save(path,feature)

                
                    
                cv2.imshow("capture",frame)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    flag=True
                    break
        
    cam.release()
    cv2.destroyAllWindows()


In [38]:
cam.release()
cv2.destroyAllWindows()

<h3>Loading dataset</h3>

In [71]:
import os
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [102]:
label_map=[name for name in os.listdir(path_)] #getting label names

print(label_map)

['-', 'hello', 'thanks']


In [73]:
data=[]
labels=[]
for i,label in enumerate(label_map):
    for vid_no in range(no_video):
        vid=[]
        for frame_no in range(0,no_frames):
            res=np.load(os.path.join(path_,label,str(vid_no),str(frame_no)+".npy"))
            vid.append(res)
        data.append(vid)
        labels.append(i)
            
            

In [74]:
data=np.array(data)
labels=np.array(labels)

In [75]:
print(data.shape)
print(len(labels))


(90, 29, 1662)
90


In [76]:
labels=to_categorical(np.array(labels)).astype(int)

In [77]:
labels.shape

(90, 3)

In [78]:
xtrain,xtest,ytrain,ytest=train_test_split(data,labels,test_size=0.05)

In [79]:
ytest.shape

(5, 3)

<h3>Model Architecture</h3>

In [90]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [91]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [92]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(labels.shape[1], activation='softmax'))



In [93]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [84]:
model.fit(xtrain, ytrain, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000

KeyboardInterrupt: 

In [85]:

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 29, 64)            442112    
                                                                 
 lstm_13 (LSTM)              (None, 29, 128)           98816     
                                                                 
 lstm_14 (LSTM)              (None, 64)                49408     
                                                                 
 dense_11 (Dense)            (None, 64)                4160      
                                                                 
 dense_12 (Dense)            (None, 32)                2080      
                                                                 
 dense_13 (Dense)            (None, 3)                 99        
                                                                 
Total params: 596,675
Trainable params: 596,675
Non-tr

<h3>saving</h3>

In [87]:
model.save(os.path.join("models","main1.h5"))

In [88]:
del model 

In [94]:
model.load_weights(os.path.join("models","main.h5"))

<h3>Evaluation</h3>

In [95]:
np.array([xtest[0]]).shape

(1, 29, 1662)

In [96]:
res=model.predict(xtest)



In [97]:
ytrue = np.argmax(ytest, axis=1).tolist()
res = np.argmax(res, axis=1).tolist()

In [98]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [99]:
multilabel_confusion_matrix(ytrue, res)

array([[[2, 0],
        [0, 3]],

       [[4, 0],
        [0, 1]],

       [[4, 0],
        [0, 1]]], dtype=int64)

In [100]:
accuracy_score(ytrue,res)

1.0

<h3>REALTIME TESTING</h3>

In [103]:
#just capture
cam=cv2.VideoCapture(0)
action=[]
text=[]
# predictions = []
trsh=0.9
res=np.array([0,0])

with mp_hol.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as hol:
    while cam.isOpened():
        ret,frame=cam.read()
        if not ret:
            break
        image=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
        landmarks=hol.process(image)

        points=extract_landmarks(landmarks)

        #getting 30 frames of action
        action.append(points)
        action=action[-30:]
        if len(action)==30:
            res=model.predict(np.expand_dims(action,axis=0))[0]
            # predictions.append(np.argmax(res))
            p_idx=np.argmax(res)
            # predictions=predictions[-10:]
            # print(predictions)
            # if np.unique(predictions)[-1]==np.argmax(res): 
               
            if  res[p_idx]>trsh:
                if len(text)>0:
                    if label_map[p_idx]!=text[-1]:
                        text.append(label_map[p_idx])
                else:
                    text.append(label_map[p_idx])

                
            
            if len(text)>5:
                text=text[-5:]
                
                
                
              
           
        #drawing on image
        draw(frame,landmarks,mp_draw,mp_hol)
        frame=cv2.flip(frame,1)

        cv2.rectangle(frame, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(frame, ' '.join(text), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow("capture",frame)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cam.release()
    cv2.destroyAllWindows()




In [93]:
cam.release()
cv2.destroyAllWindows()
del res

In [94]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1
