In [None]:
import os
import shutil
import numpy as np
from matplotlib import pyplot as plt
import cv2
import tensorflow as tf
import tensorflow.keras.layers as tfl
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, ReLU, Dropout, GRU, ConvLSTM2D, Conv3D, Flatten
from tensorflow.keras.utils import to_categorical
import mediapipe as mp
import pickle as pk
import glob
from collections import Counter


In [None]:
# Short aliases for the two MediaPipe modules used by the cells below.
mp_holistic = mp.solutions.holistic # Holistic solution module: provides the Holistic class and the connection sets used when drawing
mp_drawing = mp.solutions.drawing_utils # Drawing helpers: draw_landmarks and DrawingSpec

In [None]:
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return the frame with the detections.

    The frame is converted BGR -> RGB before `model.process` is called,
    flagged read-only for the duration of that call, made writeable again
    and finally converted back to BGR.

    Args:
        image: Frame that `cv2.cvtColor` can convert with COLOR_BGR2RGB.
        model: Object exposing `process(image)`; the callers in this
            notebook pass a MediaPipe Holistic instance.

    Returns:
        Tuple `(bgr_image, results)` where `results` is whatever
        `model.process` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Lock the array while the model reads it, then unlock it again.
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detections

In [None]:
def normalize_zscore(pose, face, lh, rh):
    """Z-score each landmark group independently along axis 0.

    Every group is centred on its own mean and divided by its own standard
    deviation (both taken over axis 0). A small constant (1e-7) is added to
    the standard deviation so that a constant group divides by a non-zero
    value and comes out as zeros.

    Args:
        pose, face, lh, rh: NumPy arrays holding the pose, face, left-hand
            and right-hand key points.

    Returns:
        Tuple `(pose, face, lh, rh)` of normalised arrays, each with the
        same shape as its input.
    """
    def _standardize(points):
        centre = points.mean(axis=0)
        spread = points.std(axis=0) + 1e-7
        return (points - centre) / spread

    return tuple(_standardize(group) for group in (pose, face, lh, rh))

In [None]:
def extract_keypoints(results):
    """Turn one frame's detection results into a flat, normalised feature vector.

    For each of the pose, face, left-hand and right-hand groups the (x, y, z)
    coordinates of every landmark are collected; a group that was not
    detected is replaced by a zero vector of length `count * 3`
    (33, 468, 21 and 21 landmarks respectively). The groups are z-scored by
    `normalize_zscore`, flattened and concatenated in that order.

    Args:
        results: Object exposing `pose_landmarks`, `face_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`; each is either
            falsy or has a `.landmark` iterable of points with x/y/z.

    Returns:
        1-D NumPy array. Its length is 1629 whenever every detected group has
        the landmark count used for its zero fallback
        (assumed to hold for MediaPipe Holistic — TODO confirm).
    """
    def _coords(landmark_list, count):
        if landmark_list:
            return np.array([[pt.x, pt.y, pt.z] for pt in landmark_list.landmark])
        return np.zeros(count * 3)

    pose, face, lh, rh = normalize_zscore(
        _coords(results.pose_landmarks, 33),
        _coords(results.face_landmarks, 468),
        _coords(results.left_hand_landmarks, 21),
        _coords(results.right_hand_landmarks, 21),
    )

    flattened = [group.flatten() for group in (pose, face, lh, rh)]
    return np.concatenate(flattened)

In [None]:
def draw_styled_landmarks(image, results):
    """Draw the face, pose, left-hand and right-hand landmarks onto `image`.

    Each group is drawn with its own pair of drawing specs, in the order
    face -> pose -> left hand -> right hand. Nothing is returned; callers in
    this notebook display `image` afterwards, so drawing is expected to
    happen in place.

    Args:
        image: Frame to draw on.
        results: Detection results exposing `face_landmarks`,
            `pose_landmarks`, `left_hand_landmarks` and
            `right_hand_landmarks`.
    """
    spec = mp_drawing.DrawingSpec

    # (landmarks, connection set, first spec, second spec) per group.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         # Channel was 256 originally, which is outside the 0-255 range.
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, first_spec, second_spec in layers:
        mp_drawing.draw_landmarks(image, landmarks, connections, first_spec, second_spec)

In [None]:
# Module-level Holistic instance with detection and tracking confidence thresholds of 0.25.
# NOTE(review): no cell visible in this notebook reads this object — the video-extraction
# function and the live cell each open their own Holistic in a `with` block — confirm it is still needed.
holistic = mp_holistic.Holistic(min_detection_confidence=0.25, min_tracking_confidence=0.25) 


In [None]:
# Project root that holds the input videos and the generated key-point folders.
# Set the ASL_PROJECT_DIR environment variable to run the notebook on another
# machine; the original absolute path remains the default.
PATH = os.environ.get(
    "ASL_PROJECT_DIR",
    "C:/Users/Abdelrahman Rashad/American_Sign_Language_Recognition-main",
)


def createFolders(videoName, base_dir=None):
    """Create the per-video output folders if they do not exist yet.

    Both `<root>/videos/<videoName>` and `<root>/videosFiltered/<videoName>`
    are created. Each folder is handled independently, so a partially
    existing tree (only one of the two present) is completed instead of being
    skipped or raising.

    Args:
        videoName: Name of the video; used as the folder name.
        base_dir: Optional root directory. Defaults to the module-level PATH.
    """
    root = PATH if base_dir is None else base_dir
    for subfolder in ("videos", "videosFiltered"):
        os.makedirs(os.path.join(root, subfolder, videoName), exist_ok=True)
    

In [None]:
def extractKeyPointsFromVideo(videoName, typeofvideo='word', preview=True):
    """Extract key points from every frame of a video and save them as .npy files.

    The video is read from `PATH/all/<videoName>` when `typeofvideo` is
    'word' and from `PATH/MyTestVideos/<videoName>` otherwise. Each frame is
    resized to 1920x1080, run through a Holistic model and converted to a
    feature vector by `extract_keypoints`; the vector of frame `k` is saved
    to `PATH/videos/<videoName>/<k>.npy` (k starts at 0).

    Args:
        videoName: File name of the video.
        typeofvideo: 'word' selects the "all" folder, any other value selects
            "MyTestVideos".
        preview: When True (default, the original behaviour) the frame
            counter is printed and every annotated frame is shown with
            matplotlib. Pass False to skip the per-frame output.

    Raises:
        FileNotFoundError: If OpenCV cannot open the video.
    """
    source_folder = "all" if typeofvideo == 'word' else "MyTestVideos"
    videoPath = PATH + "/" + source_folder + "/" + videoName
    output_dir = os.path.join(PATH, "videos", videoName)

    cap = cv2.VideoCapture(videoPath)
    try:
        # Fail loudly here instead of silently writing zero frames.
        if not cap.isOpened():
            raise FileNotFoundError("Could not open video: " + videoPath)

        # The sentence workflow never calls createFolders, so make sure the
        # destination exists before the first np.save.
        os.makedirs(output_dir, exist_ok=True)

        count = 0
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while True:
                success, frame = cap.read()
                if preview:
                    print(count)
                if not success:
                    break

                frame = cv2.resize(frame, (1920, 1080), interpolation=cv2.INTER_AREA)
                _, results = mediapipe_detection(frame, holistic)
                eres = extract_keypoints(results)

                if preview:
                    draw_styled_landmarks(frame, results)
                    plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    plt.show()

                np.save(os.path.join(output_dir, str(count)), eres)
                count += 1
    finally:
        # Release the capture even when a frame fails midway.
        cap.release()
        cv2.destroyAllWindows()

        

In [None]:
# Number of consecutive frames that make up one model input sequence.
sequence_length = 35


def _motion_magnitudes(video_path):
    """Return (per-frame-pair motion scores, number of frames read) for a video."""
    magnitudes = []
    frame_count = 0
    previous = None
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            success, frame = cap.read()
            if not success:
                break
            frame_count += 1
            blurred = cv2.GaussianBlur(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), (21, 21), 0)
            if previous is not None:
                # Cast to a Python int so that window sums are exact and
                # cannot wrap around a fixed-width NumPy integer.
                magnitudes.append(int(cv2.absdiff(previous, blurred).sum()))
            previous = blurred
    finally:
        cap.release()
        cv2.destroyAllWindows()
    return magnitudes, frame_count


def _best_window_start(magnitudes, window):
    """Return (start, total) of the first `window`-long slice with the largest sum."""
    best_start, best_total = 0, 0
    last_start = len(magnitudes) - window
    if last_start < 0:
        return best_start, best_total

    running = sum(magnitudes[:window])
    for start in range(last_start + 1):
        if start:
            # Slide the window one step: add the new element, drop the old one.
            running += magnitudes[start + window - 1] - magnitudes[start - 1]
        if running > best_total:
            best_start, best_total = start, running
    return best_start, best_total


def filterKeyPointsUsingMotionDetection(videoName):
    """Copy the `sequence_length` key-point files covering the most motion.

    The video `PATH/all/<videoName>` is scanned once; the motion score of two
    consecutive frames is the sum of the absolute difference of their
    blurred grayscale images. The earliest window of `sequence_length`
    consecutive scores with the largest total is selected (the final window
    is now included in the search), and the files
    `PATH/videos/<videoName>/<start+k>.npy` are copied to
    `PATH/videosFiltered/<videoName>/<k>.npy` for k = 0..sequence_length-1.

    Args:
        videoName: File name of the video; its key points must already have
            been extracted into `PATH/videos/<videoName>`.

    Raises:
        ValueError: If the video has fewer than `sequence_length` readable
            frames (including the case where it cannot be opened).
    """
    videoPath = PATH + "/all/" + videoName
    actionMagnitude, frame_count = _motion_magnitudes(videoPath)

    if frame_count < sequence_length:
        raise ValueError(
            "Video '{}' has only {} readable frames; at least {} are required".format(
                videoName, frame_count, sequence_length
            )
        )

    startIndex, maxMagnitude = _best_window_start(actionMagnitude, sequence_length)
    print("Start Index = "+str(startIndex))
    print("Max Magnitude = "+str(maxMagnitude))

    source_dir = os.path.join(PATH, "videos", videoName)
    target_dir = os.path.join(PATH, "videosFiltered", videoName)
    os.makedirs(target_dir, exist_ok=True)

    for offset in range(sequence_length):
        # Files are renumbered so that the filtered sequence starts at 0.
        shutil.copyfile(
            os.path.join(source_dir, "{}.npy".format(startIndex + offset)),
            os.path.join(target_dir, "{}.npy".format(offset)),
        )
    

In [None]:
def loadKeyPoints(videoName, typeofvideo='word'):
    """Load the saved per-frame key points of a video as one batch of size 1.

    For 'word' videos exactly `sequence_length` frames are read from
    `PATH/videosFiltered/<videoName>`. For any other type every `.npy` file
    in `PATH/videos/<videoName>` is counted and frames `0..count-1` are read
    in numeric order. The folder is now derived from PATH instead of a second
    hard-coded absolute path.

    Args:
        videoName: Name of the video whose key points were saved earlier.
        typeofvideo: 'word' for a filtered fixed-length clip, anything else
            for the full unfiltered frame sequence.

    Returns:
        NumPy array with a leading axis of size 1 followed by the frame axis,
        i.e. shape `(1, n_frames, ...)`.

    Raises:
        FileNotFoundError: If no frame files exist for a non-'word' video, or
            (from `np.load`) if an expected frame file is missing.
    """
    if typeofvideo == 'word':
        frames_dir = os.path.join(PATH, "videosFiltered", videoName)
        frame_count = sequence_length
    else:
        frames_dir = os.path.join(PATH, "videos", videoName)
        frame_count = len(glob.glob(os.path.join(frames_dir, "*.npy")))
        if frame_count == 0:
            raise FileNotFoundError(
                "No key-point files found in {}; run extractKeyPointsFromVideo first".format(frames_dir)
            )

    frames = [
        np.load(os.path.join(frames_dir, "{}.npy".format(frame_num)))
        for frame_num in range(frame_count)
    ]
    return np.expand_dims(np.array(frames), axis=0)

In [None]:
# Class labels. The position of a label is the class index it stands for:
# predictions are decoded with words[np.argmax(...)], so the order must not change.
# The stray trailing tab in 'Light-blue' has been removed.
words = np.array([
    'Opaque', 'Red', 'Green', 'Yellow', 'Bright', 'Light-blue', 'Colors', 'Pink',
    'Women', 'Enemy', 'Son', 'Man', 'Away', 'Drawer', 'Born', 'Learn',
    'Call', 'Skimmer', 'Bitter', 'Sweet milk', 'Milk', 'Water', 'Food', 'Argentina',
    'Uruguay', 'Country', 'Last name', 'Where', 'Mock', 'Birthday', 'Breakfast', 'Photo',
    'Hungry', 'Map', 'Coin', 'Music', 'Ship', 'None', 'Name', 'Patience',
    'Perfume', 'Deaf', 'Trap', 'Rice', 'Barbecue', 'Candy', 'Chewing-gum', 'Spaghetti',
    'Yogurt', 'Accept', 'Thanks', 'Shut down', 'Appear', 'To land', 'Catch', 'Help',
    'Dance', 'Bathe', 'Buy', 'Copy', 'Run', 'Realize', 'Give', 'Find',
])
len(words)

In [None]:
# Load the trained classifier from <PATH>/modelAsl/v2. The location is derived
# from PATH so it follows the project root instead of a second hard-coded,
# drive-less absolute path.
reconstructed_model = tf.keras.models.load_model(os.path.join(PATH, "modelAsl", "v2"))

# Word Video

In [None]:
# End-to-end prediction for a single-word clip stored under PATH/all:
# create the output folders -> extract per-frame key points -> keep the
# sequence_length-frame window with the most motion -> load it -> classify.
videoName="001_003_005.mp4"
createFolders(videoName)
extractKeyPointsFromVideo(videoName,typeofvideo="word")
filterKeyPointsUsingMotionDetection(videoName)
allKeyPointsForOneVideo=loadKeyPoints(videoName,typeofvideo="word") 
# `p` holds the model output for this one sequence; it is decoded in the next cell.
p = reconstructed_model.predict(allKeyPointsForOneVideo)

In [None]:
# Decode the prediction: the index of the largest model output selects the label in `words`.
print(words[np.argmax(p)])

# Sentence Video

In [None]:
# Classify every sliding window of `sequence_length` frames of a sentence video
# and collect the predicted word of each window in `result`.
result=[]
# NOTE(review): the "Yello" spelling presumably matches the file name on disk — confirm.
videoName="RedGreenYello.mp4"
# Key points are loaded from PATH/videos/<videoName>; uncomment the next line to
# (re-)extract them from PATH/MyTestVideos first.
# extractKeyPointsFromVideo(videoName,typeofvideo="sentence")
allKeyPointsForOneVideo=loadKeyPoints(videoName,typeofvideo='sentence')
# +1 so the final full window (ending on the last frame) is classified too;
# a video of exactly sequence_length frames now yields one prediction instead of none.
for i in range(0,allKeyPointsForOneVideo.shape[1]-sequence_length+1):
    p = reconstructed_model.predict(allKeyPointsForOneVideo[:,i:i+sequence_length,:])
    result.append(words[np.argmax(p)])

In [None]:
# Tally how many sliding windows were classified as each word.
Counter(result)

In [None]:
# Keep only the words predicted for more than `min_votes` windows, in order of
# first appearance, and join them into the final sentence.
# NOTE(review): 35 equals sequence_length — unclear whether that is intentional; confirm.
min_votes = 35

# Count once instead of rebuilding the Counter on every loop iteration.
vote_counts = Counter(result)
sentence = [word for word, votes in vote_counts.items() if votes > min_votes]

FinalResult=' '.join(sentence)
FinalResult

# Live

In [None]:
# Live recognition from the default webcam. Press "q" in the preview window to stop.
sequence=[]      # rolling buffer of the most recent per-frame key-point vectors
sentence=[]      # recognised words shown in the overlay (last five at most)
threshold=0.96   # minimum model score required to accept a word
cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.2,min_tracking_confidence=0.2) as holistic:
        while cap.isOpened():
            ret,frame=cap.read()
            # A failed read returns no frame; stop instead of crashing in cvtColor.
            if not ret:
                break

            image,results=mediapipe_detection(frame,holistic)
            draw_styled_landmarks(image,results)

            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence=sequence[-sequence_length:]

            # Predict only once the buffer holds a full sequence.
            if len(sequence)==sequence_length:
                res=reconstructed_model.predict(np.expand_dims(sequence,axis=0))[0]
                best=np.argmax(res)
                print(words[best])
                print(res[best])

                # Accept a confident word unless it repeats the previous one.
                if res[best]>threshold and (not sentence or words[best]!=sentence[-1]):
                    sentence.append(words[best])

                sentence=sentence[-5:]

            cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3,30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow('OpenCV Feed',image)

            if cv2.waitKey(10)& 0xFF==ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()