# globals



In [1]:
# lib
import os
import mediapipe as mp
import cv2
import numpy as np

# utils
from utils import actions,arg_max

# models & realTime
from pytorch_model import PytorchPredictor
from keras_model import KerasPredictor
# from RealTime_predector import RealTime


# Number of entries in `actions` (imported from utils); presumably one class per
# action label. NOTE(review): not referenced again in the visible cells.
n_classes = len(actions)

# Locations of the saved model weights. These are relative paths, so they resolve
# against the kernel's current working directory.
WEIGHTS_PATH=os.path.join("..","..","sign_language_detection","ensemble","V1")
KERAS_WEIGHTS_PATH = os.path.join(WEIGHTS_PATH,"keras_weights","V1.h5")
TORCH_WEIGHTS_PATH = os.path.join(WEIGHTS_PATH,"pytorch_weights.tar")

# pytorch Model

In [2]:
# Build the PyTorch predictor from the checkpoint path configured above.
# NOTE(review): presumably the weights are loaded at construction time - confirm in pytorch_model.py.
pytorch_predictor = PytorchPredictor(path=TORCH_WEIGHTS_PATH)

# Keras model

In [3]:
# Build the Keras predictor from the .h5 weights path configured above.
# NOTE(review): presumably the weights are loaded at construction time - confirm in keras_model.py.
keras_predictor = KerasPredictor(path=KERAS_WEIGHTS_PATH)

No GPU found
Num GPUs Available:  0


2022-03-10 18:01:01.133383: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-10 18:01:01.133924: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/mina/.local/lib/python3.8/site-packages/cv2/../../lib64:
2022-03-10 18:01:01.133964: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/mina/.local/lib/python3.8/site-packages/cv2/../../lib64:
2022-03-10 18:01:01.133997: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.

# Real Time

In [None]:
import cv2
import numpy as np
import mediapipe as mp

# Short aliases for the MediaPipe modules used by RealTime below.
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Landmark counts. `num_hand_marks` sizes the zero-filled fallback in
# RealTime.extract_keypoints (two coordinates per landmark).
# NOTE(review): `num_pose_marks` is not referenced in the visible cells.
num_hand_marks = 21
num_pose_marks = 33

# Three lists of landmark indices.
# NOTE(review): not referenced in the visible cells; presumably ordered as
# pose / left hand / right hand - confirm against the training code.
pose_selected_landmarks = [
    [0,2,5,11,13,15,12,14,16],
    [0,2,4,5,8,9,12,13,16,17,20],
    [0,2,4,5,8,9,12,13,16,17,20],
]



def mediapipe_detection(image, model):
    """Run `model.process` on a BGR frame and hand back a BGR frame plus the results.

    Args:
        image: frame in BGR channel order (as produced by OpenCV capture).
        model: object exposing ``process(rgb_image)``; here a MediaPipe Holistic instance.

    Returns:
        Tuple ``(image, results)``: the frame after a BGR->RGB->BGR round trip,
        and whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Flag the buffer read-only while the model consumes it, then restore it.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results
    
    

class RealTime:
    """Buffers webcam frames and tracks hand movement between consecutive frames.

    For every frame the hand keypoints are reduced to one scalar per hand (the
    rounded sum of all x/y coordinates). Comparing that scalar with the one of
    the last accepted frame decides whether the hands moved enough for the frame
    to be kept.

    Attributes:
        fsize: (width, height) every incoming frame is resized to.
        listed_frames: frames accepted so far for the current sequence.
        holistic: model object handed to ``mediapipe_detection``.
        frame_left_hand / frame_right_hand: scalar summaries of the current frame
            (``None`` until ``read_frame`` has run).
        last_frame_left_hand / last_frame_right_hand: scalar summaries of the last
            accepted frame (``None`` until ``update_last_frame`` has run).
    """

    def __init__(self, holistic, fsize=(512, 512)):
        self.fsize = fsize
        self.listed_frames = []
        self.holistic = holistic

        # current values (scalar summaries)
        self.frame_left_hand = None
        self.frame_right_hand = None

        # previous values (scalar summaries of the last accepted frame)
        self.last_frame_left_hand = None
        self.last_frame_right_hand = None

        # Keypoint arrays behind the summaries above. They are kept so that a hand
        # that is not detected can fall back to its last accepted keypoints; the
        # scalar summaries cannot serve as that fallback.
        self._frame_left_keypoints = None
        self._frame_right_keypoints = None
        self._last_left_keypoints = None
        self._last_right_keypoints = None

    def read_frame(self, frame):
        """Resize a frame, run detection, draw landmarks and update the current summaries.

        Args:
            frame: BGR image from the capture device.

        Returns:
            Tuple ``(frame, image)``: the resized input frame and the frame
            returned by ``mediapipe_detection`` with landmarks drawn on it.
        """
        frame = cv2.resize(frame, self.fsize)
        image, results = mediapipe_detection(frame, self.holistic)
        self.draw_styled_landmarks(image, results)

        # Fall back to the keypoint ARRAYS of the last accepted frame. Passing the
        # scalar summaries here (as before) never satisfied the ndarray check in
        # extract_keypoints, so the fallback was dead code.
        left_keypoints, right_keypoints = self.extract_keypoints(
            results,
            left=self._last_left_keypoints,
            right=self._last_right_keypoints,
        )
        self._frame_left_keypoints = left_keypoints
        self._frame_right_keypoints = right_keypoints

        self.frame_left_hand = left_keypoints.sum().round(2)
        self.frame_right_hand = right_keypoints.sum().round(2)
        return frame, image

    def update_last_frame(self):
        """Promote the current frame's summaries and keypoints to 'last accepted'."""
        self.last_frame_left_hand = self.frame_left_hand
        self.last_frame_right_hand = self.frame_right_hand
        self._last_left_keypoints = self._frame_left_keypoints
        self._last_right_keypoints = self._frame_right_keypoints

    def add_listed_frame(self, frame):
        """Append a frame to the buffer of accepted frames."""
        self.listed_frames.append(frame)

    @staticmethod
    def _abs_diff(previous, current):
        """Return ``|previous - current|`` rounded to 2 decimals, or None if either is missing."""
        if previous is None or current is None:
            return None
        return np.abs(previous - current).round(2)

    def considered_frame(self, right_hand_diff_threshold=0.5, left_hand_diff_threshold=0.5):
        """Tell whether the current frame differs enough from the last accepted one.

        Args:
            right_hand_diff_threshold: minimum change of the right-hand summary.
            left_hand_diff_threshold: minimum change of the left-hand summary.

        Returns:
            True when either hand's summary changed by at least its threshold, or
            when there is nothing to compare against yet; False otherwise.
        """
        right_hand_diff = self._abs_diff(self.last_frame_right_hand, self.frame_right_hand)
        left_hand_diff = self._abs_diff(self.last_frame_left_hand, self.frame_left_hand)

        # No previous (or current) summary yet: accept the frame. This replaces the
        # former bare `except:` that hid the TypeError raised by `None - value`.
        if right_hand_diff is None or left_hand_diff is None:
            return True

        return bool(
            right_hand_diff >= right_hand_diff_threshold
            or left_hand_diff >= left_hand_diff_threshold
        )

    def extract_keypoints(self, results, left, right):
        """Flatten the (x, y) coordinates of both hands into 1-D arrays.

        Args:
            results: object with ``left_hand_landmarks`` and ``right_hand_landmarks``;
                each is falsy or exposes ``.landmark``, an iterable of items with
                ``x`` and ``y``.
            left: fallback array for the left hand when it was not detected;
                anything that is not an ``np.ndarray`` yields zeros instead.
            right: same, for the right hand.

        Returns:
            Tuple ``(left_hand, right_hand)`` of 1-D arrays.
        """
        def flatten_hand(landmarks, fallback):
            if landmarks:
                return np.array([[res.x, res.y] for res in landmarks.landmark]).flatten()
            if isinstance(fallback, np.ndarray):
                return fallback
            return np.zeros(num_hand_marks * 2)

        left_hand = flatten_hand(results.left_hand_landmarks, left)
        right_hand = flatten_hand(results.right_hand_landmarks, right)
        return left_hand, right_hand

    def draw_styled_landmarks(self, image, results):
        """Draw pose, left-hand and right-hand landmarks onto ``image`` in place."""
        # Draw pose connections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                )
        # Draw left hand connections
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                )
        # Draw right hand connections
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                )

    def get_frames_indices(self, frames_no):
        """Drop the first buffered frame and return evenly spaced indices into the rest.

        Note: this mutates ``listed_frames`` on every call; the returned indices
        refer to the buffer AFTER the first frame was removed.

        Args:
            frames_no: how many indices to produce.

        Returns:
            ``np.int16`` array of ``frames_no`` indices between 0 and ``len(self) - 1``.
        """
        self.listed_frames = self.listed_frames[1:]
        return np.linspace(0, len(self.listed_frames)-1, frames_no, dtype=np.int16)

    def truncate_listed_frames(self):
        """Empty the buffer of accepted frames."""
        self.listed_frames = []

    def __getitem__(self, idx):
        return self.listed_frames[idx]

    def __len__(self):
        return len(self.listed_frames)

    def get_data(self):
        """Return the current/previous summaries and their absolute differences.

        Returns:
            Dict with keys ``"L"``, ``"R"`` (current), ``"L2"``, ``"R2"`` (last
            accepted) and ``"L-D"``, ``"R-D"`` (rounded absolute differences).
            A difference is ``None`` while either of its operands is still unset.
        """
        return {
            "L": self.frame_left_hand,
            "R": self.frame_right_hand,
            "L2": self.last_frame_left_hand,
            "R2": self.last_frame_right_hand,
            "L-D": self._abs_diff(self.last_frame_left_hand, self.frame_left_hand),
            "R-D": self._abs_diff(self.last_frame_right_hand, self.frame_right_hand),
        }



In [4]:
# MediaPipe Holistic model with 0.5 detection and tracking confidence thresholds.
holistic = mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)
# Size every captured frame is resized to by RealTime.read_frame.
fsize = (512,512)

# Frame buffer / motion tracker used by the prediction loop.
real_time = RealTime(holistic, fsize)

# Predictions

In [5]:
# Open capture device index 0 (typically the default webcam).
cap = cv2.VideoCapture(0)


def display_sentence(frame, sentence):
    """Draw a filled banner at the top of ``frame`` and write the sentence on it.

    Args:
        frame: image to draw on (modified in place).
        sentence: iterable of strings; joined with single spaces.
    """
    banner_color = (245, 117, 16)
    text_color = (255, 255, 255)

    # A thickness of -1 makes OpenCV fill the rectangle.
    cv2.rectangle(frame, (0, 0), (640, 40), banner_color, -1)

    text = ' '.join(sentence)
    cv2.putText(frame, text, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2, cv2.LINE_AA)


def dispay_probability(frame,data):
    """Overlay the hand summaries from ``data`` on ``frame`` as labelled text rows.

    Args:
        frame: image to draw on (modified in place).
        data: mapping with keys "L", "R", "L2", "R2", "L-D" and "R-D"
            (the shape returned by ``RealTime.get_data``).
    """
    text_color = (0, 0, 255)

    # (key, row slot); each slot is 40 px tall and the first baseline sits at y=85.
    layout = (
        ("L", 0),
        ("R", 1),
        ("L2", 4),
        ("R2", 5),
        ("L-D", 8),
        ("R-D", 9),
    )
    for key, slot in layout:
        label = key + ":" + str(data[key])
        origin = (0, 85 + slot * 40)
        cv2.putText(frame, label, origin, cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2, cv2.LINE_8)


def display_counters(frame,counter,discarded_frames):
    """Overlay the accepted-frame and discarded-frame counters on ``frame``.

    Args:
        frame: image to draw on (modified in place).
        counter: value written on the upper row.
        discarded_frames: value written on the row below it.
    """
    text_color = (100, 250, 150)

    # Rows 5 and 6 of the 40 px text grid that starts at y=85.
    for slot, value in ((5, counter), (6, discarded_frames)):
        origin = (250, 85 + slot * 40)
        cv2.putText(frame, str(value), origin, cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2, cv2.LINE_8)


# Rolling state shown in the overlay.
sentence = []      # most recent predicted action labels (at most 4 kept)
predictions = []   # most recent predicted class indices (at most 16 kept)

counter = 0            # frames accepted since the last reset
discarded_frames = 0   # consecutive frames rejected since the last accepted one

# try/finally guarantees the camera and the window are released even when the
# loop is interrupted or a predictor raises.
try:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        if not ret:
            break

        frame, image = real_time.read_frame(frame)

        if real_time.considered_frame():
            # Hands moved enough: keep the frame as part of the current sequence.
            counter += 1
            real_time.update_last_frame()
            discarded_frames = 0
            real_time.add_listed_frame(frame)
        else:
            discarded_frames += 1
            # Ten still frames in a row end the current sequence.
            if discarded_frames == 10:
                counter = 0
                discarded_frames = 0
                if len(real_time) >= 16:
                    # Sample 16 evenly spaced frames and feed them to both models.
                    frame_list = real_time.get_frames_indices(frames_no=16)

                    for frame_idx in frame_list:
                        pytorch_predictor.add_frame(real_time[frame_idx])
                        keras_predictor.add_frame(real_time[frame_idx])

                    res1 = pytorch_predictor.predict()
                    res2 = keras_predictor.predict()
                    # Ensemble by summing both models' outputs.
                    res = res1 + res2

                    # Named `predicted_idx` so the `arg_max` helper imported from
                    # utils is not overwritten by an integer.
                    predicted_idx = int(np.argmax(res))
                    predictions.append(predicted_idx)
                    predictions = predictions[-16:]
                    print(predictions)
                    real_time.truncate_listed_frames()
                    sentence.append(actions[predicted_idx])
                    sentence = sentence[-4:]
                else:
                    # Too few frames for a prediction: discard the sequence.
                    real_time.truncate_listed_frames()

        display_sentence(image, sentence)
        dispay_probability(image, real_time.get_data())
        display_counters(image, counter, discarded_frames)

        cv2.imshow("Real-Time", image)

        # Press 'q' to stop; waitKey also paces the loop (50 ms per frame).
        if cv2.waitKey(50) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


[0]
[0, 3]
[0, 3, 3]
[0, 3, 3, 3]
[0, 3, 3, 3, 6]
[0, 3, 3, 3, 6, 2]
[0, 3, 3, 3, 6, 2, 3]
[0, 3, 3, 3, 6, 2, 3, 0]
[0, 3, 3, 3, 6, 2, 3, 0, 4]


In [6]:
# Manual cleanup: release the capture device and close any OpenCV windows.
cap.release()
cv2.destroyAllWindows()