In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

## 2. keypoints from dataset

In [2]:
mp_holistic = mp.solutions.holistic 
mp_drawing = mp.solutions.drawing_utils 

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
    image.flags.writeable = False                  
    results = model.process(image)                 
    image.flags.writeable = True                    
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 
    return image, results

In [4]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) 
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [1]:
def draw_styled_landmarks(image, results):

    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 

    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
 
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

## 3. Extract Keypoint Values

In [16]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, lh, rh])

In [17]:
result_test = extract_keypoints(results)
result_test.shape

(258,)

## 4. Setup Folders for Collection

In [18]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('keypoint_data_noface_2') 

# Actions that we try to detect
actions = np.array([ 
        "ali_ni_goy_amttai_ve", 
        "ger_buliin_bagts_baina_uu",
        "kheden_minutiin_daraa_garakh_ve",
        "naizuudiin_bagts_baina_uu",
        "no_action", 
        "sain_baina_uu",
        "salfetka_avay",
        "uuchlaarai",
        "00_khaana_ve"
     ])

# Ten videos worth of data
no_sequences = 99

# Videos are going to be 45 frames in length
sequence_length = 45

In [19]:
for action in actions: 
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

In [None]:
for action in actions:  
    for sequence in range(no_sequences):
        fileName = f"raw_data/{action}/{sequence+1}.mp4"
        cap = cv2.VideoCapture(fileName)
        with mp_holistic.Holistic(min_detection_confidence=0.8, min_tracking_confidence=0.8) as holistic:
            for frame_num in range(sequence_length):
                ret, frame = cap.read()
                image, results = mediapipe_detection(frame, holistic)
                draw_styled_landmarks(image, results)
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                print(npy_path)
                np.save(npy_path, keypoints)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
            cap.release()
            cv2.destroyAllWindows()

In [17]:
cap.release()
cv2.destroyAllWindows()


## 5. Preprocessing

In [20]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

2022-04-01 18:19:52.663945: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/saruul/.local/lib/python3.9/site-packages/cv2/../../lib64:
2022-04-01 18:19:52.663966: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [21]:
label_map = {label:num for num, label in enumerate(actions)}

In [23]:
import os
print(os.path.abspath("."))

/home/saruul/dcomp/main/Mongolian-Handsign-Recognition


In [26]:
(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))

'keypoint_data_noface_2/00_khaana_ve/29/29.npy'

In [25]:
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [29]:
np.set_printoptions(threshold=np.inf)
dummy_data = np.load("keypoint_data_noface/00_khaana_ve/0/5.npy")
np.info(dummy_data)
print(dummy_data.ndim)
print(dummy_data.size)

class:  ndarray
shape:  (258,)
strides:  (8,)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  True
data pointer: 0x56498b90d340
byteorder:  little
byteswap:  False
type: float64
1
258


In [30]:
X = np.array(sequences)

In [31]:
X.shape

(270, 30, 258)

In [32]:
y = to_categorical(labels).astype(int)

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [34]:
y_test.shape

(27, 9)

## 6. LSTM Neural Network

In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from tensorflow import keras

In [36]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [39]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,258)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [38]:
opt = keras.optimizers.Adam(learning_rate=0.001)

In [40]:
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [41]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 30, 64)            82688     
                                                                 
 lstm_4 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_5 (LSTM)               (None, 64)                49408     
                                                                 
 dense_3 (Dense)             (None, 32)                2080      
                                                                 
 dense_4 (Dense)             (None, 16)                528       
                                                                 
 dense_5 (Dense)             (None, 9)                 153       
                                                                 
Total params: 233,673
Trainable params: 233,673
Non-tr

In [None]:
model.fit(X_train, y_train, batch_size=128, epochs=300, callbacks=[tb_callback])

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<keras.callbacks.History at 0x7fa753225160>

## 7. Make Predictions

In [42]:
res = model.predict(X_test)

## 8. Save Weights

In [43]:
model.load_weights('batch128_ep300_noface.h5')

## 9. Evaluation using Confusion Matrix and Accuracy

In [44]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [45]:
yhat = model.predict(X_test)

In [46]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [47]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[25,  0],
        [ 0,  2]],

       [[23,  0],
        [ 0,  4]],

       [[24,  0],
        [ 1,  2]],

       [[25,  0],
        [ 0,  2]],

       [[24,  0],
        [ 0,  3]],

       [[25,  0],
        [ 0,  2]],

       [[22,  0],
        [ 0,  5]],

       [[23,  1],
        [ 0,  3]],

       [[24,  0],
        [ 0,  3]]])

In [48]:
accuracy_score(ytrue, yhat)

0.9629629629629629

## 10. Test in Real Time

In [49]:
from scipy import stats

In [47]:
def pro_vis(input_frame):
    return input_frame.copy() 

In [66]:
# We have no cyrillic supporting font in OpenCV Puttext.
# So we decided  to do mapping on latin text
cryl_mapping = {
"uuchlaarai": " ...",
"salfetka_avay": "салфетка авъя",
"ger_buliin_bagts_baina_uu": " гэр бүлийн багц байна уу",
"kheden_minutiin_daraa_garakh_ve": " хэдэн минутын дараа гарах вэ",
"00_khaana_ve": " 00 хаана вэ",
"ali_ni_goy_amttai_ve": " аль нь гоё амттай вэ",
"sain_baina_uu": " сайн байна уу",
"naizuudiin_bagts_baina_uu": " найзуудын багц байна уу",
"no_action": " ..."
}

In [67]:
import cv2
import datetime
import imutils
import numpy as np

classNames = []
classFile = "coco.names"
with open(classFile, 'rt') as f:
    classNames = f.read().rstrip("\n").split("\n")

configPath = "ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"
weightsPath = "frozen_inference_graph.pb" 

net = cv2.dnn_DetectionModel(weightsPath, configPath)

net.setInputSize(320, 320)
net.setInputScale(1.0/127.5)
net.setInputMean((127.5, 127.5, 127.5))
net.setInputSwapRB(True) 


<dnn_Model 0x7f78205f5770>

In [68]:
import cv2
import datetime
import imutils
import numpy as np

sequence = []
sentence = []
predictions = []
threshold = 0.5
classNames = []

cap = cv2.VideoCapture(0)

classFile = "coco.names"
with open(classFile, 'rt') as f:
    classNames = f.read().rstrip("\n").split("\n")

configPath = "ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"
weightsPath = "frozen_inference_graph.pb" 

net = cv2.dnn_DetectionModel(weightsPath, configPath)

net.setInputSize(320, 320)
net.setInputScale(1.0/127.5)
net.setInputMean((127.5, 127.5, 127.5))
net.setInputSwapRB(True) 

flag = 0 

with mp_holistic.Holistic(
    min_detection_confidence=0.8, 
    min_tracking_confidence=0.8) as holistic:
    while cap.isOpened():

        ret, frame = cap.read()
        
        image, results = mediapipe_detection(frame, holistic)
        # print(results)

        classIds, confs, bbox = net.detect(image, confThreshold=0.5)

        # print(classIds, confs, bbox)
        
        if len(classIds) != 0:
            for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
                cv2.rectangle(image, box, color=(0, 255, 0), thickness=1)
                flag = 1


        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold and res[np.argmax(res)] < 0.8: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

        #if res[np.argmax(res)] > 0.6:
        # cv2.putText(image, "Conf: " + str(res[np.argmax(res)])[:4] + str(cryl_mapping[actions[np.argmax(res)]]), (15,30), 
                    #    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
    
        cv2.putText(image, str(cryl_mapping[actions[np.argmax(res)]]), (box[0] + 10, box[1] + 30),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (2, 255, 0), 2)
        #else:
        # cv2.putText(image, " ", (15,30), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        resize = cv2.resize(image, (800, 600)) 
    
        cv2.imshow('Real Time test', resize)

        # Break bitch
        if cv2.waitKey(10) & 0xFF == ord('C'):
            break
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

sain_baina_uu
sain_baina_uu
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
uuchlaarai
uuchlaarai
uuchlaarai
sain_baina_uu
sain_baina_uu
sain_baina_uu
sain_baina_uu
uuchlaarai
uuchlaarai
ger_buliin_bagts_baina_uu
00_khaana_ve
00_khaana_ve
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
00_khaana_ve
ger_buliin_bagts_baina_uu
00_khaana_ve
ger_buliin_bagts_baina_uu
ger_buliin_bagts_baina_uu
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
ger_buliin_bagts_baina_uu
sain_baina_uu
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00_khaana_ve
00

In [None]:
cap.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

-1