In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [3]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [4]:
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return the frame together with the results.

    The frame is converted to RGB before `model.process` is called and is
    marked read-only for the duration of that call. Afterwards it is made
    writeable again and converted back to BGR.

    Args:
        image: frame in BGR channel order (as delivered by OpenCV capture).
        model: object exposing a `process(image)` method.

    Returns:
        A `(bgr_image, results)` tuple, where `results` is whatever
        `model.process` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    rgb_frame.flags.writeable = False   # lock the buffer while the model reads it
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True    # unlock so later drawing calls can modify it
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [5]:
def draw_styled_landmarks(image, results):
    """Draw pose, left-hand and right-hand landmarks onto `image` in place.

    Each landmark set is drawn with its own colour pair: the first
    `DrawingSpec` (radius 4) styles the landmark points, the second
    (radius 2) styles the connections.

    Args:
        image: frame to draw on; modified by `mp_drawing.draw_landmarks`.
        results: object exposing `pose_landmarks`, `left_hand_landmarks`
            and `right_hand_landmarks`.
    """
    # (attribute on `results`, connection set, landmark colour, connection colour)
    layers = (
        ('pose_landmarks', mp_holistic.POSE_CONNECTIONS, (80, 22, 10), (80, 44, 121)),
        ('left_hand_landmarks', mp_holistic.HAND_CONNECTIONS, (121, 22, 76), (121, 44, 250)),
        ('right_hand_landmarks', mp_holistic.HAND_CONNECTIONS, (245, 117, 66), (245, 66, 230)),
    )
    for attr, connections, point_color, line_color in layers:
        point_spec = mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4)
        line_spec = mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2)
        mp_drawing.draw_landmarks(image, getattr(results, attr), connections, point_spec, line_spec)

In [6]:
def view_detections():
    """Show a live webcam preview with the detected landmarks drawn on it.

    Frames are read from capture device 0, passed through the holistic
    model, annotated and displayed in a window named 'OpenCV Feed'. The
    loop ends when 'q' is pressed, when the capture closes, or when a
    frame cannot be read. The camera and the window are always released,
    even if an exception is raised while processing a frame.
    """
    cap = cv2.VideoCapture(0)
    try:
        # Set mediapipe model
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                # Read feed; `ret` is False when no frame could be grabbed, in
                # which case `frame` is unusable and must not be processed.
                ret, frame = cap.read()
                if not ret:
                    break

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # Draw landmarks
                draw_styled_landmarks(image, results)

                # Show to screen
                cv2.imshow('OpenCV Feed', image)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
    finally:
        # Runs on normal exit and on errors, so the webcam is never left locked.
        cap.release()
        cv2.destroyAllWindows()

In [7]:
def extract_keypoints(results):
    """Flatten the pose and hand landmarks of one frame into a single vector.

    Layout of the returned 1-D array:
      * pose: (x, y, z, visibility) per landmark, or 33*4 zeros if the pose
        landmarks are missing;
      * left hand: (x, y, z) per landmark, or 21*3 zeros if missing;
      * right hand: (x, y, z) per landmark, or 21*3 zeros if missing.

    Args:
        results: object exposing `pose_landmarks`, `left_hand_landmarks` and
            `right_hand_landmarks`; each is either falsy or has a `.landmark`
            iterable.

    Returns:
        The concatenation pose + left hand + right hand as a numpy array.
    """
    def _flatten(landmark_set, fields, fallback_size):
        # A missing detection is replaced by zeros so the vector length is stable.
        if landmark_set:
            rows = [[getattr(point, name) for name in fields] for point in landmark_set.landmark]
            return np.array(rows).flatten()
        return np.zeros(fallback_size)

    xyz = ('x', 'y', 'z')
    parts = [
        _flatten(results.pose_landmarks, xyz + ('visibility',), 33*4),
        _flatten(results.left_hand_landmarks, xyz, 21*3),
        _flatten(results.right_hand_landmarks, xyz, 21*3),
    ]
    return np.concatenate(parts)

In [29]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data')

# Actions that we try to detect | null implies none of the meaningful actions were detected in current frames
actions = np.array(['null','apply-throttle', 'release-throttle', 'apply-brake','release-brake'])

# Number of additional sequences (videos) to allocate per action for the next batch
seq_batch = 5


def last_sequence_index(data_path, action_names):
    """Return the highest numeric sequence folder found under any action.

    Looks at `data_path/<action>/` for every name in `action_names` and
    considers only entries whose name consists of digits. Missing action
    folders and non-numeric entries are ignored.

    Args:
        data_path: root folder of the exported data.
        action_names: iterable of action folder names.

    Returns:
        The largest sequence number as an int, or -1 when no numbered folder
        exists yet (so that the first new sequence becomes 0).
    """
    highest = -1
    for name in action_names:
        action_dir = os.path.join(data_path, name)
        if not os.path.isdir(action_dir):
            continue
        for entry in os.listdir(action_dir):
            # int() rather than eval(): folder names are data and must never
            # be executed as code.
            if entry.isdigit():
                highest = max(highest, int(entry))
    return highest


# Index of the last sequence already on disk. Previously this line read an
# `action` variable that is only defined by a later cell, so it failed on a
# fresh kernel; it now scans every action folder.
dirmax = last_sequence_index(DATA_PATH, actions)

# Exclusive upper bound of the sequence indices after adding the next batch
sequence_limit = dirmax + seq_batch + 1

# Videos are going to be 30 frames in length
frame_limit = 30


In [30]:
# Create the folders that will hold the exported numpy files.
# Layout: action (e.g. apply-throttle) -> sequence (one video) -> frames
# (one .npy file of landmark keypoints per frame).
for action in actions:
    for sequence in range(dirmax+1,sequence_limit):
        # exist_ok=True makes re-running this cell harmless while still letting
        # real failures (permissions, invalid path) surface instead of being
        # silently swallowed by a bare `except`.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [39]:
# Preview the [2:3] slice of `actions` (a one-element array)
actions[2:3]

array(['release-throttle'], dtype='<U16')

In [None]:
cap = cv2.VideoCapture(0)

is_recording = False  # True while the frames of the current sequence are being saved
is_done = False  # True after a sequence has finished, until the next one is started


def key_press(key):
    """Start recording a new sequence when the space bar is pressed.

    Args:
        key: key code returned by `cv2.waitKey`.

    Side effects:
        On space, sets the module-level flags `is_recording = True` and
        `is_done = False`. Any other key leaves them unchanged.
    """
    global is_recording,is_done
    if key == ord(' '):
        is_done = False
        is_recording = True


# Work on a slice of `actions` so that only selected actions are recorded.
# Finished actions are popped from the end of `actions_temp`.
actions_temp = actions.copy()
actions_temp = actions_temp[2:3]
# Sequences start_seq .. sequence_limit-1 are recorded for each action
sequence_limit = 20
start_seq = 15
# Initial values of the sequence (video) number and frame number
sequence = start_seq
frame_num = 0

try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read feed; stop if the camera did not deliver a frame
            ret, frame = cap.read()
            if not ret:
                break
            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            # Draw landmarks
            draw_styled_landmarks(image, results)
            # Show progress while a sequence is being recorded
            if frame_num >=1 and is_recording:
                cv2.putText(image, "Collecting - frame {}".format(frame_num), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4,cv2.LINE_AA)
            # After a sequence finishes, show which action/sequence comes next
            if is_done:
                cv2.putText(image, 'action: {} | sequence: {} '.format(actions_temp[-1] if len(actions_temp) else 'empty',sequence), (20,20),
                                       cv2.FONT_HERSHEY_SIMPLEX, 0.632, (0,255, 0), 4, cv2.LINE_AA)

            cv2.imshow('OpenCV Feed', image)
            # Take input from the user; space starts recording a sequence
            key_selected = cv2.waitKey(1)
            key_press(key_selected)
            # The guard checks `actions_temp` (not `actions`): once every
            # selected action has been recorded the slice is empty and
            # `actions_temp[-1]` would raise IndexError.
            if is_recording and len(actions_temp)>0:
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, actions_temp[-1], str(sequence), str(frame_num))
                np.save(npy_path, keypoints)
                print('action: {} | sequence: {} | frame: {} '.format(actions_temp[-1] if len(actions_temp) else 'empty',sequence,frame_num))
                frame_num += 1
                if frame_num == frame_limit:
                    # Sequence complete: pause until space is pressed again
                    frame_num=0
                    is_recording = False
                    is_done = True
                    sequence+=1
                if sequence==sequence_limit:
                    # All sequences of this action are recorded: move on to the previous action in the slice
                    sequence=start_seq
                    actions_temp = actions_temp[:-1]
            elif key_selected == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error
    cap.release()
    cv2.destroyAllWindows()

In [41]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [42]:
# Train on the first three actions only; map each action name to its class index
actions_temp = actions.copy()[0:3]
label_map = dict(zip(actions_temp, range(len(actions_temp))))

In [43]:
# Display the action-name -> class-index mapping
label_map

{'null': 0, 'apply-throttle': 1, 'release-throttle': 2}

In [45]:
# Load every recorded sequence: one list of `frame_limit` keypoint arrays per
# (action, sequence) folder, plus the matching integer class label.
sequences, labels = [], []
for action in actions_temp:
    for sequence in range(sequence_limit):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        sequences.append([
            np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            for frame_num in range(frame_limit)
        ])
        labels.append(label_map[action])

In [46]:
# Stack the loaded sequences into a single array used as the model input
X = np.array(sequences)

In [47]:
# One-hot encode the integer class labels and cast the result to int
y = to_categorical(labels).astype(int)

In [48]:
# Hold out 5% of the sequences for testing. A fixed random_state makes the
# split (and therefore the reported metrics) reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

In [49]:
# Inspect the dataset dimensions; given how `sequences` is built this is
# presumably (sequences, frames per sequence, keypoint values per frame)
X.shape

(60, 30, 258)

In [50]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard


In [51]:
# Directory that receives the TensorBoard logs, and the callback that is
# passed to model.fit to write them
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [52]:
# Three stacked LSTM layers followed by two dense layers and a softmax output
# with one unit per action in `actions_temp`. Input: 30 frames x 258 values.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 258)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions_temp.shape[0], activation='softmax'),
])

In [53]:
# Configure training: Adam optimizer, categorical cross-entropy loss (matches
# the one-hot labels and softmax output), categorical accuracy as the metric
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [55]:
# Train for 540 epochs on the training split, logging through the TensorBoard
# callback. NOTE(review): no validation data is passed, so overfitting is not
# monitored during training.
model.fit(X_train, y_train, epochs=540, callbacks=[tb_callback])

Epoch 1/540
Epoch 2/540
Epoch 3/540
Epoch 4/540
Epoch 5/540
Epoch 6/540
Epoch 7/540
Epoch 8/540
Epoch 9/540
Epoch 10/540
Epoch 11/540
Epoch 12/540
Epoch 13/540
Epoch 14/540
Epoch 15/540
Epoch 16/540
Epoch 17/540
Epoch 18/540
Epoch 19/540
Epoch 20/540
Epoch 21/540
Epoch 22/540
Epoch 23/540
Epoch 24/540
Epoch 25/540
Epoch 26/540
Epoch 27/540
Epoch 28/540
Epoch 29/540
Epoch 30/540
Epoch 31/540
Epoch 32/540
Epoch 33/540
Epoch 34/540
Epoch 35/540
Epoch 36/540
Epoch 37/540
Epoch 38/540
Epoch 39/540
Epoch 40/540
Epoch 41/540
Epoch 42/540
Epoch 43/540
Epoch 44/540
Epoch 45/540
Epoch 46/540
Epoch 47/540
Epoch 48/540
Epoch 49/540
Epoch 50/540
Epoch 51/540
Epoch 52/540
Epoch 53/540
Epoch 54/540
Epoch 55/540
Epoch 56/540
Epoch 57/540
Epoch 58/540
Epoch 59/540
Epoch 60/540
Epoch 61/540
Epoch 62/540
Epoch 63/540
Epoch 64/540
Epoch 65/540
Epoch 66/540
Epoch 67/540
Epoch 68/540
Epoch 69/540
Epoch 70/540
Epoch 71/540
Epoch 72/540
Epoch 73/540
Epoch 74/540


Epoch 75/540
Epoch 76/540
Epoch 77/540
Epoch 78/540
Epoch 79/540
Epoch 80/540
Epoch 81/540
Epoch 82/540
Epoch 83/540
Epoch 84/540
Epoch 85/540
Epoch 86/540
Epoch 87/540
Epoch 88/540
Epoch 89/540
Epoch 90/540
Epoch 91/540
Epoch 92/540
Epoch 93/540
Epoch 94/540
Epoch 95/540
Epoch 96/540
Epoch 97/540
Epoch 98/540
Epoch 99/540
Epoch 100/540
Epoch 101/540
Epoch 102/540
Epoch 103/540
Epoch 104/540
Epoch 105/540
Epoch 106/540
Epoch 107/540
Epoch 108/540
Epoch 109/540
Epoch 110/540
Epoch 111/540
Epoch 112/540
Epoch 113/540
Epoch 114/540
Epoch 115/540
Epoch 116/540
Epoch 117/540
Epoch 118/540
Epoch 119/540
Epoch 120/540
Epoch 121/540
Epoch 122/540
Epoch 123/540
Epoch 124/540
Epoch 125/540
Epoch 126/540
Epoch 127/540
Epoch 128/540
Epoch 129/540
Epoch 130/540
Epoch 131/540
Epoch 132/540
Epoch 133/540
Epoch 134/540
Epoch 135/540
Epoch 136/540
Epoch 137/540
Epoch 138/540
Epoch 139/540
Epoch 140/540
Epoch 141/540
Epoch 142/540
Epoch 143/540
Epoch 144/540
Epoch 145/540
Epoch 146/540
Epoch 147/540


Epoch 148/540
Epoch 149/540
Epoch 150/540
Epoch 151/540
Epoch 152/540
Epoch 153/540
Epoch 154/540
Epoch 155/540
Epoch 156/540
Epoch 157/540
Epoch 158/540
Epoch 159/540
Epoch 160/540
Epoch 161/540
Epoch 162/540
Epoch 163/540
Epoch 164/540
Epoch 165/540
Epoch 166/540
Epoch 167/540
Epoch 168/540
Epoch 169/540
Epoch 170/540
Epoch 171/540
Epoch 172/540
Epoch 173/540
Epoch 174/540
Epoch 175/540
Epoch 176/540
Epoch 177/540
Epoch 178/540
Epoch 179/540
Epoch 180/540
Epoch 181/540
Epoch 182/540
Epoch 183/540
Epoch 184/540
Epoch 185/540
Epoch 186/540
Epoch 187/540
Epoch 188/540
Epoch 189/540
Epoch 190/540
Epoch 191/540
Epoch 192/540
Epoch 193/540
Epoch 194/540
Epoch 195/540
Epoch 196/540
Epoch 197/540
Epoch 198/540
Epoch 199/540
Epoch 200/540
Epoch 201/540
Epoch 202/540
Epoch 203/540
Epoch 204/540
Epoch 205/540
Epoch 206/540
Epoch 207/540
Epoch 208/540
Epoch 209/540
Epoch 210/540
Epoch 211/540
Epoch 212/540
Epoch 213/540
Epoch 214/540
Epoch 215/540
Epoch 216/540
Epoch 217/540
Epoch 218/540
Epoch 

Epoch 221/540
Epoch 222/540
Epoch 223/540
Epoch 224/540
Epoch 225/540
Epoch 226/540
Epoch 227/540
Epoch 228/540
Epoch 229/540
Epoch 230/540
Epoch 231/540
Epoch 232/540
Epoch 233/540
Epoch 234/540
Epoch 235/540
Epoch 236/540
Epoch 237/540
Epoch 238/540
Epoch 239/540
Epoch 240/540
Epoch 241/540
Epoch 242/540
Epoch 243/540
Epoch 244/540
Epoch 245/540
Epoch 246/540
Epoch 247/540
Epoch 248/540
Epoch 249/540
Epoch 250/540
Epoch 251/540
Epoch 252/540
Epoch 253/540
Epoch 254/540
Epoch 255/540
Epoch 256/540
Epoch 257/540
Epoch 258/540
Epoch 259/540
Epoch 260/540
Epoch 261/540
Epoch 262/540
Epoch 263/540
Epoch 264/540
Epoch 265/540
Epoch 266/540
Epoch 267/540
Epoch 268/540
Epoch 269/540
Epoch 270/540
Epoch 271/540
Epoch 272/540
Epoch 273/540
Epoch 274/540
Epoch 275/540
Epoch 276/540
Epoch 277/540
Epoch 278/540
Epoch 279/540
Epoch 280/540
Epoch 281/540
Epoch 282/540
Epoch 283/540
Epoch 284/540
Epoch 285/540
Epoch 286/540
Epoch 287/540
Epoch 288/540
Epoch 289/540
Epoch 290/540
Epoch 291/540
Epoch 

Epoch 294/540
Epoch 295/540
Epoch 296/540
Epoch 297/540
Epoch 298/540
Epoch 299/540
Epoch 300/540
Epoch 301/540
Epoch 302/540
Epoch 303/540
Epoch 304/540
Epoch 305/540
Epoch 306/540
Epoch 307/540
Epoch 308/540
Epoch 309/540
Epoch 310/540
Epoch 311/540
Epoch 312/540
Epoch 313/540
Epoch 314/540
Epoch 315/540
Epoch 316/540
Epoch 317/540
Epoch 318/540
Epoch 319/540
Epoch 320/540
Epoch 321/540
Epoch 322/540
Epoch 323/540
Epoch 324/540
Epoch 325/540
Epoch 326/540
Epoch 327/540
Epoch 328/540
Epoch 329/540
Epoch 330/540
Epoch 331/540
Epoch 332/540
Epoch 333/540
Epoch 334/540
Epoch 335/540
Epoch 336/540
Epoch 337/540
Epoch 338/540
Epoch 339/540
Epoch 340/540
Epoch 341/540
Epoch 342/540
Epoch 343/540
Epoch 344/540
Epoch 345/540
Epoch 346/540
Epoch 347/540
Epoch 348/540
Epoch 349/540
Epoch 350/540
Epoch 351/540
Epoch 352/540
Epoch 353/540
Epoch 354/540
Epoch 355/540
Epoch 356/540
Epoch 357/540
Epoch 358/540
Epoch 359/540
Epoch 360/540
Epoch 361/540
Epoch 362/540
Epoch 363/540
Epoch 364/540
Epoch 

Epoch 367/540
Epoch 368/540
Epoch 369/540
Epoch 370/540
Epoch 371/540
Epoch 372/540
Epoch 373/540
Epoch 374/540
Epoch 375/540
Epoch 376/540
Epoch 377/540
Epoch 378/540
Epoch 379/540
Epoch 380/540
Epoch 381/540
Epoch 382/540
Epoch 383/540
Epoch 384/540
Epoch 385/540
Epoch 386/540
Epoch 387/540
Epoch 388/540
Epoch 389/540
Epoch 390/540
Epoch 391/540
Epoch 392/540
Epoch 393/540
Epoch 394/540
Epoch 395/540
Epoch 396/540
Epoch 397/540
Epoch 398/540
Epoch 399/540
Epoch 400/540
Epoch 401/540
Epoch 402/540
Epoch 403/540
Epoch 404/540
Epoch 405/540
Epoch 406/540
Epoch 407/540
Epoch 408/540
Epoch 409/540
Epoch 410/540
Epoch 411/540
Epoch 412/540
Epoch 413/540
Epoch 414/540
Epoch 415/540
Epoch 416/540
Epoch 417/540
Epoch 418/540
Epoch 419/540
Epoch 420/540
Epoch 421/540
Epoch 422/540
Epoch 423/540
Epoch 424/540
Epoch 425/540
Epoch 426/540
Epoch 427/540
Epoch 428/540
Epoch 429/540
Epoch 430/540
Epoch 431/540
Epoch 432/540
Epoch 433/540
Epoch 434/540
Epoch 435/540
Epoch 436/540
Epoch 437/540
Epoch 

Epoch 440/540
Epoch 441/540
Epoch 442/540
Epoch 443/540
Epoch 444/540
Epoch 445/540
Epoch 446/540
Epoch 447/540
Epoch 448/540
Epoch 449/540
Epoch 450/540
Epoch 451/540
Epoch 452/540
Epoch 453/540
Epoch 454/540
Epoch 455/540
Epoch 456/540
Epoch 457/540
Epoch 458/540
Epoch 459/540
Epoch 460/540
Epoch 461/540
Epoch 462/540
Epoch 463/540
Epoch 464/540
Epoch 465/540
Epoch 466/540
Epoch 467/540
Epoch 468/540
Epoch 469/540
Epoch 470/540
Epoch 471/540
Epoch 472/540
Epoch 473/540
Epoch 474/540
Epoch 475/540
Epoch 476/540
Epoch 477/540
Epoch 478/540
Epoch 479/540
Epoch 480/540
Epoch 481/540
Epoch 482/540
Epoch 483/540
Epoch 484/540
Epoch 485/540
Epoch 486/540
Epoch 487/540
Epoch 488/540
Epoch 489/540
Epoch 490/540
Epoch 491/540
Epoch 492/540
Epoch 493/540
Epoch 494/540
Epoch 495/540
Epoch 496/540
Epoch 497/540
Epoch 498/540
Epoch 499/540
Epoch 500/540
Epoch 501/540
Epoch 502/540
Epoch 503/540
Epoch 504/540
Epoch 505/540
Epoch 506/540
Epoch 507/540
Epoch 508/540
Epoch 509/540
Epoch 510/540
Epoch 

Epoch 513/540
Epoch 514/540
Epoch 515/540
Epoch 516/540
Epoch 517/540
Epoch 518/540
Epoch 519/540
Epoch 520/540
Epoch 521/540
Epoch 522/540
Epoch 523/540
Epoch 524/540
Epoch 525/540
Epoch 526/540
Epoch 527/540
Epoch 528/540
Epoch 529/540
Epoch 530/540
Epoch 531/540
Epoch 532/540
Epoch 533/540
Epoch 534/540
Epoch 535/540
Epoch 536/540
Epoch 537/540
Epoch 538/540
Epoch 539/540
Epoch 540/540


<keras.callbacks.History at 0x29fe8c3c3d0>

In [56]:
# Print the layer-by-layer architecture and parameter counts
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 64)            82688     
                                                                 
 lstm_1 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 3)                 99        
                                                                 
Total params: 237,251
Trainable params: 237,251
Non-trai

In [57]:
# Persist the trained model to 'actions.h5' in the working directory
model.save('actions.h5')

In [58]:
# Load the weights stored in 'actions.h5' into the in-memory model
model.load_weights('actions.h5')

In [59]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [60]:
# Model outputs for the held-out test sequences
yhat = model.predict(X_test)



In [61]:
# Convert one-hot labels and model outputs to class indices.
# `yhat` is recomputed from the model instead of being derived from its own
# previous value, so this cell can be re-run without failing.
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(model.predict(X_test), axis=1).tolist()

In [62]:
# Confusion matrices computed from the true and predicted class indices
multilabel_confusion_matrix(ytrue, yhat)

array([[[1, 1],
        [0, 1]],

       [[1, 0],
        [1, 1]]], dtype=int64)

In [63]:
# Accuracy score of the predicted class indices against the true ones
accuracy_score(ytrue, yhat)

0.6666666666666666

In [64]:
# Bar colours for the probability overlay; reused cyclically when there are
# more classes than colours.
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Draw one horizontal probability bar per class on a copy of the frame.

    Args:
        res: iterable of class probabilities.
        actions: sequence of class names, indexed like `res`.
        input_frame: image to annotate; it is copied, not modified.
        colors: non-empty sequence of colour tuples. If it has fewer entries
            than `res`, colours are reused in order instead of raising
            IndexError.

    Returns:
        The annotated copy of `input_frame`.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        # Wrap around the palette so any number of classes can be drawn
        bar_color = colors[num % len(colors)]
        # Bar length is the probability in pixels (0..100); rows are 40 px apart
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame

In [65]:
# 1. New detection variables
sequence = []     # rolling window holding the keypoints of the most recent frames
sentence = []     # history of recognised actions shown in the top banner
threshold = 0.75  # minimum probability for a prediction to be accepted

cap = cv2.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed; stop if the camera did not deliver a frame
            ret, frame = cap.read()
            if not ret:
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # 2. Prediction logic: keep only the last 30 frames, oldest first
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]

            if len(sequence) == 30:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                best = int(np.argmax(res))

                # 3. Viz logic: append a confident prediction unless it just
                # repeats the last entry of the sentence
                if res[best] > threshold:
                    predicted = actions_temp[best]
                    if len(sentence) == 0 or predicted != sentence[-1]:
                        sentence.append(predicted)

                # Keep the banner to the five most recent actions
                if len(sentence) > 5:
                    sentence = sentence[-5:]

                # Viz probabilities
                image = prob_viz(res, actions_temp, image, colors)

            cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3,30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error
    cap.release()
    cv2.destroyAllWindows()













