# pip installs

In [None]:
pip install tensorflow

In [None]:
pip install joblib

In [None]:
pip install win10toast

In [None]:
pip install opencv-python

In [None]:
pip install matplotlib

In [None]:
pip install sklearn

In [None]:
pip install mediapipe

# Import

In [1]:
import numpy as np
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
import time
import os
import mediapipe as mp #trial: for now we will try using mediapipe as pretrained model
import math as m
%matplotlib inline

In [2]:
#using holistic model to additionally detect face landmark rather than pose only
#if it turns out too heavy, then switch holistic to pose in the future
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils #for drawing the landmark to the screen (opencv)

In [3]:
def draw_points(img, holisticOut, mp_holistic):
    '''
    Draw the detected result to opencv bgr image
    
    image: A three channel BGR image represented as numpy ndarray.
    holisticOut: the detected result of the holistic model
    
    no return, since img.flags.writeable is assumed to be True (from the mp_predict() below)
    '''
    #draw all: face, pose (body), right and left hand
    
    #skip the face part, because we will draw from long range
    mp_drawing.draw_landmarks(img, holisticOut.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                             mp_drawing.DrawingSpec(color=(80,112,4), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(4,176,176), thickness=1, circle_radius=1)
                             ) # face'''
    mp_drawing.draw_landmarks(img, holisticOut.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(3,37,205), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(3,158,205), thickness=2, circle_radius=2)
                             ) # pose
    
    #left and right hand
    mp_drawing.draw_landmarks(img, holisticOut.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                              mp_drawing.DrawingSpec(color=(255,64,90), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(255,154,167), thickness=2, circle_radius=2)
                             ) 
    mp_drawing.draw_landmarks(img, holisticOut.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(255,123,21), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(255,192,144), thickness=2, circle_radius=2)
                             ) 
    '''
    if(holisticOut.pose_landmarks):
        h,w = img.shape[:2]
        lm = holisticOut.pose_landmarks
        lmPose = mp_holistic.PoseLandmark
        cv2.line(img, (int(lm.landmark[lmPose.LEFT_SHOULDER].x * w), int(lm.landmark[lmPose.LEFT_SHOULDER].y * h)), (int(lm.landmark[lmPose.LEFT_EAR].x * w), int(lm.landmark[lmPose.LEFT_EAR].y * h)), (3,158,205), 2)
        #cv2.line(img, (int(lm.landmark[lmPose.LEFT_SHOULDER].x * w), int(lm.landmark[lmPose.LEFT_SHOULDER].y * h)), (int(lm.landmark[lmPose.LEFT_SHOULDER].x * w), int(lm.landmark[lmPose.LEFT_SHOULDER].y * h) - 100), (3,158,205), 2)
        #cv2.line(img, (int(lm.landmark[lmPose.LEFT_HIP].x * w), int(lm.landmark[lmPose.LEFT_HIP].y * h)), (int(lm.landmark[lmPose.LEFT_SHOULDER].x * w), int(lm.landmark[lmPose.LEFT_SHOULDER].y * h)), (3,158,205), 2)
        #cv2.line(img, (int(lm.landmark[lmPose.LEFT_HIP].x * w), int(lm.landmark[lmPose.LEFT_HIP].y * h)), (int(lm.landmark[lmPose.LEFT_HIP].x * w), int(lm.landmark[lmPose.LEFT_HIP].y * h) - 100), (3,158,205), 2)
    '''

In [4]:
def mp_predict(img, holistic):
    '''
    launched one cycle of holistic prediction in mediapipe
    
    image: A three channel BGR image represented as numpy ndarray.
    holistic: model 
    '''
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #mediapipe works in rgb, convert first
    
    #beware! Image in Opencv is passed by reference, any modification to the data will change it
    #use img.flags.writeable = False to turn off
    img.flags.writeable = False                  
    holisticOut = holistic.process(img)                   # Make prediction, requires rgb
    img.flags.writeable = True
    
    #convert back
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    return img, holisticOut #holisticOut will be passed to the draw_points

In [5]:
#helper functions
def findDistance(x1,y1,x2,y2):
    dist = m.sqrt((x2-x1)**2 + (y2-y1)**2)
    return dist
def findAngle(x1, y1, x2, y2):
    theta = m.acos( (y2 -y1)*(-y1) / (m.sqrt((x2 - x1)**2 + (y2 - y1)**2 ) * y1))
    #degree = int(180/m.pi)*theta
    #return degree
    return theta

In [11]:
cap = cv2.VideoCapture(0) #create opencv capture object

holistic_params = {
    'min_detection_confidence' : 0.5, 
    'min_tracking_confidence': 0.5
}
firstTime = True
with mp_holistic.Holistic(min_tracking_confidence=holistic_params['min_tracking_confidence'], min_detection_confidence=holistic_params['min_detection_confidence']) as holistic:
    while cap.isOpened():
        ret, frame = cap.read() #read one image in webcam
        #print(frame.shape)
        if firstTime:
            print(f'width = {cap.get(3)}')
            print(f'height = {cap.get(4)}')
            firstTime = False
        # begin detecting pipeline
        img, holisticOut = mp_predict(frame, holistic)
        draw_points(img, holisticOut, mp_holistic)

        cv2.imshow('Holistic Predictions', img) #return to OpenCV Drawing window
        
        # stop opencv
        if cv2.waitKey(10) == ord('q'):
            break
            
    #opencv release pipeline
    cap.release()
    cv2.destroyAllWindows()

width = 640.0
height = 480.0


#checking how many points are ther ein holisticOut
print(len(holisticOut.face_landmarks.landmark)) #468 points
print(len(holisticOut.pose_landmarks.landmark)) #33 points
#print(len(holisticOut.left_hand_landmarks)) #21 points
print(len(holisticOut.right_hand_landmarks.landmark)) #21 points

#Beware! If it doesn't detect any, return shape will be None

In [6]:
def extract_point_data(holisticOut, mp_holistic, h, w):
    #will be in order of [pose, face, left_hand, right_hand]
    #face  468 * [x,y,z]
    #pose 33 * [x,y,z,visibility]
    #left hand right hand 21 [x,y,z]
    pose_data = np.array([[hOut.x, hOut.y, hOut.z, hOut.visibility] for hOut in holisticOut.pose_landmarks.landmark]).flatten() if holisticOut.pose_landmarks else np.zeros(33*4)
    face_data = np.array([[hOut.x, hOut.y, hOut.z] for hOut in holisticOut.face_landmarks.landmark]).flatten() if holisticOut.face_landmarks else np.zeros(468*3)
    lh_data = np.array([[hOut.x, hOut.y, hOut.z] for hOut in holisticOut.left_hand_landmarks.landmark]).flatten() if holisticOut.left_hand_landmarks else np.zeros(21*3)
    rh_data = np.array([[hOut.x, hOut.y, hOut.z] for hOut in holisticOut.right_hand_landmarks.landmark]).flatten() if holisticOut.right_hand_landmarks else np.zeros(21*3)
    lm = holisticOut.pose_landmarks
    lmPose = mp_holistic.PoseLandmark
    neck_angle_data = np.array([findAngle(int(lm.landmark[lmPose.LEFT_SHOULDER].x * w), int(lm.landmark[lmPose.LEFT_SHOULDER].y * h), int(lm.landmark[lmPose.LEFT_EAR].x * w), int(lm.landmark[lmPose.LEFT_EAR].y * h))]) if holisticOut.pose_landmarks else np.zeros(1)
    return np.concatenate([pose_data, face_data, lh_data, rh_data])
    #return np.concatenate([pose_data, lh_data, rh_data, neck_angle_data])

## Data Collection

In [7]:
#BodyPostureDetection
actions = np.array(['good', 'hand_on_cheek', 'hand_on_forehead'])
# Path for exported data, numpy arrays
datasets_origin_path = os.path.join('Datasets') 
# for amount of data collection, we collect 30 videos per actions, and we have 30 frame per videos
number_of_videos = 250
frame_per_videos = 30
print(datasets_origin_path)

Datasets


In [8]:
#prepare folders, os.mkdir cant create multilevel directory, so make one at one time

#start from datasets_origin_path
if not(os.path.exists(datasets_origin_path)):
    # create the directory you want to save to
    os.mkdir(datasets_origin_path)

#iterate from each file
for action in actions:
    curPath = os.path.join(datasets_origin_path, action)
    if not(os.path.exists(curPath)):
        os.mkdir(curPath)
    for video_idx in range(number_of_videos):
        curPath = os.path.join(datasets_origin_path, action, str(video_idx))
        if not(os.path.exists(curPath)):
            os.mkdir(curPath)
        #for frame_idx in range(frame_per_videos):
            #curPath = os.path.join(datasets_origin_path, action, str(video_idx), str(frame_idx))
            #if not(os.path.exists(curPath)):
                #os.mkdir(curPath)

In [13]:
filenamingstart = 250
#prepare folders, os.mkdir cant create multilevel directory, so make one at one time

#start from datasets_origin_path
if not(os.path.exists(datasets_origin_path)):
    # create the directory you want to save to
    os.mkdir(datasets_origin_path)

#iterate from each file
for action in actions:
    curPath = os.path.join(datasets_origin_path, action)
    if not(os.path.exists(curPath)):
        os.mkdir(curPath)
    for video_idx in range(number_of_videos):
        curPath = os.path.join(datasets_origin_path, action, str(video_idx+filenamingstart))
        if not(os.path.exists(curPath)):
            os.mkdir(curPath)
        #for frame_idx in range(frame_per_videos):
            #curPath = os.path.join(datasets_origin_path, action, str(video_idx), str(frame_idx))
            #if not(os.path.exists(curPath)):
                #os.mkdir(curPath)

In [None]:
cap = cv2.VideoCapture(0) 
customBreak = False
from win10toast import ToastNotifier
toast = ToastNotifier()

framerate = 4410
play_time_seconds = 1

with mp_holistic.Holistic(min_detection_confidence=holistic_params['min_detection_confidence'], min_tracking_confidence=holistic_params['min_tracking_confidence']) as holistic:
    for action in actions:
        #videos and frames start counting from 0
        for video_idx in range(number_of_videos):#number of videos
            for frame_idx in range(frame_per_videos):#number of frames per videos
                ret, frame = cap.read() #read one image in webcam
                img, holisticOut = mp_predict(frame, holistic)
                draw_points(img, holisticOut, mp_holistic)
                '''
                format for putText(...):
                    img: bgr np.array, opencv image
                    txt: what text to print to the screen
                    org: coordinates of the bottom-left corner of the text string in the image: (X, Y).
                    font: font enumerations
                    fontScale: Font scale factor that is multiplied by the font-specific base size.
                    color: It is the color of text string to be drawn. BGR.
                    thickness: It is the thickness of the line in px.
                    lineType: methods of text printing: use antialiased line to reduce pixelation
                '''
                #print instruction to the screen
                if frame_idx == 0:
                    toast.show_toast('Train done', 'done!', duration = 2, threaded = True)
                    cv2.putText(img, f'STARTING COLLECTION {action}', (90,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(img, f'Current Action: {action}, Video Number: {video_idx}', (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    
                    cv2.imshow('Data Collection Process', img)
                    cv2.waitKey(4000)
                    ret, frame = cap.read() #read one image in webcam
                    img, holisticOut = mp_predict(frame, holistic)
                    draw_points(img, holisticOut, mp_holistic)
                else: 
                    cv2.putText(img, f'Current Action: {action}, Video Number: {video_idx}', (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    cv2.imshow('Data Collection Process', img)
                
                #saving data
                h,w = img.shape[:2]
                np_point_data = extract_point_data(holisticOut, mp_holistic, h, w) #np array that has been flattened for result
                full_path = os.path.join(datasets_origin_path, action, str(video_idx), str(frame_idx))
                #print(np_point_data)
                np.save(full_path, np_point_data) #syntax: np.save(path_for_file, np_array)

                # stop opencv
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    customBreak = True
                    break
            if customBreak:
                break
        if customBreak:
            break
                    
    cap.release()
    cv2.destroyAllWindows()

In [16]:
cap = cv2.VideoCapture(0) 
customBreak = False
from win10toast import ToastNotifier
toast = ToastNotifier()
filenamingstart = 250
framerate = 4410
play_time_seconds = 1
with mp_holistic.Holistic(min_detection_confidence=holistic_params['min_detection_confidence'], min_tracking_confidence=holistic_params['min_tracking_confidence']) as holistic:
    action = actions[2]
    toast.show_toast('Train begin', f'{action}', duration = 2, threaded = True)
    cv2.waitKey(4000)
    #videos and frames start counting from 0
    for video_idx in range(number_of_videos):#number of videos
        for frame_idx in range(frame_per_videos):#number of frames per videos
            ret, frame = cap.read() #read one image in webcam
            img, holisticOut = mp_predict(frame, holistic)
            draw_points(img, holisticOut, mp_holistic)
            '''
            format for putText(...):
                img: bgr np.array, opencv image
                txt: what text to print to the screen
                org: coordinates of the bottom-left corner of the text string in the image: (X, Y).
                font: font enumerations
                fontScale: Font scale factor that is multiplied by the font-specific base size.
                color: It is the color of text string to be drawn. BGR.
                thickness: It is the thickness of the line in px.
                lineType: methods of text printing: use antialiased line to reduce pixelation
            '''
            #print instruction to the screen
            '''
            if frame_idx == 0:
                toast.show_toast('Train done', 'done!', duration = 2, threaded = True)
                cv2.putText(img, f'STARTING COLLECTION {action}', (90,200), 
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                cv2.imshow('Data Collection Process', img)
                cv2.waitKey(2000)
                ret, frame = cap.read() #read one image in webcam
                img, holisticOut = mp_predict(frame, holistic)
                draw_points(img, holisticOut, mp_holistic)
            '''
            cv2.putText(img, f'Current Action: {action}, Video Number: {video_idx+filenamingstart}', (15,12), 
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
            cv2.imshow('Data Collection Process', img)
            #saving data
            h,w = img.shape[:2]
            np_point_data = extract_point_data(holisticOut, mp_holistic, h, w) #np array that has been flattened for result
            full_path = os.path.join(datasets_origin_path, action, str(video_idx+filenamingstart), str(frame_idx))
            #print(np_point_data)
            np.save(full_path, np_point_data) #syntax: np.save(path_for_file, np_array)

            # stop opencv
            if cv2.waitKey(10) & 0xFF == ord('q'):
                customBreak = True
                break
        if customBreak:
            break
    toast.show_toast('Train done', 'done!', duration = 2, threaded = True)         
    cap.release()
    cv2.destroyAllWindows()

# Preprocess Data, Output labelling

In [17]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [18]:
label_map = {label:num for num, label in enumerate(actions)}
label_map

{'good': 0, 'hand_on_cheek': 1, 'hand_on_forehead': 2}

In [19]:
sequences, labels = [], []
new_number_of_videos = 500
for action in actions:
    for sequence in range(new_number_of_videos):
        window = []
        for frame_idx in range(frame_per_videos):
            res = np.load(os.path.join(datasets_origin_path, action, str(sequence), f"{frame_idx}.npy"))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [20]:
X = np.array(sequences)
print(f'Xshape = {X.shape}')
print(f'Yshape = {np.array(labels).shape}')
y = to_categorical(labels).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
print(f'shape of ytrain: {y_train.shape}')
print(f'shape of ytest: {y_test.shape}')

Xshape = (1500, 30, 1662)
Yshape = (1500,)
shape of ytrain: (1350, 3)
shape of ytest: (150, 3)


In [24]:
from sklearn.preprocessing import StandardScaler
import joblib
#preprocess data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1)).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(X_test.shape[0], -1)).reshape(X_test.shape)
joblib.dump(scaler, 'scaler.skl') 

['scaler.skl']

# Model Trial

In [25]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard, EarlyStopping

In [26]:
def buildModel(optimizer_params = 'Adam', loss_params = 'categorical_crossentropy', metrics_params = ['categorical_accuracy']):
    model = Sequential()
    model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(X.shape[1],X.shape[2])))
    model.add(LSTM(128, return_sequences=True, activation='relu'))
    model.add(LSTM(64, return_sequences=False, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(actions.shape[0], activation='softmax'))
    model.compile(optimizer=optimizer_params, loss=loss_params, metrics=metrics_params)
    print(model.summary())
    return model

In [28]:
tf.keras.utils.set_random_seed(100)
model = buildModel()
log_path = os.path.join('Logs')
tensorboard = TensorBoard(log_dir=log_path)
#es = EarlyStopping(monitor = 'val_loss', patience = 10, restore_best_weights = True)
model.fit(X_train, y_train, epochs=150, callbacks=[tensorboard], validation_data=(X_test, y_test))
model.save('studyface_most.h5') #save the weights

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 30, 64)            442112    
                                                                 
 lstm_7 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_8 (LSTM)               (None, 64)                49408     
                                                                 
 dense_6 (Dense)             (None, 64)                4160      
                                                                 
 dense_7 (Dense)             (None, 32)                2080      
                                                                 
 dense_8 (Dense)             (None, 3)                 99        
                                                                 
Total params: 596,675
Trainable params: 596,675
Non-tr

Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150


Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150


Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


In [29]:
#load the weights
#del model
model = buildModel()
#model.load_weights('studyface_more.h5')
model.load_weights('studyface_most.h5')

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_9 (LSTM)               (None, 30, 64)            442112    
                                                                 
 lstm_10 (LSTM)              (None, 30, 128)           98816     
                                                                 
 lstm_11 (LSTM)              (None, 64)                49408     
                                                                 
 dense_9 (Dense)             (None, 64)                4160      
                                                                 
 dense_10 (Dense)            (None, 32)                2080      
                                                                 
 dense_11 (Dense)            (None, 3)                 99        
                                                                 
Total params: 596,675
Trainable params: 596,675
Non-tr

# Trying on test set

In [30]:
#testScaler = joblib.load('scaler.skl')

res = model.predict(X_test)
ans = []
true_ans = []
corr = 0
for i in range(res.shape[0]):
    ans.append(np.argmax(res[i]))
    true_ans.append(np.argmax(y_test[i]))
    if(ans[i] == true_ans[i]):
        corr+=1
print(corr)
print(res.shape[0])
print(corr/res.shape[0])

139
150
0.9266666666666666


# Test in real time

In [31]:
a = [1,2,3]
print(a[-3:])

[1, 2, 3]


In [32]:
colors = [(245,117,16), (117,245,16), (16,117,245), (100,100,100)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [35]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5
currentScaler = joblib.load('scaler.skl')
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=holistic_params['min_detection_confidence'], min_tracking_confidence=holistic_params['min_tracking_confidence']) as holistic:
    while cap.isOpened():
        ret, frame = cap.read() #read one image in webcam
        
        img, holisticOut = mp_predict(frame, holistic)
        draw_points(img, holisticOut, mp_holistic)
        
        # 2. Prediction logic
        h,w = img.shape[:2]
        np_point_data = extract_point_data(holisticOut, mp_holistic, h, w)
        sequence.append(np_point_data)
        sequence = sequence[-frame_per_videos:]
        
        if len(sequence) == frame_per_videos:
            toBePredicted = np.expand_dims(sequence, axis = 0)
            toBePredicted = currentScaler.transform(toBePredicted.reshape(toBePredicted.shape[0], -1)).reshape(toBePredicted.shape)
            res = model.predict(toBePredicted, verbose = 0)[0] #we need to use [0] because dimens
            #print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            img = prob_viz(res, actions, img, colors)
            
        cv2.rectangle(img, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(img, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', img)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
hand_on_cheek
hand_on_cheek
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
good
hand_on_cheek
hand_on_cheek
good
hand_on_forehead
good
good
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_cheek
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead

hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_cheek
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead
hand_on_forehead


In [34]:
#not normalized one
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=holistic_params['min_detection_confidence'], min_tracking_confidence=holistic_params['min_tracking_confidence']) as holistic:
    while cap.isOpened():
        ret, frame = cap.read() #read one image in webcam
        
        img, holisticOut = mp_predict(frame, holistic)
        draw_points(img, holisticOut, mp_holistic)
        
        # 2. Prediction logic
        h,w = img.shape[:2]
        np_point_data = extract_point_data(holisticOut, mp_holistic, h, w)
        sequence.append(np_point_data)
        sequence = sequence[-frame_per_videos:]
        
        if len(sequence) == frame_per_videos:
            res = model.predict(np.expand_dims(sequence, axis=0), verbose = 0)[0] #we need to use [0] because dimens
            #print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            img = prob_viz(res, actions, img, colors)
            
        cv2.rectangle(img, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(img, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', img)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()