In [1]:
!pip install tensorflow==2.13.0 opencv-python mediapipe pygame numpy

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import sys
import cv2
import numpy as np
import os
import time
import mediapipe as mp
import pygame

pygame 2.5.2 (SDL 2.28.3, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
# Short aliases for the two MediaPipe solution modules used by this notebook:
#   mp_holistic - the Holistic model (provides pose, face and left/right hand landmarks)
#   mp_drawing  - drawing utilities used to render landmarks onto a frame
mp_holistic, mp_drawing = mp.solutions.holistic, mp.solutions.drawing_utils

In [4]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back a BGR frame plus the detection results.

    Args:
        image: frame in BGR channel order (as produced by OpenCV capture).
        model: object exposing ``process(rgb_image)``; here a MediaPipe Holistic instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and whatever
        ``model.process`` returned.
    """
    # The model is fed RGB, so swap the channel order first.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the frame read-only for the duration of the prediction
    # (presumably so MediaPipe can use the buffer without copying it).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Convert back to BGR so the caller can draw on / display the frame with OpenCV.
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [5]:
def draw_styled_landmarks(image, results):
    """Draw the face, pose and both hands' landmarks from ``results`` onto ``image``.

    Each body part is rendered with its own pair of styles: one for the landmark
    points and one for the connections between them.

    Args:
        image: frame to draw on; it is passed straight to ``mp_drawing.draw_landmarks``.
        results: holistic detection result exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Returns:
        None.
    """
    # One row per body part, in drawing order:
    #   (landmarks, connections, landmark style, connection style)
    # where each style is (color, thickness, circle_radius).
    # Every color channel is kept within 0-255; the face connection color previously
    # used 256 for one channel, which is outside that range.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         ((80, 110, 10), 1, 1), ((80, 255, 121), 1, 1)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         ((80, 22, 10), 2, 4), ((80, 44, 121), 2, 2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         ((121, 22, 76), 2, 4), ((121, 44, 250), 2, 2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         ((245, 117, 66), 2, 4), ((245, 66, 230), 2, 2)),
    )

    for landmarks, connections, landmark_style, connection_style in layers:
        point_color, point_thickness, point_radius = landmark_style
        line_color, line_thickness, line_radius = connection_style
        mp_drawing.draw_landmarks(
            image, landmarks, connections,
            mp_drawing.DrawingSpec(color=point_color, thickness=point_thickness,
                                   circle_radius=point_radius),
            mp_drawing.DrawingSpec(color=line_color, thickness=line_thickness,
                                   circle_radius=line_radius),
        )

In [6]:
def extract_keypoints(results):
    """Flatten one holistic detection result into a single 1-D feature vector.

    The vector is the concatenation, in this order, of:
      * pose:       x, y, z, visibility per landmark (zeros of length 33*4 if missing)
      * face:       x, y, z per landmark             (zeros of length 468*3 if missing)
      * left hand:  x, y, z per landmark             (zeros of length 21*3 if missing)
      * right hand: x, y, z per landmark             (zeros of length 21*3 if missing)

    Args:
        results: object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is either
            falsy (nothing detected) or has a ``landmark`` iterable.

    Returns:
        1-D ``numpy.ndarray``; 1662 values when every detected part has the landmark
        counts listed above.
    """
    def _flat(group, attrs, missing_len):
        # Nothing detected for this body part -> zero block of the expected length.
        if not group:
            return np.zeros(missing_len)
        rows = [[getattr(point, attr) for attr in attrs] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = (
        _flat(results.pose_landmarks, xyz + ('visibility',), 33 * 4),
        _flat(results.face_landmarks, xyz, 468 * 3),
        _flat(results.left_hand_landmarks, xyz, 21 * 3),
        _flat(results.right_hand_landmarks, xyz, 21 * 3),
    )
    return np.concatenate(parts)

In [7]:
# Directory (relative path) for the exported data, stored as numpy arrays.
DATA_PATH = 'MP_Data'

# Actions we try to detect: 'Guru1' .. 'Guru8' followed by 'Stand',
# so actions[0] == 'Guru1', actions[1] == 'Guru2', ..., actions[8] == 'Stand'.
actions = np.array([f'Guru{number}' for number in range(1, 9)] + ['Stand'])

# Each video (sequence) is 30 frames long.
sequence_length = 30

In [8]:
# Map every action name to its position in `actions` (e.g. 'Guru1' -> 0).
label_map = dict(zip(actions, range(len(actions))))

In [9]:
label_map

{'Guru1': 0,
 'Guru2': 1,
 'Guru3': 2,
 'Guru4': 3,
 'Guru5': 4,
 'Guru6': 5,
 'Guru7': 6,
 'Guru8': 7,
 'Stand': 8}

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [11]:
# The network only has to be trained once; afterwards there is no need to call fit() again.
# Rebuild the same architecture here, compile it, and then load the saved weights.
# Input shape (30, 1662): 30 frames per sequence, 1662 keypoint values per frame
# (33*4 pose + 468*3 face + 21*3 per hand, as produced by extract_keypoints).
# NOTE: the weights from 'action.h5' are loaded into this model afterwards, so the layer
# order and sizes presumably have to match the network that was trained.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    # One softmax output per entry in `actions`.
    Dense(actions.shape[0], activation='softmax'),
])

In [12]:
# Compile the model: Adam optimizer, categorical cross-entropy loss, and categorical
# accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['categorical_accuracy'],
)

In [13]:
# Load the saved weights from 'action.h5' (path relative to the current working directory)
# into the compiled model, so it can be used for prediction without calling fit().
model.load_weights('action.h5')

# Prediction Cell

In [14]:
import time
# Guided pose session: play a spoken instruction for the current step, watch the webcam,
# and give audio feedback (beeps / "pose achieved") based on the model's score for that step.

sequence = []      # rolling window with the keypoint vectors of the most recent frames
threshold = 0.9    # score above which the current pose counts as achieved
threshold1 = 0.8   # score above which we have a close match  -> faster beep
threshold2 = 0.6   # score above which we have a rough match  -> slower beep

pygame.init()
pygame.mixer.init()
pose_achieved = pygame.mixer.Sound("pach.wav")
introduction = pygame.mixer.Sound("introduction.wav")
# instructions[i] is the spoken instruction for step i (instruct1.wav .. instruct8.wav).
instructions = [pygame.mixer.Sound(f"instruct{number}.wav") for number in range(1, 9)]

BeepOnePerSec = pygame.mixer.Sound("BeepOnePerSec.wav")
BeepTwoPerSec = pygame.mixer.Sound("BeepTwoPerSec.wav")

# First and last step (indices into `instructions` and into the model output) of this session.
startatstep = 0
stopatstep = 1
step = startatstep

# Fail early, before any audio is played, if a step has no instruction clip.
if max(startatstep, stopatstep) >= len(instructions):
    raise ValueError("startatstep and stopatstep must be smaller than the number of instructions")

# Sound.play() only starts the playback and returns immediately, so sleep while the
# clip is playing before moving on.
introduction.play()
time.sleep(10)
instructions[step].play()
time.sleep(10)

cap = cv2.VideoCapture(0)
try:
    # Set up the mediapipe model. Note that we set a high bar for detection confidence: we
    # want the model to be absolutely sure that it detected a pose (with landmarks) before it
    # claims victory. We would rather not have a "match" than have it approximate; the goal
    # here is to be as close to the pose as possible.
    with mp_holistic.Holistic(min_detection_confidence=0.95) as holistic:
        while cap.isOpened():

            # Read feed. `ret` is falsy when no frame could be grabbed; stop instead of
            # passing an invalid frame on to the detection step.
            ret, frame = cap.read()
            if not ret:
                print("Could not read a frame from the camera; stopping.")
                break

            # Run the MediaPipe holistic model on the frame to get the landmarks.
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks so they can be seen on screen. This won't be helpful for a
            # blind user; it is only here for the developers to make sure the code is
            # working correctly and landmarks are being detected.
            draw_styled_landmarks(image, results)

            # Keep only the most recent `sequence_length` keypoint vectors.
            sequence.append(extract_keypoints(results))
            sequence = sequence[-sequence_length:]

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Once a full window of frames is available, ask the trained model (weights
            # from action.h5) for the match score of every action it knows about. Using a
            # window of frames is what lets the model judge the motion rather than a single
            # frame (roughly one second of video, assuming ~30 fps - TODO confirm).
            if len(sequence) == sequence_length:
                # verbose=0 keeps Keras from writing a progress line for every frame.
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                print(res)

                # Did we have a match?
                if res[step] > threshold:
                    pose_achieved.play()
                    # Wait 10 seconds and then go to the next step.
                    cv2.waitKey(10000)
                    step = step + 1
                    if step > stopatstep:
                        # All steps are done.
                        break
                    instructions[step].play()
                    time.sleep(10)
                    # The buffered frames were recorded before the pause and belong to the
                    # previous pose; start the next step with a fresh window.
                    sequence = []
                    continue
                # Did we have a close match? If yes, beep faster.
                elif res[step] > threshold1:
                    BeepTwoPerSec.play()
                    cv2.waitKey(1000)
                    BeepTwoPerSec.stop()
                # Did we have a rough match? Beep slower, so the user knows they are
                # getting close.
                elif res[step] > threshold2:
                    BeepOnePerSec.play()
                    cv2.waitKey(1000)
                    BeepOnePerSec.stop()

            # Break gracefully when 'q' is pressed.
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always give the camera back and close the preview window, even if something above raised.
    cap.release()
    cv2.destroyAllWindows()

[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.0

[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.0

[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.02403913 0.08931383]
[0.4005169  0.11806531 0.08367512 0.10925855 0.05126428 0.08608708
 0.03777977 0.0

[3.5214610e-20 1.8634324e-13 1.0606599e-11 2.8321359e-18 5.4624190e-25
 3.2285115e-01 6.7714888e-01 3.0905869e-18 9.4602950e-13]
[1.5091843e-27 2.2461358e-16 2.5401797e-17 2.6300697e-23 3.7938661e-30
 4.1861254e-05 9.9995816e-01 1.0695165e-24 6.9527343e-18]
[5.3774307e-34 3.1018236e-19 1.0697709e-21 6.1875826e-29 4.9915893e-36
 1.1403295e-07 9.9999988e-01 1.8085844e-30 5.5927865e-22]
[0.0000000e+00 3.9435997e-21 2.4558241e-26 9.2290151e-35 0.0000000e+00
 5.8088929e-11 1.0000000e+00 7.4691455e-37 1.8329753e-25]
[0.0000000e+00 1.0000000e+00 4.8382522e-37 0.0000000e+00 0.0000000e+00
 1.3923905e-34 2.8429653e-26 0.0000000e+00 0.0000000e+00]
[4.0783055e-23 2.5804536e-20 2.3794290e-02 1.3113890e-02 6.3095082e-20
 1.7778466e-05 6.3505471e-01 3.2801926e-01 2.4212205e-19]
[0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
 0.000000e+00 5.312534e-26 0.000000e+00 0.000000e+00]
[0.0000000e+00 1.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
 0.0000000e+00 3.4857002e-22 0.0000

[2.4372202e-06 3.9261000e-08 9.9991381e-01 1.0364130e-05 4.4035969e-11
 5.4232940e-05 2.5466775e-07 1.3579579e-06 1.7518440e-05]
[2.0807310e-07 4.5514878e-07 9.9892920e-01 2.6958932e-07 2.0241712e-12
 1.0500343e-03 5.7185744e-06 3.8428585e-08 1.4067604e-05]
[4.20023753e-07 4.10272041e-03 9.95315313e-01 1.05814055e-11
 9.57147437e-19 5.81372587e-04 1.05151543e-07 9.15981375e-13
 1.68342040e-09]
[6.2882714e-06 5.7364502e-08 9.9996889e-01 1.1408175e-05 1.7733277e-12
 1.1025919e-05 1.5384556e-09 1.8889116e-06 3.1992940e-07]
[5.7624402e-05 2.2764031e-07 9.9594253e-01 9.6034310e-05 1.6458370e-09
 3.0863232e-03 8.0262662e-06 1.5047081e-05 7.9415529e-04]
[7.0312090e-06 8.8896576e-07 9.7480857e-01 1.8606745e-05 1.2638071e-09
 2.1091387e-02 5.2547344e-04 1.6663691e-06 3.5463015e-03]
[1.4804708e-05 1.2907128e-06 8.3407837e-01 1.1214137e-05 5.8339178e-10
 1.5554951e-01 1.4004277e-03 4.3654609e-07 8.9439088e-03]
[1.5105748e-05 2.3729428e-06 4.3062246e-01 2.9834871e-06 1.1780955e-10
 5.5061275e-01 3