In [2]:
!pip install tensorflow==2.12.0 opencv-python mediapipe pygame numpy

Defaulting to user installation because normal site-packages is not writeable


In [3]:
import sys
import cv2
import numpy as np
import os
import time
import mediapipe as mp
import pygame

pygame 2.5.2 (SDL 2.28.3, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [4]:
# Short aliases for the two MediaPipe solution modules used throughout the notebook.
mp_holistic = mp.solutions.holistic # Holistic solution: provides the Holistic model class and the connection sets used for drawing
mp_drawing = mp.solutions.drawing_utils # Drawing helpers (draw_landmarks, DrawingSpec)

In [5]:
def mediapipe_detection(image, model):
    """Run `model` on a single BGR frame and return the frame with the model output.

    Args:
        image: Frame in BGR channel order (it is converted with COLOR_BGR2RGB
            before being handed to the model).
        model: Any object exposing a ``process(rgb_image)`` method.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the model call
    # and made writeable again straight afterwards.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [6]:
def draw_styled_landmarks(image, results):
    """Draw the face, pose and both hand landmark sets from `results` onto `image`.

    Each landmark group gets its own pair of drawing styles: one for the
    landmark points and one for the connection lines between them.

    Args:
        image: Image passed to ``mp_drawing.draw_landmarks`` as the drawing target.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Returns:
        None.
    """
    # (landmark list, connection set, landmark-point style, connection-line style)
    # Colour channels are 8-bit, so every component stays within 0-255
    # (the face connection colour previously used 256 for one channel).
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         dict(color=(80, 110, 10), thickness=1, circle_radius=1),
         dict(color=(80, 255, 121), thickness=1, circle_radius=1)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         dict(color=(80, 22, 10), thickness=2, circle_radius=4),
         dict(color=(80, 44, 121), thickness=2, circle_radius=2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         dict(color=(121, 22, 76), thickness=2, circle_radius=4),
         dict(color=(121, 44, 250), thickness=2, circle_radius=2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         dict(color=(245, 117, 66), thickness=2, circle_radius=4),
         dict(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, point_style, line_style in layers:
        mp_drawing.draw_landmarks(
            image,
            landmarks,
            connections,
            mp_drawing.DrawingSpec(**point_style),
            mp_drawing.DrawingSpec(**line_style),
        )

In [7]:
def extract_keypoints(results):
    """Flatten all landmark groups in `results` into one 1-D feature vector.

    The vector is the concatenation, in this order, of:
      * pose:       33 landmarks x (x, y, z, visibility)
      * face:       468 landmarks x (x, y, z)
      * left hand:  21 landmarks x (x, y, z)
      * right hand: 21 landmarks x (x, y, z)

    A group whose landmarks attribute is falsy (e.g. None) contributes zeros
    of the size listed above.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each present
            group has a ``landmark`` iterable of points.

    Returns:
        A 1-D numpy array.
    """
    def _flatten(group, fields, count):
        # Missing group -> zero placeholder so the vector layout stays fixed.
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    pose = _flatten(results.pose_landmarks, xyz + ('visibility',), 33)
    face = _flatten(results.face_landmarks, xyz, 468)
    lh = _flatten(results.left_hand_landmarks, xyz, 21)
    rh = _flatten(results.right_hand_landmarks, xyz, 21)
    return np.concatenate([pose, face, lh, rh])

In [8]:
# Directory (relative to the working directory) for exported numpy data.
DATA_PATH = 'MP_Data'

# Class labels the model distinguishes. The position in this array is the class
# index: actions[0] == 'Guru1', ..., actions[7] == 'Guru8', actions[8] == 'Stand'.
actions = np.array([f'Guru{n}' for n in range(1, 9)] + ['Stand'])

# Number of consecutive frames that make up one sample.
sequence_length = 30

In [9]:
# Map each action label to its position in `actions` (its class index).
label_map = dict(zip(actions, range(len(actions))))

In [10]:
label_map

{'Guru1': 0,
 'Guru2': 1,
 'Guru3': 2,
 'Guru4': 3,
 'Guru5': 4,
 'Guru6': 5,
 'Guru7': 6,
 'Guru8': 7,
 'Stand': 8}

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [12]:
# Once training is done there is no need to train again (no call to fit()):
# rebuild the same architecture here, compile it, then load the saved weights.
#
# Input shape is (frames per sample, features per frame). 1662 is the length of
# the vector built by extract_keypoints: 33*4 (pose) + 468*3 (face) + 2 * 21*3 (hands).
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(sequence_length, 1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    # One softmax output per entry in `actions`.
    Dense(actions.shape[0], activation='softmax'),
])

In [13]:
# Compile with the Adam optimizer, categorical cross-entropy loss and
# categorical accuracy as the reported metric.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [14]:
# Load previously saved weights into the model built above.
# 'action.h5' is a relative path, so it is resolved against the current working directory.
model.load_weights('action.h5')

# Prediction Cell

In [16]:
# Rolling window holding the keypoint vectors of the most recent frames.
sequence = []

# Thresholds applied to the model's score for the pose of the current step.
threshold = 0.9    # above this: pose achieved
threshold1 = 0.8   # above this: close match   -> fast beep
threshold2 = 0.6   # above this: rough match   -> slow beep

pygame.init()
pygame.mixer.init()
pose_achieved = pygame.mixer.Sound("pach.wav")
introduction = pygame.mixer.Sound("introduction.wav")
# One spoken instruction per guided step: instruct1.wav ... instruct8.wav.
instructions = [pygame.mixer.Sound(f"instruct{n}.wav") for n in range(1, 9)]

BeepOnePerSec = pygame.mixer.Sound("BeepOnePerSec.wav")
BeepTwoPerSec = pygame.mixer.Sound("BeepTwoPerSec.wav")

start_at_step = 0
# NOTE(review): stop_at_step is defined but nothing in this cell reads it; the
# session ends when the instructions run out. Confirm the intended meaning
# before wiring it in.
stop_at_step = 1

# `step` indexes both `instructions` and the model output, so step 0 is the
# first instruction and the first entry of `actions`.
step = start_at_step

# Sound.play() starts playback and returns immediately, so the code moves on to
# the next line while the audio is still playing.
# NOTE(review): because of that, the introduction and the first instruction
# start at practically the same time - confirm this overlap is intended.
introduction.play()
instructions[step].play()

cap = cv2.VideoCapture(0)
try:
    # A high detection confidence is requested on purpose: we would rather have
    # no match than an approximate one, because the goal is to get as close to
    # the pose as possible.
    with mp_holistic.Holistic(min_detection_confidence=0.95) as holistic:
        while cap.isOpened():

            # Read feed; stop if the camera did not deliver a frame, instead of
            # passing None on to the colour conversion.
            ret, frame = cap.read()
            if not ret:
                break

            # Run the MediaPipe holistic model on the frame to get landmarks.
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks so they are visible on screen. This is not useful
            # for a blind user; it is only here so developers can check that
            # landmarks are being detected.
            draw_styled_landmarks(image, results)

            # Keep only the most recent `sequence_length` keypoint vectors.
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-sequence_length:]

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Once a full window of frames is available, classify it. The model
            # returns one score per entry in `actions`.
            # (The window is meant to cover roughly one second of motion -
            # that assumes the camera delivers about 30 frames per second.)
            if len(sequence) == sequence_length:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                print(res)

                # Did we have a match?
                if res[step] > threshold:
                    pose_achieved.play()
                    # Wait up to 10 seconds (a key press ends the wait early),
                    # then go to the next step.
                    cv2.waitKey(10000)
                    step = step + 1
                    if step < len(instructions):
                        instructions[step].play()
                        continue
                    else:
                        # All steps are done. Break from loop.
                        break
                # Did we have a close match? If yes, beep faster.
                elif res[step] > threshold1:
                    BeepTwoPerSec.play()
                    cv2.waitKey(1000)
                    BeepTwoPerSec.stop()
                # Did we have a rough match? Beep slower, so the user knows
                # they are getting close.
                elif res[step] > threshold2:
                    BeepOnePerSec.play()
                    cv2.waitKey(1000)
                    BeepOnePerSec.stop()

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even if an
    # exception interrupted the loop.
    cap.release()
    cv2.destroyAllWindows()

[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.1

[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.1

[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.1

[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.1

[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.1

[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.11710218 0.10582926]
[0.10675415 0.11355001 0.11279819 0.1141344  0.10808243 0.10853579
 0.11321355 0.1