# **Hand Gesture Recognition System for controlling Applications**

The goal of this project is to train a machine-learning model that classifies images of different hand gestures, such as 'thumbs up' or 'palm', and to use the recognized gesture to control a media player (for example, to play or stop playback).

In this project, we use the MediaPipe framework to detect hands and neural networks built with TensorFlow and Keras to train the gesture-classification model.



In [1]:
# import necessary packages for hand gesture recognition project 
import os
import time
import subprocess
import cv2
import vlc
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model

2023-01-15 00:18:21.767226: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Function to capture frames from a webcam





In [2]:
def capture_frames(webcam):
    """Grab one frame from the webcam, mirror it, display it and return it.

    Args:
        webcam: Capture object exposing ``read()`` that returns a
            ``(success, frame)`` pair (e.g. ``cv2.VideoCapture``).

    Returns:
        The mirrored frame produced by ``cv2.flip(frame, 1)``.

    Raises:
        RuntimeError: If the webcam does not deliver a frame.
    """
    ok, frame = webcam.read()      # Read one frame from the webcam
    if not ok or frame is None:
        # Fail with a clear message instead of letting cv2.flip choke on None.
        raise RuntimeError("Failed to read a frame from the webcam")

    # flipCode=1 flips around the vertical axis, i.e. a horizontal mirror.
    frame = cv2.flip(frame, 1)
    cv2.imshow("Output", frame)    # Show the mirrored frame
    return frame
    

### Function to detect hand keypoints (landmarks)

In [3]:
def get_hand_landmarks(hands, frame):
    """Detect hand landmarks in a frame and return them as pixel coordinates.

    Args:
        hands: MediaPipe ``Hands``-like object exposing ``process(rgb_image)``.
        frame: BGR image array whose ``shape`` is (height, width, channels).

    Returns:
        A flat list of ``[x, y]`` integer pixel pairs, one per landmark of
        every detected hand, or ``[]`` when no hand is detected.
    """
    # frame.shape is (rows, cols, channels): rows = height, cols = width.
    height, width, _ = frame.shape
    framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(framergb)      # Get hand landmark prediction

    if not result.multi_hand_landmarks:   # no hands detected
        return []

    # Scale the normalized landmark coordinates to pixels:
    # x by the frame width, y by the frame height.
    landmarks = []
    for hand in result.multi_hand_landmarks:
        for lm in hand.landmark:
            landmarks.append([int(lm.x * width), int(lm.y * height)])

    return landmarks

In [4]:
#  def get_hand_crops_(hands, frame):
    # import pdb; pdb.set_trace()

### Function to get hand landmark prediction and detect hand frame

In [5]:
def get_hand_crops(hands, frame, padding=0):
    """Crop the region of the frame that contains the first detected hand.

    Args:
        hands: MediaPipe ``Hands``-like object exposing ``process(rgb_image)``.
        frame: BGR image array whose ``shape`` is (height, width, channels).
        padding: Extra pixels added on every side of the landmark bounding
            box before cropping. Defaults to 0 (tight box).

    Returns:
        ``[]`` when no hand is detected; otherwise a copy of the frame region
        covering the first hand's landmarks. The crop can be an empty array
        when the bounding box lies outside the frame or has zero extent.
    """
    height, width, _ = frame.shape
    framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Get hand landmark prediction
    result = hands.process(framergb)

    if not result.multi_hand_landmarks:
        return []  # no hands detected

    # Only the first detected hand is cropped.
    hand_landmark = result.multi_hand_landmarks[0]
    xs = [landmark.x * width for landmark in hand_landmark.landmark]
    ys = [landmark.y * height for landmark in hand_landmark.landmark]

    # Landmarks may fall outside the image. Clamp the box to the frame so a
    # negative coordinate is never interpreted as "count from the end" by
    # array slicing.
    x1 = min(max(int(min(xs)) - padding, 0), width)
    x2 = min(max(int(max(xs)) + padding, 0), width)
    y1 = min(max(int(min(ys)) - padding, 0), height)
    y2 = min(max(int(max(ys)) + padding, 0), height)

    return frame[y1:y2, x1:x2].copy()

In [6]:
 # def identify_gesture(model, landmarks, classNames):
#     # Predict gesture
#     prediction = model.predict([landmarks])
#     # print(prediction)
#     classID = np.argmax(prediction)
#     className = classNames[classID]
#      return className
    

### Pre-processing the cropped hand image for prediction

In [7]:
def image_preprocess(image):
    """Turn a BGR hand crop into a 224x224, three-channel grayscale image.

    The image is resized to 224x224, converted to grayscale, expanded back
    to three channels and divided by 255.

    Args:
        image: BGR image array accepted by ``cv2.resize``.

    Returns:
        The processed image array (presumably in [0, 1] for 8-bit input;
        verify against the caller's data).
    """
    # Resize first, then drop the colour information.
    gray = cv2.cvtColor(cv2.resize(image, (224, 224)), cv2.COLOR_BGR2GRAY)
    # Replicate the gray channel three times and scale the values.
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB) / 255

### Function to predict the gesture from the cropped hand image

In [8]:
def get_gesture(model, hand_crop, classNames):
    """Classify a hand crop and return the matching gesture name.

    Args:
        model: Object exposing ``predict(batch)``.
        hand_crop: BGR image of the hand, as accepted by ``image_preprocess``.
        classNames: Sequence of gesture names indexed by class ID.

    Returns:
        The entry of ``classNames`` at the index of the highest score.
    """
    # The model is fed a batch containing the single preprocessed crop.
    batch = image_preprocess(hand_crop).reshape(1, 224, 224, 3)
    scores = model.predict(batch)
    return classNames[np.argmax(scores)]
                               
                                 

### Function to control media player operations

In [9]:
def invoke_music_controls(media_player, class_name):
    """Log the recognized gesture and trigger the matching player action.

    Args:
        media_player: Object exposing ``play()`` and ``stop()``.
        class_name: Name of the recognized gesture. ``"play"`` and ``"stop"``
            call the player method of the same name; any other value is only
            printed.
    """
    print(f"{class_name} invoked") #print gesture name

    # The supported gesture names coincide with the player's method names,
    # so the method can be looked up directly.
    if class_name in ("play", "stop"):
        getattr(media_player, class_name)()

    

### Function to Invoke Media player

In [10]:
def get_media_player(player_name="vlc", media_path="StarWars60.wav"):
    """Create a VLC media player for the given media file.

    Args:
        player_name: Name of the player backend. Currently ignored: a VLC
            player is always created.
        media_path: Path of the media file to load. Defaults to the file
            previously hard-coded here, so existing callers are unaffected.

    Returns:
        A ``vlc.MediaPlayer`` loaded with ``media_path``.
    """
    return vlc.MediaPlayer(media_path)

### An event loop that monitors user input continuously. This function performs the following operations:


*  Capture frames upon input from User

*  Get hand landmarks from the captured frames

*  Using the landmarks, crop the region where the hand is detected
*  Predict the gesture using the cropped hand image


*  Use predicted result to invoke music player






In [11]:
def run_loop(media_player, webcam, hands, model, class_names, delay=0.5):
    """Run the capture -> detect -> classify -> control loop until 'q' is pressed.

    Each iteration captures a frame, crops the detected hand, predicts the
    gesture from the crop and forwards the gesture to the media player.

    Args:
        media_player: Player passed on to ``invoke_music_controls``.
        webcam: Capture object passed on to ``capture_frames``.
        hands: MediaPipe ``Hands``-like object used for hand detection.
        model: Gesture classifier passed on to ``get_gesture``.
        class_names: Gesture names indexed by class ID.
        delay: Seconds to sleep between iterations.
    """
    while True:
        # Read one frame from the webcam
        frame = capture_frames(webcam)

        # get_hand_crops runs the hand detector itself and returns [] when no
        # hand is visible, so a separate landmark pass on the same frame is
        # not needed.
        crop = get_hand_crops(hands, frame)

        # Skip classification when there is no hand or the crop is empty.
        # An empty image would make cv2.resize fail.
        if crop is not None and getattr(crop, "size", 0) > 0:
            cv2.imshow("icrop", image_preprocess(crop))
            gesture_name = get_gesture(model, crop, class_names)
            if gesture_name:
                invoke_music_controls(media_player, gesture_name)

        # Mask to the low byte: some platforms set higher bits in the key code.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        time.sleep(delay)
    

### This function performs the following operations:



*   Initialize Mediapipe
*   Load Neural Network Model for Gesture Recognition
*   Load the file for Gesture Classification
*   Initialize Webcam
*   Media Player operations based on hand gestures
*   Finally release webcam and close all windows after operations







In [12]:
def monitor_for_hand_gesture():
    """Set up all resources and run the gesture-controlled media player.

    Loads the gesture model and class names, creates the media player, opens
    the default webcam and MediaPipe hand detector, then runs ``run_loop``.
    The webcam, the detector, the player and all OpenCV windows are cleaned
    up when the loop ends or an error occurs.

    Raises:
        RuntimeError: If the webcam cannot be opened.
    """
    # Load Hand Gesture Recognizer model
    h_model = load_model('model_hand_gesture.h5')

    # Load Class Names (Gesture Names); the line order defines the class IDs,
    # so no line is filtered out.
    with open('gesture.names', 'r') as f:
        class_names = [line.strip() for line in f]

    # get Media player
    media_player = get_media_player()

    # Initialize the Webcam
    webcam = cv2.VideoCapture(0)
    try:
        if not webcam.isOpened():
            raise RuntimeError("Could not open the webcam (device index 0)")

        # Initialize MediaPipe; the context manager closes the detector
        # when the loop ends.
        mp_hands = mp.solutions.hands
        with mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7) as hands:
            # Event loop that monitors user inputs continuously
            run_loop(media_player, webcam, hands, h_model, class_names)
    finally:
        # Stop playback, release the webcam and destroy all active windows
        media_player.stop()
        webcam.release()
        cv2.destroyAllWindows()


In [13]:
# Start the gesture monitor only when executed directly (script or notebook
# cell), not when this file is imported as a module.
if __name__ == "__main__":
    monitor_for_hand_gesture()


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
2023-01-15 00:18:29.308517: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


play invoked
play invoked
play invoked
stop invoked
stop invoked
play invoked
play invoked
stop invoked
stop invoked
stop invoked
play invoked
play invoked
stop invoked
stop invoked
