In [2]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model
import math

In [3]:
# initialize mediapipe
mpHands = mp.solutions.hands
hands = mpHands.Hands(max_num_hands=1, min_detection_confidence=0.7)
mpDraw = mp.solutions.drawing_utils

In [4]:
# Load the gesture recognizer model
model = load_model('mp_hand_gesture')

In [5]:
# Load class names
f = open('gesture.names', 'r')
classNames = f.read().split('\n')
f.close()
print(classNames)

['okay', 'peace', 'thumbs up', 'thumbs down', 'call me', 'stop', 'rock', 'live long', 'fist', 'smile']


In [6]:
class HandDetector:
    def __init__(self, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5):
        #when the mediapipe is first started, it detects the hands. After that it tries to track the hands
        #as detecting is more time consuming than tracking. If the tracking confidence goes down than the
        #specified value then again it switches back to detection
        self.hands = mpHands.Hands(max_num_hands=max_num_hands, min_detection_confidence=min_detection_confidence,
                                   min_tracking_confidence=min_tracking_confidence)


    def findHandLandMarks(self, image, handNumber=0, draw=False):
        originalImage = image
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # mediapipe needs RGB
        results = self.hands.process(image)
        landMarkList = []

        if results.multi_hand_landmarks:  # returns None if hand is not found
            hand = results.multi_hand_landmarks[handNumber] #results.multi_hand_landmarks returns landMarks for all the hands

            for id, landMark in enumerate(hand.landmark):
                # landMark holds x,y,z ratios of single landmark
                imgH, imgW, imgC = originalImage.shape  # height, width, channel for image
                xPos, yPos = int(landMark.x * imgW), int(landMark.y * imgH)
                landMarkList.append([id, xPos, yPos])

            if draw:
                mpDraw.draw_landmarks(originalImage, hand, mpHands.HAND_CONNECTIONS)

        return landMarkList

In [6]:
# Initialize the webcam
cap = cv2.VideoCapture(0)

while True:
    # Read each frame from the webcam
    _, frame = cap.read()

    x, y, c = frame.shape

    # Flip the frame vertically
    frame = cv2.flip(frame, 1)
    framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Get hand landmark prediction
    result = hands.process(framergb)

    # print(result)
    
    className = ''

    # post process the result
    if result.multi_hand_landmarks:
        landmarks = []
        for handslms in result.multi_hand_landmarks:
            for lm in handslms.landmark:
                # print(id, lm)
                # lm.x and lm.y are x and y co-ordinates normalized w.r.t image width and height resp
                lmx = int(lm.x * x) 
                lmy = int(lm.y * y)

                landmarks.append([lmx, lmy])
            
            # Drawing landmarks on frames
            mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)

            # Predict gesture
            prediction = model.predict([landmarks])
            # print(prediction)
            classID = np.argmax(prediction)
            className = classNames[classID]

    # show the prediction on the frame
    cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 
                   1, (0,0,255), 2, cv2.LINE_AA)

    # Show the final output
    cv2.imshow("Output", frame) 

    if cv2.waitKey(1) == ord('q'):
        break

# release the webcam and destroy all active windows
cap.release()

cv2.destroyAllWindows()

In [9]:
# hand detector instance
handDetector = HandDetector(min_detection_confidence=0.7)

# Initialize the webcam
cap = cv2.VideoCapture(0)

while True:
    # Read each frame from the webcam
    _, frame = cap.read()
    x, y, c = frame.shape
    # Flip the frame vertically
    frame = cv2.flip(frame, 1)
    framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    handLandmarks = handDetector.findHandLandMarks(image=frame, draw=True)
    className = ''
    if len(handLandmarks) != 0:
        pass
#         # Predict gesture
#         prediction = model.predict([handLandmarks])
#         # print(prediction)
#         classID = np.argmax(prediction)
#         className = classNames[classID]

#     # show the prediction on the frame
#     cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 
#                    1, (0,0,255), 2, cv2.LINE_AA)

    cv2.imshow("Gesture detection", frame)
    if cv2.waitKey(1) == ord('q'):
        break

# release the webcam and destroy all active windows
cap.release()
cv2.destroyAllWindows()

In [10]:
h[1:] for h in handLandmarks

[[0, 485, 467],
 [1, 422, 450],
 [2, 368, 406],
 [3, 332, 365],
 [4, 297, 337],
 [5, 394, 322],
 [6, 368, 268],
 [7, 349, 237],
 [8, 333, 207],
 [9, 432, 310],
 [10, 412, 245],
 [11, 400, 205],
 [12, 387, 168],
 [13, 469, 315],
 [14, 459, 252],
 [15, 452, 213],
 [16, 443, 176],
 [17, 507, 332],
 [18, 506, 285],
 [19, 505, 254],
 [20, 502, 222]]

In [7]:
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# audio controlling utilities
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# hand detector instance
handDetector = HandDetector(min_detection_confidence=0.7)

# Initialize the webcam
cap = cv2.VideoCapture(0)

while True:
    # Read each frame from the webcam
    _, frame = cap.read()
    x, y, c = frame.shape
    # Flip the frame vertically
    frame = cv2.flip(frame, 1)
    framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    handLandmarks = handDetector.findHandLandMarks(image=frame, draw=True)
    if len(handLandmarks) != 0:
        x1, y1 = handLandmarks[4][1], handLandmarks[4][2] # index finger
        x2, y2 = handLandmarks[8][1], handLandmarks[8][2] # thumb
        length = math.hypot(x2-x1, y2-y1)

        #Hand range(length): 50-250
        #Volume Range: (-65.25, 0.0)

        volumeValue = np.interp(length, [50, 250], [-65.25, 0.0]) #coverting length to proportionate to volume range
        volume.SetMasterVolumeLevel(volumeValue, None)


        cv2.circle(frame, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
        cv2.circle(frame, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
        cv2.line(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)

    cv2.imshow("Volume", frame)
    if cv2.waitKey(1) == ord('q'):
        break

# release the webcam and destroy all active windows
cap.release()
cv2.destroyAllWindows()

[1, 394, 388]
88.28363381737297
[1, 433, 390]
162.68988905276197
[1, 437, 391]
154.33081351434652
[1, 434, 401]
142.1724305201258
[1, 432, 403]
142.44297104455524
[1, 437, 406]
135.72398461583717
[1, 437, 405]
135.35878250043476
[1, 438, 406]
129.6919426949878
[1, 440, 407]
127.88275880665071
[1, 442, 403]
130.9427355755179
[1, 441, 404]
130.64838307457157
[1, 443, 403]
131.60547101089682
[1, 445, 405]
129.9884610263542
[1, 451, 401]
20.591260281974
[1, 452, 397]
17.4928556845359
[1, 455, 398]
24.351591323771842
[1, 454, 400]
65.96969000988257
[1, 452, 401]
103.40696301506975
[1, 452, 401]
115.43396380615195
[1, 451, 400]
129.3560976529518
[1, 453, 401]
34.785054261852174
[1, 456, 400]
26.570660511172843
[1, 460, 398]
20.615528128088304
[1, 458, 398]
18.110770276274835
[1, 460, 400]
16.1245154965971
[1, 460, 398]
16.278820596099706
[1, 459, 398]
15.297058540778355
[1, 453, 398]
35.60898762952971
[1, 455, 395]
38.600518131237564
[1, 458, 393]
36.796738985948195
[1, 456, 392]
33.97057550

Collecting pycaw
  Downloading pycaw-20181226.tar.gz (5.7 kB)
Collecting enum34
  Downloading enum34-1.1.10-py3-none-any.whl (11 kB)
Collecting psutil
  Using cached psutil-5.9.0-cp39-cp39-win_amd64.whl (245 kB)
Collecting future
  Using cached future-0.18.2.tar.gz (829 kB)
Building wheels for collected packages: pycaw, future
  Building wheel for pycaw (setup.py): started
  Building wheel for pycaw (setup.py): finished with status 'done'
  Created wheel for pycaw: filename=pycaw-20181226-py3-none-any.whl size=6491 sha256=a0929a60f7ec65e8f440141949f88f12664e8c25058f76fa4dd815c817ea58ba
  Stored in directory: c:\users\hamza.usman\appdata\local\pip\cache\wheels\e7\55\05\655307d9f243cb16a97e664b2eeb0d1c3d41e041d49e4ddff0
  Building wheel for future (setup.py): started
  Building wheel for future (setup.py): finished with status 'done'
  Created wheel for future: filename=future-0.18.2-py3-none-any.whl size=491070 sha256=69f411560a89de1fc81449e0200232b3197590b927a6e5f166e8654f325eee65
  St