In [2]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model
import math

In [3]:
# Load the gesture recognizer model with Keras' load_model.
# 'mp_hand_gesture' is a relative path, so it is resolved against the
# notebook's current working directory.
# NOTE(review): presumably a pre-trained SavedModel directory shipped next to
# this notebook -- confirm it is present before running.
model = load_model('mp_hand_gesture')

In [4]:
# Load class names: the file is split on '\n', so each line becomes one label.
# The position of a label in this list is used later as the class index.
# A context manager guarantees the file handle is closed even if read() raises.
with open('gesture.names', 'r') as f:
    classNames = f.read().split('\n')
print(classNames)

['okay', 'peace', 'thumbs up', 'thumbs down', 'call me', 'stop', 'rock', 'live long', 'fist', 'smile']


In [5]:
# Short module-level aliases for the two MediaPipe sub-modules used by
# HandDetector: `mpHands` provides the Hands solution and HAND_CONNECTIONS,
# `mpDraw` provides draw_landmarks.
mpHands = mp.solutions.hands
mpDraw = mp.solutions.drawing_utils

class HandDetector:
    """Wrapper around ``mpHands.Hands`` that returns landmarks in pixel coordinates."""

    def __init__(self, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5):
        """Create the underlying hands solution.

        All three arguments are forwarded unchanged, as keyword arguments,
        to ``mpHands.Hands``.
        """
        self.hands = mpHands.Hands(
            max_num_hands=max_num_hands,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )

    def findHandLandMarks(self, image, handNumber=0, draw=False):
        """Return the landmarks of one detected hand as pixel coordinates.

        Args:
            image: BGR image (it is converted to RGB before processing). When
                ``draw`` is true the landmarks are drawn onto this same object.
            handNumber: index into the list of detected hands. Negative
                indices count from the end, as with normal list indexing.
            draw: if true, draw the hand's landmarks and connections on ``image``.

        Returns:
            A list of ``[landmark_index, x_pixel, y_pixel]`` entries, one per
            landmark of the selected hand. The list is empty when no hand is
            detected or when ``handNumber`` does not refer to a detected hand.
        """
        rgbImage = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # mediapipe needs RGB
        results = self.hands.process(rgbImage)
        landMarkList = []

        detectedHands = results.multi_hand_landmarks  # None if no hand is found
        if not detectedHands:
            return landMarkList

        # Asking for a hand that is not in view (e.g. handNumber=1 while only
        # one hand is visible) must not raise IndexError; report "no landmarks".
        handCount = len(detectedHands)
        if not -handCount <= handNumber < handCount:
            return landMarkList

        hand = detectedHands[handNumber]

        # The image size is constant for the whole call, so read it once.
        imgH, imgW = image.shape[:2]
        for index, landMark in enumerate(hand.landmark):
            # landMark.x / landMark.y are ratios of the image width / height
            landMarkList.append([index, int(landMark.x * imgW), int(landMark.y * imgH)])

        if draw:
            mpDraw.draw_landmarks(image, hand, mpHands.HAND_CONNECTIONS)

        return landMarkList

In [13]:
# Live gesture recognition: read webcam frames, extract hand landmarks,
# classify them with `model`, and overlay the predicted label.
# Press 'q' in the preview window to stop.

# hand detector instance
handDetector = HandDetector(min_detection_confidence=0.7)

# Initialize the webcam
cap = cv2.VideoCapture(0)

try:
    while True:
        # Read each frame from the webcam; stop when no frame is delivered
        # (camera missing, busy or unplugged) instead of crashing on None.
        ok, frame = cap.read()
        if not ok or frame is None:
            break

        # Mirror the frame horizontally (flip code 1) so the preview behaves like a mirror
        frame = cv2.flip(frame, 1)

        handLandmarks = handDetector.findHandLandMarks(image=frame, draw=True)
        className = ''
        if len(handLandmarks) != 0:
            # removing the landmark index, keeping only [x, y]
            handLandmarks = [h[1:] for h in handLandmarks]
            # Predict gesture; the extra list level makes a batch with one sample
            prediction = model.predict([handLandmarks])
            classID = np.argmax(prediction)
            className = classNames[classID]

        # show the prediction on the frame
        cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 0, 255), 2, cv2.LINE_AA)

        cv2.imshow("Gesture detection", frame)
        # Mask to the low byte: some backends return extra bits above the key code
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            break
finally:
    # release the webcam and destroy all active windows, even when the loop
    # ends with an exception or a kernel interrupt
    cap.release()
    cv2.destroyAllWindows()

In [20]:
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# Gesture-driven volume control: the pixel distance between the thumb tip and
# the index fingertip is mapped onto the speaker's master volume level.
# Press 'q' in the preview window to stop.

# audio controlling utilities
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# Ask the endpoint for its own level range instead of hard-coding one device's
# values (e.g. -65.25 .. 0.0); levels outside the device's range are rejected.
# GetVolumeRange() returns (min_dB, max_dB, increment_dB).
minVolume, maxVolume = volume.GetVolumeRange()[:2]

# hand detector instance
handDetector = HandDetector(min_detection_confidence=0.7)

# Initialize the webcam
cap = cv2.VideoCapture(0)

try:
    while True:
        # Read each frame from the webcam; stop when no frame is delivered
        ok, frame = cap.read()
        if not ok or frame is None:
            break

        # Mirror the frame horizontally (flip code 1)
        frame = cv2.flip(frame, 1)

        handLandmarks = handDetector.findHandLandMarks(image=frame, draw=True)
        if len(handLandmarks) != 0:
            # each entry is [index, x, y]; drop the index
            x1, y1 = handLandmarks[4][1:]  # thumb
            x2, y2 = handLandmarks[8][1:]  # index
            length = math.hypot(x2 - x1, y2 - y1)

            # Hand range (length): 50-250 px, clamped by np.interp outside that span
            # converting length to a proportionate value in the volume range
            volumeValue = np.interp(length, [50, 250], [minVolume, maxVolume])
            volume.SetMasterVolumeLevel(float(volumeValue), None)

            cv2.circle(frame, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(frame, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)

        cv2.imshow("Volume", frame)
        # Mask to the low byte: some backends return extra bits above the key code
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            break
finally:
    # release the webcam and destroy all active windows, even when the loop
    # ends with an exception or a kernel interrupt
    cap.release()
    cv2.destroyAllWindows()

In [18]:
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# Virtual button demo: a filled circle turns red while the index fingertip is
# inside it and green otherwise. Press 'q' in the preview window to stop.

# audio controlling utilities
# NOTE(review): nothing in this cell uses `devices`, `interface` or `volume`;
# the setup is kept only so the module-level names stay bound as before.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# hand detector instance
handDetector = HandDetector(min_detection_confidence=0.7)

# Initialize the webcam
cap = cv2.VideoCapture(0)

circle_radius = 25
centroid = (110,210)
circle_color = (0,255,0)

try:
    while True:
        # Read each frame from the webcam; stop when no frame is delivered
        ok, frame = cap.read()
        if not ok or frame is None:
            break

        # Mirror the frame horizontally (flip code 1)
        frame = cv2.flip(frame, 1)

        # Detect on the clean frame first: painting the filled circle before
        # detection would hide the pixels under it from the hand detector and
        # make the colour lag one frame behind the finger.
        handLandmarks = handDetector.findHandLandMarks(image=frame, draw=False)
        index_finger = None
        if len(handLandmarks) != 0:
            index_finger = handLandmarks[8][1], handLandmarks[8][2]  # index finger
            dist = math.dist(centroid, index_finger)
            print(index_finger, centroid, "------", dist)
            if dist <= circle_radius:
                circle_color = (0,0,255)
                print("pressed")
            else:
                circle_color = (0,255,0)

        # constant circle; keeps its last colour while no hand is visible
        cv2.circle(frame, centroid, circle_radius, circle_color, cv2.FILLED)
        if index_finger is not None:
            # fingertip marker goes on top of the button
            cv2.circle(frame, index_finger, 5, (0, 0, 0), cv2.FILLED)

        cv2.imshow("Change in color", frame)
        # Mask to the low byte: some backends return extra bits above the key code
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            break
finally:
    # release the webcam and destroy all active windows, even when the loop
    # ends with an exception or a kernel interrupt
    cap.release()
    cv2.destroyAllWindows()