In [2]:
import math
from ctypes import cast,POINTER

import cv2
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities,IAudioEndpointVolume

In [3]:
# Handle to MediaPipe's hand-tracking solution module
mp_hands = mp.solutions.hands

# Helpers for drawing hand landmarks onto an image
mp_drawing = mp.solutions.drawing_utils

# Hand detector that produces the landmark points: static_image_mode=False
# (MediaPipe's video-stream mode), at most two hands, and a 0.3 minimum
# detection confidence
hands = mp_hands.Hands(
    min_detection_confidence=0.3,
    max_num_hands=2,
    static_image_mode=False,
)

In [4]:
# Look up the speaker device through pycaw
devices = AudioUtilities.GetSpeakers()
# Activate the IAudioEndpointVolume COM interface on that device
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
# Cast the activated pointer to IAudioEndpointVolume so its methods can be called
volume = cast(interface, POINTER(IAudioEndpointVolume))
# Keep the first two entries of the reported range as the lower/upper volume bounds
# (the recorded run printed -65.25 and 0.0; presumably decibels - confirm on the target machine)
volMin, volMax = volume.GetVolumeRange()[:2]
# Print the objects and bounds as a quick sanity check
print(devices,'\n',interface,'\n',volume,'\n',volMin,'\t',volMax)

<POINTER(IMMDevice) ptr=0x1e210063100 at 1e211e99c40> 
 <POINTER(IUnknown) ptr=0x1e20fc129a0 at 1e211e99bc0> 
 <POINTER(IAudioEndpointVolume) ptr=0x1e20fc129a0 at 1e211e99cc0> 
 -65.25 	 0.0


In [5]:
def distance(x1,y1,x2,y2):
    """Return the Euclidean distance between the points (x1, y1) and (x2, y2).

    Args:
        x1, y1: Coordinates of the first point.
        x2, y2: Coordinates of the second point.

    Returns:
        The straight-line distance between the two points as a float.
    """
    # math.hypot avoids the intermediate overflow that squaring large
    # coordinate differences by hand can trigger.
    return math.hypot(x2 - x1, y2 - y1)

In [None]:
# Thumb-tip to index-tip pixel distances that map to the lowest and highest
# volume levels; np.interp clamps distances that fall outside this range.
PINCH_DIST_RANGE = [20, 220]

capture = cv2.VideoCapture(0)
try:
    # Read one frame up front to learn the frame size used for scaling landmarks.
    istrue, frame = capture.read()
    if not istrue:
        raise RuntimeError('Could not read a frame from camera 0')
    image_height,image_width,_=frame.shape
    print(image_height,image_width)

    while True:
        istrue, frame = capture.read()
        if not istrue:
            # No frame was delivered (camera unplugged or stream ended): stop
            # instead of passing None to OpenCV.
            break

        # OpenCV delivers BGR frames; the hand detector is given RGB.
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if results.multi_hand_landmarks:
            # Every detected hand sets the volume, so with two hands in view
            # the last one processed wins.
            for hand_landmarks in results.multi_hand_landmarks:
                thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
                index_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]

                # Scale the landmark coordinates by the frame size to get pixels.
                thumb_x=int(thumb_tip.x * image_width)
                thumb_y=int(thumb_tip.y * image_height)
                index_x=int(index_tip.x * image_width)
                index_y=int(index_tip.y * image_height)

                # Visualise the pinch: a line between the tips and a dot on each.
                cv2.line(frame,(thumb_x,thumb_y),(index_x,index_y),(255,0,0),10)
                cv2.circle(frame,(thumb_x,thumb_y), 15,(0, 255, 0),-1,8)
                cv2.circle(frame,(index_x,index_y), 15,(0, 255, 0),-1,8)

                dist=distance(thumb_x,thumb_y,index_x,index_y)
                cv2.putText(frame,str(dist),(index_x,index_y),cv2.FONT_HERSHEY_SIMPLEX,1,(155,155,0),5)

                # Map the pinch distance linearly onto the endpoint's volume range.
                vol = float(np.interp(dist, PINCH_DIST_RANGE, [volMin, volMax]))
                volume.SetMasterVolumeLevel(vol, None)

        cv2.imshow('hand_landmark',frame)
        # Press 'q' in the preview window to quit.
        if cv2.waitKey(1) & 0xFF==ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error or a
    # kernel interrupt, so the device is not left locked.
    capture.release()
    cv2.destroyAllWindows()