Using PyCAW library to control system audio

In [68]:
import time as time
import cv2
import mediapipe as mp
import HandTrackingModule as htm
import math
import numpy as np

### Importing pycaw and inserting the template code from the pycaw repository

In [69]:
import pycaw

Template code from: https://github.com/AndreMiras/pycaw

#### pycaw-related imports

In [70]:
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

Use volume.GetVolumeRange() to see what the volume range is for our system

In [104]:
# Ask pycaw for the speaker device and activate its endpoint-volume
# COM interface on it.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)

# Cast the activated interface to a typed IAudioEndpointVolume pointer;
# the cells below call its volume methods through `volume`.
volume = cast(interface, POINTER(IAudioEndpointVolume))

#### Obtaining the volume range

In [105]:
# Query the endpoint's volume limits; the first two entries of the result
# are kept as the lower and upper bound for the gesture mapping.
volRange = volume.GetVolumeRange()
minVol, maxVol = volRange[0], volRange[1]

In [106]:
# Requested webcam frame size in pixels (width, height).
wcam, hcam = 1080, 720

# Timestamps of the previous and the current frame, used for the FPS readout.
pTime = cTime = 0

PyCAW (Python Core Audio Windows library) is a Python library that provides a high-level interface for accessing the Windows Core Audio API. This library allows Python programs to enumerate and control audio devices, sessions, and their properties on Windows machines.

In [107]:
# Gesture-controlled volume: the distance between the thumb tip and the
# index-finger tip is mapped onto the system volume range and drawn as a
# volume bar on the webcam preview. Press 'q' in the preview window to quit.

# Open webcam 0 and request the configured frame size.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wcam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hcam)

# Changing detection confidence in order to prevent model from picking up noise
detector = htm.HandDetector(detectionCon=0.7)

# Thumb-to-index distance in pixels that maps to the lowest and the highest
# volume. All three interpolations below share this range; np.interp clamps
# anything outside it to the nearest end.
pinchRange = [20, 250]

# Values drawn on screen until a hand has been detected for the first time.
vol = 0
volBar = 400
volPer = 0

try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame could be grabbed (camera missing, busy or unplugged);
            # img is not usable in that case, so stop instead of crashing.
            print('Could not read a frame from the camera - stopping.')
            break

        img = detector.findHands(img)
        landmark_list = detector.findPosition(img, draw=False)

        # For gesture recognition we need:
        #   landmark 4 - tip of thumb
        #   landmark 8 - tip of index finger
        # so skip the frame unless both entries are present.
        if landmark_list and len(landmark_list) > 8:
            x1, y1 = landmark_list[4][1], landmark_list[4][2]
            x2, y2 = landmark_list[8][1], landmark_list[8][2]

            # Mark both fingertips and connect them with a line.
            cv2.circle(img, (x1, y1), 5, (255, 0, 0), cv2.FILLED)
            cv2.circle(img, (x2, y2), 5, (255, 0, 0), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (0, 255, 255), 2)

            # Midpoint of the line.
            cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
            cv2.circle(img, (cx, cy), 5, (0, 255, 0), cv2.FILLED)

            # Length of the line: square root of the sum of squared deltas.
            length = math.hypot(x2 - x1, y2 - y1)

            # np.interp(input_value, original_range, desired_range):
            # convert the pinch distance to the system volume range.
            # NOTE(review): SetMasterVolumeLevel works on the values reported
            # by GetVolumeRange(), so volPer is a percentage of that range and
            # may not match the OS volume slider - confirm.
            vol = np.interp(length, pinchRange, [minVol, maxVol])

            # Same distance scaled to the on-screen bar. The bar spans
            # y=400 (empty) to y=150 (full), hence the reversed range.
            volBar = np.interp(length, pinchRange, [400, 150])

            # Volume level in terms of percentage.
            volPer = np.interp(length, pinchRange, [0, 100])

            # Per-frame trace of the mapped values (floods the cell output;
            # drop it once the ranges are tuned).
            print(f'vol {vol} volBar {volBar} volPer {volPer}')

            volume.SetMasterVolumeLevel(vol, None)

            # Turn the midpoint red while the fingers are closer than 150 px.
            if length < 150:
                cv2.circle(img, (cx, cy), 5, (0, 0, 255), cv2.FILLED)

        # Visual representation of sound bar on the image: outline, fill, label.
        cv2.rectangle(img, (50, 150), (80, 400), (0, 255, 0), 3)
        cv2.rectangle(img, (50, int(volBar)), (80, 400), (0, 255, 0), cv2.FILLED)
        cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX, 1,
                    (0, 255, 0), 3)

        # Frame rate. Two frames can share a timestamp on a coarse clock,
        # so guard the division.
        cTime = time.time()
        elapsed = cTime - pTime
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime
        cv2.putText(img, str(int(fps)), (18, 78), cv2.FONT_HERSHEY_COMPLEX, 3, (255, 0, 255), 4)

        cv2.imshow('Video', img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, including when
    # the loop ends through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

vol -48.14861123127277 volBar 334.47743766771174 volPer 26.2090249329153
vol -60.392726909905164 volBar 381.3897582754987 volPer 7.444096689800508
vol -62.9804347826087 volBar 391.30434782608694 volPer 3.4782608695652173
vol -62.48903490641276 volBar 389.4215896797424 volPer 4.231364128103048
vol -59.22684164906294 volBar 376.9227649389385 volPer 9.230894024424611
vol -63.097819972828475 volBar 391.75409951275276 volPer 3.2983601948988888
vol -60.59723542879231 volBar 382.1733158191276 volPer 7.130673672348953
vol -63.564211342934755 volBar 393.5410396281025 volPer 2.583584148758997
vol -63.657759017521855 volBar 393.89945983724846 volPer 2.440216065100606
vol -62.75564566791356 volBar 390.44308685024356 volPer 3.822765259902581
vol -64.644042081383 volBar 397.67832215089277 volPer 0.9286711396428932
vol -65.25 volBar 400.0 volPer 0.0
vol -63.814520377210926 volBar 394.5000780736051 volPer 2.199968770557972
vol -64.44222788924843 volBar 396.9050876982699 volPer 1.2379649206920547
vol -

In [52]:
# Manual cleanup cell: closes any OpenCV windows that are still open.
# Run it by hand if the 'Video' preview window lingers after the capture cell stops.
cv2.destroyAllWindows()