# In [1]:  (Jupyter notebook cell prompt left over from the export)
import cv2
import mediapipe as mp
import math
import time
import numpy as np
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# Requested capture resolution (width, height) in pixels.
wCam, hCam = 640, 480

# Open camera index 0 and request the resolution above.
# cv2.CAP_PROP_FRAME_WIDTH / cv2.CAP_PROP_FRAME_HEIGHT are the named forms of
# the numeric property ids 3 and 4.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wCam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hCam)

# Timestamp of the previous frame; the main loop uses it to compute the FPS.
pTime = 0

# MediaPipe hand tracker (default settings) and its landmark-drawing helper.
mpHands = mp.solutions.hands
hands = mpHands.Hands()
mpDraw = mp.solutions.drawing_utils

# Obtain the IAudioEndpointVolume interface of the device returned by
# AudioUtilities.GetSpeakers() (pycaw).
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# The first two entries of the reported range are used as the lower and upper
# bounds when mapping the pinch distance to a volume level.
volRange = volume.GetVolumeRange()
minVol, maxVol = volRange[0], volRange[1]

# Initial overlay state: bar top at y=400 (the bar's bottom edge, i.e. an
# empty bar), 0 %, and the (255, 0, 0) label colour.
volBar, volPer = 400, 0
colorVol = (255, 0, 0)

# Main loop: grab a frame, detect hand landmarks, map the thumb-to-index pinch
# distance onto the master volume, draw the overlay, and show the frame.
# The try/finally guarantees that the webcam and the OpenCV windows are
# released even when an exception (or Ctrl+C) interrupts the loop.
try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # No frame was delivered; cvtColor would raise on a None image,
            # so stop cleanly instead of crashing.
            print("Could not read a frame from the webcam; stopping.")
            break

        # MediaPipe is fed an RGB copy; `img` itself stays BGR for drawing.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(imgRGB)

        # Pixel coordinates of every detected landmark as [index, x, y].
        # Landmarks of all detected hands are appended in order, so the fixed
        # indices used below (4, 8, 18, 20) refer to the first detected hand.
        lmlist = []
        if results.multi_hand_landmarks:
            # The frame size does not change within a frame: read it once
            # instead of once per landmark.
            h, w = img.shape[:2]
            for handLms in results.multi_hand_landmarks:
                # `lm_id` instead of `id` so the builtin is not shadowed.
                for lm_id, lm in enumerate(handLms.landmark):
                    # lm.x / lm.y are scaled by the frame width / height.
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    lmlist.append([lm_id, cx, cy])
                mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS)

            if lmlist:
                # Distance in pixels between landmark 4 (thumb) and
                # landmark 8 (index finger).
                length = math.hypot(
                    lmlist[4][1] - lmlist[8][1],
                    lmlist[4][2] - lmlist[8][2],
                )

                # Map the 30..300 px pinch range onto the volume level, the
                # bar's top edge (350 = low, 150 = full) and a percentage.
                # np.interp clamps values outside the 30..300 range.
                vol = np.interp(length, [30, 300], [minVol, maxVol])
                volBar = np.interp(length, [30, 300], [350, 150])
                volPer = np.interp(length, [30, 300], [0, 100])

                # Set volume.
                # NOTE(review): this runs on every frame with a hand in view,
                # independent of the pinky state checked below, and uses the
                # un-quantised `vol` -- confirm that this is intended.
                volume.SetMasterVolumeLevel(int(vol), None)

                # Quantise the displayed percentage to steps of `smoothness`
                # so the on-screen number does not jitter.
                smoothness = 10
                volPer = smoothness * round(volPer / smoothness)

                # Pinky counts as "up" when landmark 20 is above landmark 18
                # in the image (smaller y).
                pinky_finger = lmlist[20][2] < lmlist[18][2]

                if not pinky_finger:
                    # Pinky down: green label and a filled marker on the thumb.
                    colorVol = (0, 255, 0)
                    cv2.circle(img, (lmlist[4][1], lmlist[4][2]), 15, (0, 255, 0), cv2.FILLED)
                else:
                    colorVol = (255, 0, 0)

        # Drawing: bar outline, filled bar from volBar down to y=400, and the
        # two percentage labels.
        cv2.rectangle(img, (50, 150), (85, 400), (0, 255, 0), 3)
        cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED)
        cv2.putText(img, f'{int(volPer)}%', (40, 450), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 250, 0), 3)
        cv2.putText(img, f'Vol set: {int(volPer)}', (400, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, colorVol, 3)

        # Frame rate. Two consecutive time.time() readings can be equal on
        # coarse clocks, so guard the division instead of raising
        # ZeroDivisionError.
        cTime = time.time()
        elapsed = cTime - pTime
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime
        cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)

        cv2.imshow("Volume Controller", img)

        # Break the loop if 'q' is pressed. The result is masked to the low
        # byte so that extra high bits in the key code do not defeat the
        # comparison.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam
    cap.release()

    # Close all OpenCV windows
    cv2.destroyAllWindows()
