In [2]:
import cv2      #library for image
import mediapipe as mp  #tasks like hand tracking, pose estimation, object detection, and more. 
from math import hypot  #hypot - returns the Euclidean norm (distance from the origin to the coordinates given)
from ctypes import cast, POINTER    #enable you to define the necessary data types, call functions from shared libraries, pass data between Python and C, and handle error conditions.
from comtypes import CLSCTX_ALL     
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import numpy as np



#connect to the default camera
cap = cv2.VideoCapture(0)   


#initialize mediapipe hands
mp_Hands = mp.solutions.hands   #detect the landmarks of the hands in an image
hands = mp_Hands.Hands()
mp_Draw = mp.solutions.drawing_utils       # functions for visualizing and drawing landmarks


#accessing the speakers using pycaw
devices = AudioUtilities.GetSpeakers()      #retrieves the collection of audio devices, specifically the speakers

interface = devices.Activate(IAudioEndpointVolume.id, CLSCTX_ALL, None)      #Activate() method is called on the speakers to activate the audio endpoint volume control interface. It takes three arguments: the interface identifier 

volume = cast(interface, POINTER(IAudioEndpointVolume))     #cast() function is used to convert the interface object to a pointer of type 
                                                            #IAudioEndpointVolume. It allows you to access the methods and properties of the IAudioEndpointVolume interface.


# finds the volume range between the minimum and maximum volume. We place it outside the while loop because we need to find the volume range once.
volMin, volMax = volume.GetVolumeRange()[:2]       # [:2] , it selects the first two elements of the iterable returned by volume.GetVolumeRange().


#capturing an image from the camera
while True:                            #allows for continuous processing of frames.
    status, image = cap.read()        #status indicates whether the frame was successfully read, and image contains the captured frame data.
    imageRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)   # converts the color space of the captured frame from BGR (Blue-Green-Red) to RGB (Red-Green-Blue).
    results = hands.process(imageRGB)       # performs hand detection on the image and returns the results, which may include the detected hand landmarks, gestures, or other relevant information.


#check if there are multiple hands in the input
lmlist = []
if results.multi_hand_landmarks:                    #creates an empty list that will store the list of elements of the hands detected by the mediapipe hand module
    
    #create loop for multiple hands
    for handlandmark in results.multi_hand_landmarks:          #allows accessing each individual hand's landmarks.
        for id, lm in enumerate(handlandmark.landmark):        #iterates over the landmarks within each detected hand. It assigns an index (id) and the landmark object (lm) to each iteration.
            h, w, c = image.shape                              # retrieves the height (h), width (w), and number of channels (c) of the image captured from the video source.
            cx, cy = int(lm, x*w), int(lm.y*h)                 #calculates the pixel coordinates (cx, cy) of the current landmark (lm) by multiplying the normalized landmark coordinates (lm.x, lm.y) with the width and height of the image, respectively. The coordinates are then converted to integers.
            lmlist.append([id, cx, cy])                        #used to store and process the landmark information.    
        mp_Draw.Draw_landmarks(image, handlandmark, mp_Hands.HAND.CONNECTIONS)      #draws the landmarks and connections on the image



#Specifying the points of the thumb and middle finger 
if lmList != []:                            #checks if the lmList is not empty as lmList is assumed to be a list containing hand landmark information.
    x1, y1 = lmList[4][1], lmList[4][2]     # assigns the x-coordinate (x1) and y-coordinate (y1) values of the landmark at index 4 to variables x1 and y1, respectively. 
    x2, y2 = lmList[8][1], lmList[8][2]     #assigns the x-coordinate (x2) and y-coordinate (y2) values of the landmark at index 8 to variables x2 and y2, respectively.



#Drawing a circle between the tip of the thumb and the tip of the index finger     
cv2.circle(img, (x1, y1), 15, (255, 0, 0), cv2.FILLED)      #draws a filled circle on the img image using the cv2.circle() function. #cv2.FILLED parameter specifies that the circle should be filled rather than just an outline.
                                                            #circle is centered at the coordinates (x1, y1) and has a radius of 15 pixels. 
                                                            #The (255, 0, 0) tuple represents the color of the circle in BGR format, where (255, 0, 0) corresponds to blue.
cv2.circle(img, (x2, y2), 15, (255, 0, 0), cv2.FILLED)      #draws another filled circle on the img image. 


#Drawing a line between points 4 and 8
cv2.line(img, (x1, y1), (x2, y2), (255, 0, 0), 3)       #draws a line on the img
                                                        #connects two points: (x1, y1) and (x2, y2). 
                                                        #The (255, 0, 0) tuple represents the color of the line in BGR format, where (255, 0, 0) corresponds to blue. 
                                                        #The 3 parameter specifies the thickness of the line in pixels.



#Finding the distance between points 4 and 8
length = hypot(x2 - x1, y2 - y1)                         #calculated value of the hypotenuse, which represents the distance between the two points


#Converting the hand range to the volume range
vol = np.interp(length, [15, 220], [volMin, volMax])    # this interpolate the length value. # Interpolation is a method for estimating values within a given range based on known data points.
                                                        #[15, 220] represents the range of hand measurements
print(vol, length)                        



#setting the master volume
volume.SetMasterVolumeLevel(vol, None)                  # there is no specific output device specified for the volume control.



#Displaying the video output used to interact with the user
cv2.imshow('Image', image)

 


cv2.waitKey()                      #it means 0.1 second it means we are clicking photograph 10 times in 1 second
cv2.destroyAllWindows()

AttributeError: type object 'IAudioEndpointVolume' has no attribute 'id'

In [7]:
pip install --upgrade protobuf

Collecting protobuf
  Downloading protobuf-4.23.3-cp39-cp39-win_amd64.whl (422 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.19.1
    Uninstalling protobuf-3.19.1:
      Successfully uninstalled protobuf-3.19.1
Successfully installed protobuf-4.23.3
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mediapipe 0.10.0 requires protobuf<4,>=3.11, but you have protobuf 4.23.3 which is incompatible.


In [2]:
pip install pycaw

Collecting pycaw
  Downloading pycaw-20230407-py3-none-any.whl (24 kB)
Installing collected packages: pycaw
Successfully installed pycaw-20230407
Note: you may need to restart the kernel to use updated packages.
