In [None]:
import cv2
import mediapipe as mp
import numpy as np
import math
import time
from collections import deque

# --------- Volume Control (Windows via pycaw) ----------
from ctypes import POINTER, cast
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume


In [None]:

# --------- Audio endpoint (Windows via pycaw) ----------
# Activate the IAudioEndpointVolume interface on the default speakers and
# read the level range the endpoint reports; the first two entries of the
# range are used below as the lower and upper bounds for the volume mapping.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
vol_range = volume.GetVolumeRange()
min_vol, max_vol = vol_range[0], vol_range[1]

# --------- Hand Detection Setup -----------
# A single Hands instance is reused for every frame; both confidence
# thresholds are set to 0.7.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
mp_draw = mp.solutions.drawing_utils

# --------- Camera ----------
# Device index 0 through the DirectShow backend (Windows only).
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
if not cap.isOpened():
    # Fail loudly here: otherwise the capture loop just ends on its first
    # failed read with no hint about what went wrong.
    raise RuntimeError(
        "Could not open camera 0 via DirectShow. "
        "Check that a webcam is connected and not in use by another application."
    )

# Rolling window of the last 5 volume percentages, averaged to smooth jitter.
vol_history = deque(maxlen=5)
# Timestamp of the previous frame, used for the FPS overlay.
prev_time = time.time()



In [None]:
# Calibrate the thumb-index pinch range, then drive the system volume from it.
#
# Phase 1 (first CALIB_FRAMES frames in which a hand is detected): record the
# smallest and largest thumb-tip / index-tip distance in pixels.
# Phase 2: map the current distance onto 0-100 %, smooth it over the last few
# frames, and push it to the audio endpoint. Press q or ESC to stop.
print("🔧 Calibration: Move thumb and index from closed to open for few seconds...")

CALIB_FRAMES = 30
calib_frames = CALIB_FRAMES
min_dist, max_dist = float('inf'), 0.0

# Make the cell re-runnable: a previous run releases the capture on exit, so
# reopen it if needed and drop smoothing samples left over from that run.
if not cap.isOpened():
    cap.open(0, cv2.CAP_DSHOW)
vol_history.clear()

# perf_counter is monotonic and high resolution; starting the clock here
# keeps the first FPS reading from including time spent between cells.
prev_time = time.perf_counter()

try:
    while True:
        success, frame = cap.read()
        if not success:
            print("Could not read a frame from the camera; stopping.")
            break

        # Mirror the image so on-screen movement matches the user's hand.
        frame = cv2.flip(frame, 1)
        h, w, _ = frame.shape

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)

        if results.multi_hand_landmarks:
            # Only the first detected hand controls the volume.
            hand = results.multi_hand_landmarks[0]
            mp_draw.draw_landmarks(frame, hand, mp_hands.HAND_CONNECTIONS)

            # Landmark coordinates are scaled by the frame size to get pixels.
            lm = hand.landmark
            x1, y1 = int(lm[4].x * w), int(lm[4].y * h)   # Thumb tip
            x2, y2 = int(lm[8].x * w), int(lm[8].y * h)   # Index tip

            dist = math.hypot(x2 - x1, y2 - y1)

            if calib_frames > 0:
                calib_frames -= 1
                min_dist = min(min_dist, dist)
                max_dist = max(max_dist, dist)
                cv2.putText(frame, f"Calibrating... {calib_frames}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            elif max_dist <= min_dist:
                # The hand never moved during calibration, so there is no
                # usable range (np.interp needs increasing sample points).
                # Start calibration over instead of setting a bogus volume.
                calib_frames = CALIB_FRAMES
                min_dist, max_dist = float('inf'), 0.0
                cv2.putText(frame, "Calibration failed - retrying", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            else:
                # Map distance to volume (0-100%). np.interp clamps inputs
                # outside [min_dist, max_dist], so no separate clip is needed.
                vol_percent = np.interp(dist, [min_dist, max_dist], [0, 100])
                vol_history.append(vol_percent)
                smooth_vol = sum(vol_history) / len(vol_history)

                # Convert to dB and set volume.
                # NOTE(review): this is linear in the endpoint's level range,
                # so the percentage drawn below may not match the Windows
                # volume slider; SetMasterVolumeLevelScalar might be a closer
                # fit - confirm before changing.
                vol_db = np.interp(smooth_vol, [0, 100], [min_vol, max_vol])
                volume.SetMasterVolumeLevel(float(vol_db), None)

                # Draw visuals: pinch line, midpoint marker, and volume bar.
                cv2.line(frame, (x1, y1), (x2, y2), (255, 0, 0), 3)
                cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
                cv2.circle(frame, (cx, cy), 8, (255, 0, 0), -1)
                cv2.rectangle(frame, (30, 100), (60, 400), (255, 255, 255), 2)
                # Bar grows upward: 0 % -> y=400 (bottom), 100 % -> y=100 (top).
                vol_bar = np.interp(smooth_vol, [0, 100], [400, 100])
                cv2.rectangle(frame, (30, int(vol_bar)), (60, 400), (0, 255, 0), -1)
                cv2.putText(frame, f"{int(smooth_vol)}%", (25, 430),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

        # FPS (guarded: two consecutive clock readings can be equal).
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0
        prev_time = curr_time
        cv2.putText(frame, f"FPS: {int(fps)}", (w - 120, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (200, 200, 200), 2)

        cv2.imshow("Gesture Volume Controller", frame)
        # Mask to the low byte so the comparison also works on backends that
        # return extra modifier bits in the key code.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:  # q or ESC
            break
finally:
    # Always free the webcam and close the window, even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()
