# In [3]:  (Jupyter notebook cell prompt left over from the export)
import cv2
import mediapipe as mp
from math import hypot
import screen_brightness_control as sbc
import numpy as np
from collections import deque
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
import time
import ctypes

# ---------- Gesture tuning constants ----------
# Thumb-to-ring-fingertip distance thresholds (in frame pixels) for the mute
# pinch. Two thresholds give hysteresis: the pinch starts below PINCH_ON and
# is only considered released once the distance reaches PINCH_OFF.
PINCH_ON = 28   # pinch threshold
PINCH_OFF = 40  # release threshold

# ---------- Mutable gesture state (updated by the main loop) ----------
# time.time() stamps of the most recent toggles, used for debouncing.
last_play_toggle = 0.0
last_mute_toggle = 0.0
# Whether the mute pinch was active on the previously processed frame.
prev_mute_gesture = False

# Rolling windows holding the last five raw readings; smooth_value() averages
# them to damp jitter in the brightness and volume levels.
history_brightness = deque(maxlen=5)
history_volume = deque(maxlen=5)

# ---------- MediaPipe hand tracker ----------
mpHands = mp.solutions.hands
hands = mpHands.Hands(
    min_detection_confidence=0.75,
    min_tracking_confidence=0.75,
)
Draw = mp.solutions.drawing_utils

# ---------- Webcam (capture device index 0) ----------
cap = cv2.VideoCapture(0)

# ---------- Default audio output device ----------
# Activate the IAudioEndpointVolume interface of the speakers and cast it to
# a typed COM pointer; `volume` is what the loop uses for level and mute.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

def smooth_value(history, value):
    """Record *value* in *history* and return the truncated running mean.

    *history* is mutated in place via ``append``; when it is a bounded
    ``deque`` the oldest sample drops out automatically, which makes the
    result a moving average over the most recent samples.

    Returns the mean of everything currently in *history*, converted with
    ``int()`` (i.e. truncated toward zero, not rounded).
    """
    history.append(value)
    total = sum(history)
    count = len(history)
    return int(total / count)

# Virtual-key code that send_play_pause() passes to keybd_event.
VK_MEDIA_PLAY_PAUSE = 0xB3

# Local guess at the player state, used only for the on-screen label.
# It starts out as "playing" and is flipped on every simulated key press;
# the real state of the media player is never queried.
is_playing = True

def send_play_pause():
    """Simulate one press of the media Play/Pause key and flip ``is_playing``.

    Uses ``ctypes.windll.user32.keybd_event``, so it only works on Windows.
    The key is sent as a press followed by a release. ``is_playing`` is a
    local bookkeeping flag for the on-screen label; it is toggled
    unconditionally and does not reflect the player's real state.
    """
    global is_playing

    # dwFlags values for keybd_event: 0 = key down, 2 = KEYEVENTF_KEYUP.
    key_down, key_up = 0, 2
    user32 = ctypes.windll.user32
    for flags in (key_down, key_up):
        user32.keybd_event(VK_MEDIA_PLAY_PAUSE, 0, flags, 0)

    # Keep the OSD label in step with the key press just sent.
    is_playing = not is_playing


# ---------- Main capture / gesture loop ----------
# Per frame: grab and mirror a webcam image, run MediaPipe hand detection,
# convert fingertip distances into brightness / volume / mute / play-pause
# actions, draw the on-screen display and show the frame. Press "q" in the
# window to quit.

# Whether the thumb+pinky pinch was active on the previously processed frame.
# Play/pause fires only on the transition into the pinch, so holding the
# pinch no longer re-toggles playback every 0.5 s (same edge-triggered idea
# as the mute gesture).
prev_play_gesture = False

try:
    while True:
        ok, frame = cap.read()
        if not ok or frame is None:
            # No frame was delivered (camera missing, unplugged or stream
            # ended). Stop instead of passing None to cv2.flip, which raises.
            break

        frame = cv2.flip(frame, 1)  # mirror horizontally
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        process = hands.process(frameRGB)

        # Frame size is the same for every landmark, so read it once.
        h, w = frame.shape[:2]

        # Flat list of [landmark_id, x_px, y_px]; normalized landmark
        # coordinates are scaled to pixel coordinates of the frame.
        landmarkList = []
        if process.multi_hand_landmarks:
            for handlm in process.multi_hand_landmarks:
                for _id, lm in enumerate(handlm.landmark):
                    landmarkList.append([_id, int(lm.x * w), int(lm.y * h)])

                Draw.draw_landmarks(frame, handlm, mpHands.HAND_CONNECTIONS)

        if landmarkList:
            # Fingertip positions. Entries are appended hand by hand, so
            # these low indices always refer to the first detected hand.
            x_thumb, y_thumb = landmarkList[4][1], landmarkList[4][2]      # thumb tip
            x_index, y_index = landmarkList[8][1], landmarkList[8][2]      # index tip (brightness)
            x_middle, y_middle = landmarkList[12][1], landmarkList[12][2]  # middle tip (volume)
            x_ring, y_ring = landmarkList[16][1], landmarkList[16][2]      # ring tip (mute)
            x_pinky, y_pinky = landmarkList[20][1], landmarkList[20][2]    # pinky tip (play/pause)

            # ---------- Brightness (thumb <-> index distance) ----------
            # Distances of 15..220 px map linearly onto 0..100 %.
            length_b = hypot(x_index - x_thumb, y_index - y_thumb)
            brightness = np.interp(length_b, [15, 220], [0, 100])
            brightness = smooth_value(history_brightness, brightness)
            # NOTE(review): this runs on every frame that contains a hand;
            # consider throttling if set_brightness proves slow on a display.
            sbc.set_brightness(brightness)

            # ---------- Volume (thumb <-> middle distance) ----------
            length_v = hypot(x_middle - x_thumb, y_middle - y_thumb)
            vol = np.interp(length_v, [15, 220], [0, 100])  # normalized
            vol = smooth_value(history_volume, vol)
            volume.SetMasterVolumeLevelScalar(vol / 100, None)

            # ---------- Mute (edge-triggered with hysteresis) ----------
            length_m = hypot(x_ring - x_thumb, y_ring - y_thumb)

            # Once pinched, stay "pinched" until the fingers separate past
            # the larger PINCH_OFF distance; this avoids flicker at the edge.
            if prev_mute_gesture:
                mute_gesture = length_m < PINCH_OFF
            else:
                mute_gesture = length_m < PINCH_ON

            now = time.time()
            if mute_gesture and not prev_mute_gesture and (now - last_mute_toggle) > 0.4:
                currently_muted = bool(volume.GetMute())
                volume.SetMute(0 if currently_muted else 1, None)
                last_mute_toggle = now

            prev_mute_gesture = mute_gesture
            is_muted = bool(volume.GetMute())

            # ---------- Play/Pause (thumb + pinky pinch) ----------
            length_p = hypot(x_pinky - x_thumb, y_pinky - y_thumb)
            play_gesture = length_p < 30  # pinch threshold

            # Fire on the rising edge only, and at most once per 0.5 s.
            if play_gesture and not prev_play_gesture and (now - last_play_toggle) > 0.5:
                send_play_pause()
                last_play_toggle = now

            prev_play_gesture = play_gesture

            # ---------- On-Screen Display ----------
            # Brightness bar (left)
            bx, by = 50, 150
            bar_height = 200
            bar_width = 30
            cv2.rectangle(frame, (bx, by), (bx + bar_width, by + bar_height), (255, 255, 255), 2)
            b_fill = int((brightness / 100) * bar_height)
            cv2.rectangle(frame, (bx, by + bar_height - b_fill), (bx + bar_width, by + bar_height), (255, 0, 0), -1)
            cv2.putText(frame, f"{brightness}%", (bx - 5, by + bar_height + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            cv2.putText(frame, "Brightness", (bx - 20, by - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # Volume bar (right of brightness)
            vx = bx + 80
            cv2.rectangle(frame, (vx, by), (vx + bar_width, by + bar_height), (255, 255, 255), 2)
            v_fill = int((vol / 100) * bar_height)
            cv2.rectangle(frame, (vx, by + bar_height - v_fill), (vx + bar_width, by + bar_height), (0, 0, 255), -1)
            cv2.putText(frame, f"{int(vol)}%", (vx - 5, by + bar_height + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            cv2.putText(frame, "Volume", (vx - 5, by - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # Mute indicator
            if is_muted:
                cv2.putText(frame, "MUTE", (200, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)

            # Play/Pause label (reflects the locally tracked is_playing flag)
            if is_playing:
                cv2.putText(frame, "PLAYING", (300, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 3)
            else:
                cv2.putText(frame, "PAUSED", (300, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)

        cv2.imshow("Multi Control OSD", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
