In [1]:
import cv2
import mediapipe as mp


In [2]:
# Live webcam demo: track up to two hands with MediaPipe Hands, mark the thumb
# and index fingertips, and draw lines between them (per hand and across hands).

# MediaPipe Hands setup
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Named landmark indices (THUMB_TIP == 4, INDEX_FINGER_TIP == 8)
THUMB_TIP = mp_hands.HandLandmark.THUMB_TIP
INDEX_TIP = mp_hands.HandLandmark.INDEX_FINGER_TIP

# Open default camera (0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would just exit silently.
    print("Could not open camera 0; no frames will be shown.")

# Create Hands object (video mode: detections are tracked between frames)
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.9,
    min_tracking_confidence=0.9,
)

print("Press 'q' in the video window to quit.")

# try/finally guarantees the camera, window and MediaPipe graph are released
# even when the cell is stopped with a kernel interrupt or an exception occurs;
# otherwise the webcam stays locked until the kernel is restarted.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # --- MIRROR THE IMAGE ---
        # MediaPipe documents that handedness labels assume a mirrored
        # (selfie-style) input, so the flip happens before processing.
        frame = cv2.flip(frame, 1)

        # OpenCV delivers BGR; MediaPipe expects RGB
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        h, w, _ = frame.shape

        # Fingertip pixel coordinates per hand; None means "not seen this frame"
        coords = {
            "Left": {"thumb": None, "index": None},
            "Right": {"thumb": None, "index": None},
        }

        if results.multi_hand_landmarks and results.multi_handedness:
            for hand_landmarks, handedness in zip(
                results.multi_hand_landmarks, results.multi_handedness
            ):
                label = handedness.classification[0].label  # "Left" or "Right"

                # Get thumb tip and index tip
                thumb_tip = hand_landmarks.landmark[THUMB_TIP]
                index_tip = hand_landmarks.landmark[INDEX_TIP]

                # Convert normalized coords -> pixel coords
                t_x, t_y = int(thumb_tip.x * w), int(thumb_tip.y * h)
                i_x, i_y = int(index_tip.x * w), int(index_tip.y * h)

                # If both detections carry the same label, the later one wins.
                coords[label]["thumb"] = (t_x, t_y)
                coords[label]["index"] = (i_x, i_y)

                # Draw markers on each fingertip (colors are BGR)
                cv2.circle(frame, (t_x, t_y), 8, (0, 255, 0), -1)   # thumb: green
                cv2.circle(frame, (i_x, i_y), 8, (0, 0, 255), -1)   # index: red

                # Draw individual hand thumb <-> index line (blue)
                cv2.line(frame, (t_x, t_y), (i_x, i_y), (255, 0, 0), 2)

                # Draw hand skeleton (optional)
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS
                )

        # ---- Draw cross-hand connections if both hands detected ----

        left_thumb = coords["Left"]["thumb"]
        right_thumb = coords["Right"]["thumb"]
        left_index = coords["Left"]["index"]
        right_index = coords["Right"]["index"]

        # Thumb <-> Thumb
        if left_thumb and right_thumb:
            cv2.line(frame, left_thumb, right_thumb, (0, 255, 0), 2)  # green (BGR)

        # Index <-> Index
        if left_index and right_index:
            cv2.line(frame, left_index, right_index, (0, 0, 255), 2)  # red (BGR)

        cv2.imshow("Hand demo – press q to quit", frame)

        # waitKey also pumps the GUI event loop; the mask keeps the low byte only
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

Press 'q' in the video window to quit.


I0000 00:00:1764786864.091041 2689231 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M1 Max
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1764786864.099908 2705686 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764786864.105137 2705686 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764786867.677423 2705690 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


In [None]:
import cv2
import mediapipe as mp
import math
import time
import rtmidi

# ---------------- MIDI SETUP ---------------- #

# Substring searched for in the names of the available MIDI output ports;
# the first port whose name contains it is the one that gets opened.
MIDI_PORT_NAME_SUBSTRING = "IAC"

# Zero-based channel index: 0 corresponds to MIDI channel 1.
MIDI_CHANNEL = 0

# Control-change numbers, one per measured line.
CC_LEFT_HAND, CC_RIGHT_HAND = 20, 21            # thumb-index distance: left hand, right hand
CC_THUMB_TO_THUMB, CC_INDEX_TO_INDEX = 22, 23   # cross-hand: thumb-thumb, index-index

# Pixel distances mapped to the two ends of the CC range
# (approximate; tune them to your camera framing).
MIN_DIST_PX, MAX_DIST_PX = 20, 400

def find_midi_port():
    """Open a MIDI output and return the ``rtmidi.MidiOut`` instance.

    Lists the available output ports, then opens the first one whose name
    contains ``MIDI_PORT_NAME_SUBSTRING``. When no port name matches, a
    virtual port named "PoseHandControl" is opened instead.
    """
    midi_out = rtmidi.MidiOut()
    port_names = midi_out.get_ports()
    print("Available MIDI ports:", port_names)

    # First (index, name) pair that matches, or None when nothing does.
    match = next(
        (
            (idx, port_name)
            for idx, port_name in enumerate(port_names)
            if MIDI_PORT_NAME_SUBSTRING in port_name
        ),
        None,
    )

    if match is None:
        # fallback: virtual port
        midi_out.open_virtual_port("PoseHandControl")
        print("Opened virtual MIDI port: PoseHandControl (no IAC found)")
    else:
        idx, port_name = match
        midi_out.open_port(idx)
        print(f"Opened MIDI port: {port_name}")

    return midi_out

def normalize_to_cc(dist_px, min_d=MIN_DIST_PX, max_d=MAX_DIST_PX):
    """Map a pixel distance onto the MIDI CC value range 0..127.

    Args:
        dist_px: Measured distance in pixels.
        min_d: Distance (and anything below it) that maps to 0.
        max_d: Distance (and anything above it) that maps to 127.

    Returns:
        int: The clamped distance scaled linearly to 0..127, truncated
        toward zero.
    """
    if max_d == min_d:
        # Degenerate range: a linear mapping would divide by zero, so treat
        # the single threshold as a switch instead of raising mid-stream.
        return 127 if dist_px >= max_d else 0

    d = max(min_d, min(max_d, dist_px))  # clamp into [min_d, max_d]
    norm = (d - min_d) / (max_d - min_d)
    return int(norm * 127)

def send_cc(midi_out, cc_number, value, channel=MIDI_CHANNEL):
    """Send one MIDI Control Change message through ``midi_out``.

    Args:
        midi_out: Object exposing ``send_message(list_of_ints)``.
        cc_number: Controller number; only its low 7 bits are used.
        value: Controller value; converted to int and clamped to 0..127.
        channel: Zero-based channel; only its low 4 bits are used.
    """
    data = int(value)
    if data < 0:
        data = 0
    elif data > 127:
        data = 127

    # 0xB0 is the Control Change status nibble; the low nibble is the channel.
    midi_out.send_message([0xB0 | (channel & 0x0F), cc_number & 0x7F, data])

# Live webcam loop: track both hands, draw fingertip lines annotated with
# distance / CC value / angle, and stream the distances out as MIDI CCs.

midi_out = find_midi_port()

# -------------- MEDIAPIPE SETUP -------------- #

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Named landmark indices (THUMB_TIP == 4, INDEX_FINGER_TIP == 8)
THUMB_TIP = mp_hands.HandLandmark.THUMB_TIP
INDEX_TIP = mp_hands.HandLandmark.INDEX_FINGER_TIP

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would just exit silently.
    print("Could not open camera 0; no frames will be shown.")

hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

print("Press 'q' to quit.")

# -inf guarantees the very first frame is allowed to send, whatever the
# (undefined) reference point of the monotonic clock is.
last_send = float("-inf")
send_interval = 1/30.0  # send MIDI ~30 times per second


def draw_line_info(p1, p2, label_text, color, cc_number=None):
    """Draw an annotated line between two points and optionally send a CC.

    Draws onto the module-level ``frame`` (the image currently being
    processed) and sends through the module-level ``midi_out``.

    Args:
        p1, p2: ``(x, y)`` pixel tuples, or None when the point is missing.
        label_text: Short tag shown in front of the measurements.
        color: BGR color used for both the line and the text.
        cc_number: MIDI controller number to send the normalized distance
            on; None draws without sending.

    Returns:
        ``(distance_px, angle_deg)``, or None when either point is missing.
    """
    if p1 is None or p2 is None:
        return None

    x1, y1 = p1
    x2, y2 = p2

    # distance in pixels
    dist = math.hypot(x2 - x1, y2 - y1)

    # angle in degrees (atan2 handles the full circle); the image y axis
    # points down, which is fine as long as it is used consistently.
    angle = math.degrees(math.atan2(y2 - y1, x2 - x1))

    cc_val = normalize_to_cc(dist)

    cv2.line(frame, (x1, y1), (x2, y2), color, 2)

    # text slightly above the midpoint
    mid_x = int((x1 + x2) / 2)
    mid_y = int((y1 + y2) / 2) - 10
    # Show the real pixel distance and the CC value separately. The text is
    # ASCII-only because putText's Hershey fonts render other symbols
    # (such as the degree sign) as question marks.
    text = f"{label_text} d={dist:.1f}px cc={cc_val} a={angle:.1f}deg"
    cv2.putText(
        frame, text, (mid_x, mid_y),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA
    )

    # optional MIDI
    if cc_number is not None and midi_out is not None:
        send_cc(midi_out, cc_number, cc_val)

    return dist, angle


# try/finally guarantees the camera, window, MediaPipe graph and MIDI port
# are released even when the cell is interrupted or an exception occurs.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror frame (MediaPipe documents that handedness labels assume
        # a mirrored, selfie-style input)
        frame = cv2.flip(frame, 1)

        # OpenCV delivers BGR; MediaPipe expects RGB
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)

        h, w, _ = frame.shape

        # Fingertip pixel coordinates per hand; None means "not seen this frame"
        coords = {
            "Left": {"thumb": None, "index": None},
            "Right": {"thumb": None, "index": None},
        }

        if results.multi_hand_landmarks and results.multi_handedness:
            for hand_landmarks, handedness in zip(
                results.multi_hand_landmarks, results.multi_handedness
            ):
                label = handedness.classification[0].label  # "Left" or "Right"

                thumb_tip = hand_landmarks.landmark[THUMB_TIP]
                index_tip = hand_landmarks.landmark[INDEX_TIP]

                t_x, t_y = int(thumb_tip.x * w), int(thumb_tip.y * h)
                i_x, i_y = int(index_tip.x * w), int(index_tip.y * h)

                coords[label]["thumb"] = (t_x, t_y)
                coords[label]["index"] = (i_x, i_y)

                # draw fingertip markers (colors are BGR)
                cv2.circle(frame, (t_x, t_y), 8, (0, 255, 0), -1)   # thumb: green
                cv2.circle(frame, (i_x, i_y), 8, (0, 0, 255), -1)   # index: red

                # thumb <-> index line on this hand
                cv2.line(frame, (t_x, t_y), (i_x, i_y), (255, 0, 0), 2)

                # Optional: skeleton
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS
                )

        # Throttle MIDI output; drawing still happens on every frame.
        now = time.monotonic()
        send_midi_now = (now - last_send) > send_interval

        # Left and right fingertips
        left_thumb = coords["Left"]["thumb"]
        left_index = coords["Left"]["index"]
        right_thumb = coords["Right"]["thumb"]
        right_index = coords["Right"]["index"]

        # (p1, p2, label, BGR color, CC number) for every measured line
        for p1, p2, tag, color, cc in (
            (left_thumb, left_index, "L T-I", (255, 0, 0), CC_LEFT_HAND),
            (right_thumb, right_index, "R T-I", (255, 0, 0), CC_RIGHT_HAND),
            (left_thumb, right_thumb, "T-T", (0, 255, 255), CC_THUMB_TO_THUMB),
            (left_index, right_index, "I-I", (255, 255, 0), CC_INDEX_TO_INDEX),
        ):
            # Passing None as the CC number draws without sending MIDI.
            draw_line_info(p1, p2, tag, color, cc if send_midi_now else None)

        if send_midi_now:
            last_send = now

        cv2.imshow("Hand demo – distances, angles, MIDI", frame)

        # waitKey also pumps the GUI event loop; the mask keeps the low byte only
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    if midi_out is not None:
        # Release the MIDI port so re-running the cell does not pile up
        # open connections.
        midi_out.close_port()

Available MIDI ports: ['IAC Driver Bus 1', 'IAC Driver PoseCV', 'MPK mini 3']
Opened MIDI port: IAC Driver Bus 1
Press 'q' to quit.


I0000 00:00:1764786913.333907 2689231 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M1 Max
W0000 00:00:1764786913.341232 2707866 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764786913.345977 2707866 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
