In [6]:
!pip install pygame


Collecting pygame
  Downloading pygame-2.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading pygame-2.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.0 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m5.3 MB/s[0m  [33m0:00:02[0ma [36m0:00:01[0m[36m0:00:01[0m02[0m
[?25hInstalling collected packages: pygame
Successfully installed pygame-2.6.1


In [4]:
import sys

# Show which interpreter build the kernel is running.
print(f"{sys.version}")

3.11.13 (main, Jun  5 2025, 13:12:00) [GCC 11.2.0]


In [None]:
import cv2
import time
import os
import numpy as np
import pygame
try:
    import mediapipe as mp
except Exception as e:
    raise ImportError(
        "Could not import mediapipe. Install with 'pip install mediapipe'.\n" + str(e)
    )

# Short aliases for the MediaPipe hand-tracking solution and its drawing helpers.
mp_hands, mp_drawing = mp.solutions.hands, mp.solutions.drawing_utils

# Initialise pygame's audio mixer; play_string() creates Sound objects through it.
pygame.mixer.init()

# Chords the player can choose from, and the one selected at start-up.
chords = list("ABCDEFG")
selected_chord = chords[0]

# Keyboard shortcuts (as key codes): digits 1..7 and letters a..g both select A..G.
CHORD_KEYS = {ord(str(number)): name for number, name in enumerate(chords, start=1)}
CHORD_KEYS.update({ord(name.lower()): name for name in chords})

# Folders holding the chord diagrams ("<chord>.png") and the per-string samples.
CHORD_IMG_DIR, SOUNDS_DIR = 'chords', 'sounds'

# Vertical extent of the drawn string area, as fractions of the frame height.
STRUM_TOP_RATIO, STRUM_BOTTOM_RATIO = 0.35, 0.75
NUM_STRINGS = 6

# Heights (fractions of the frame height) of the two orange guide lines.
HITBOX_TOP_RATIO, HITBOX_BOTTOM_RATIO = 0.38, 0.72

# Minimum number of seconds between two plays of the same string.
strum_cooldown = 0.18

# String index (0 is the topmost drawn string) -> sample file name inside SOUNDS_DIR.
STRING_WAV_MAPPING = dict(enumerate((
    'E3.wav',
    'A3.wav',
    'D44.wav',
    'G44.wav',
    'B44.wav',
    'E5.wav',
)))

def load_chord_image(ch):
    """Load the diagram for chord ``ch`` as a 3-channel BGR image.

    The file is looked up as ``<CHORD_IMG_DIR>/<ch>.png``.

    Args:
        ch: Chord name used as the file stem (e.g. ``'A'``).

    Returns:
        A 3-channel image array, or ``None`` when the file does not exist or
        OpenCV cannot decode it. Images with an alpha channel are flattened
        onto a white background; single-channel (grayscale) images are
        expanded to three channels so they can be pasted into a colour frame.
    """
    path = os.path.join(CHORD_IMG_DIR, f"{ch}.png")
    if not os.path.exists(path):
        return None

    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if img is None:
        return None

    if img.ndim == 2:
        # IMREAD_UNCHANGED keeps grayscale files 2-D; indexing shape[2] on
        # them would raise IndexError, so expand to BGR instead.
        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    if img.shape[2] == 4:
        # Composite over white: out = alpha * colour + (1 - alpha) * 255.
        alpha = img[:, :, 3:4] / 255.0
        colour = img[:, :, :3].astype(np.float32)
        img = (alpha * colour + (1 - alpha) * 255).astype(np.uint8)

    return img

# Pre-load every chord diagram once; a value is None when no image could be loaded.
chord_images = dict(zip(chords, map(load_chord_image, chords)))

# Timestamp of the most recent play for each string (0 = never played yet).
last_play_times = NUM_STRINGS * [0]

# Sound objects already loaded from disk, keyed by file path, so that a strum
# does not have to re-read and decode the WAV file every time.
_sound_cache = {}


def play_string(string_index):
    """Play the sample mapped to ``string_index``, rate-limited per string.

    Does nothing when the same string was played less than ``strum_cooldown``
    seconds ago. A missing file or a pygame failure is reported with a printed
    message instead of raising, so a bad sample never stops the video loop.

    Args:
        string_index: Index into ``last_play_times`` / ``STRING_WAV_MAPPING``.
            Indices missing from the mapping fall back to ``'E3.wav'``.
    """
    now = time.time()
    if now - last_play_times[string_index] < strum_cooldown:
        return
    # The cooldown starts even if the sample turns out to be unavailable, which
    # also keeps the error message from being printed on every video frame.
    last_play_times[string_index] = now

    path = os.path.join(SOUNDS_DIR, STRING_WAV_MAPPING.get(string_index, 'E3.wav'))
    try:
        sound = _sound_cache.get(path)
        if sound is None:
            if not os.path.exists(path):
                print("Sound not found:", path)
                return
            sound = pygame.mixer.Sound(path)
            _sound_cache[path] = sound
        sound.play()
    except Exception as e:
        print("Error playing sound:", e)

# ---------------------------------------------------------------------------
# Main loop: grab webcam frames, track hands, trigger string samples and draw
# the on-screen guitar until 'q' is pressed or the camera stops delivering.
# ---------------------------------------------------------------------------
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open webcam.")

# NOTE(review): these two values are never read in this cell; they are kept
# only in case another cell depends on them.
is_in_top = False
top_cross_time = None

# Maximum vertical distance, in pixels, between the fingertip and a string
# line for the string to count as touched.
STRING_HIT_TOLERANCE_PX = 12

try:
    with mp_hands.Hands(
        max_num_hands=2,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6,
    ) as hands:
        print("Controls: Press 1..7 or a..g to select chord A..G. Press 'q' to quit.")
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to read frame.")
                break

            # Mirror the image so on-screen motion matches the player's motion.
            frame = cv2.flip(frame, 1)
            # Avoid unpacking into `_`, which would shadow IPython's last-output variable.
            h, w = frame.shape[:2]

            # Run hand detection on the clean frame, BEFORE any overlay is drawn:
            # the filled strum band below would otherwise hide the hand from the
            # detector exactly where strumming takes place.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb)

            # Strum area background (a filled light-grey band across the frame).
            strum_top = int(h * STRUM_TOP_RATIO)
            strum_bottom = int(h * STRUM_BOTTOM_RATIO)
            cv2.rectangle(frame, (0, strum_top-10), (w, strum_bottom+10), (230,230,230), -1)

            # Evenly spaced horizontal string lines between strum_top and strum_bottom.
            string_positions = []
            for i in range(NUM_STRINGS):
                y = int(strum_top + (i / (NUM_STRINGS-1)) * (strum_bottom - strum_top))
                string_positions.append(y)
                cv2.line(frame, (0, y), (w, y), (60,60,60), 2)

            # Two orange guide lines; they are drawn only and do not gate the hit test.
            hit_top = int(h * HITBOX_TOP_RATIO)
            hit_bottom = int(h * HITBOX_BOTTOM_RATIO)
            cv2.line(frame, (0, hit_top), (w, hit_top), (0,120,255), 2)
            cv2.line(frame, (0, hit_bottom), (w, hit_bottom), (0,120,255), 2)

            if results.multi_hand_landmarks and results.multi_handedness:
                for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
                    label = handedness.classification[0].label
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Landmark 12 is the middle-finger tip; its coordinates are
                    # normalised, so scale them to pixel positions.
                    fingertip = hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP]
                    x_px = int(fingertip.x * w)
                    y_px = int(fingertip.y * h)
                    cv2.circle(frame, (x_px, y_px), 6, (0,255,0), -1)
                    cv2.putText(frame, f"{label}", (x_px+8, y_px-8), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)

                    # Only the hand labelled "Right" strums. MediaPipe's labels
                    # assume a mirrored (selfie-style) image, which the flip
                    # above provides.
                    if label.lower() == "right":
                        for i, y_str in enumerate(string_positions):
                            if abs(y_px - y_str) < STRING_HIT_TOLERANCE_PX:
                                play_string(i)
                                break  # at most one string per hand per frame

            # Chord diagram (or a labelled placeholder box) in the top-left corner.
            instr_w = int(w * 0.20)
            instr_h = int(h * 0.25)
            instr_img = chord_images.get(selected_chord)
            if instr_img is not None:
                ih, iw = instr_img.shape[:2]
                # Fit inside the instr_w x instr_h box while keeping the aspect ratio.
                scale = min(instr_w / iw, instr_h / ih)
                # Clamp to 1 px: cv2.resize rejects a zero-sized target.
                new_w = max(1, int(iw * scale))
                new_h = max(1, int(ih * scale))
                resized = cv2.resize(instr_img, (new_w, new_h))
                frame[5:5+new_h, 5:5+new_w] = resized
            else:
                cv2.rectangle(frame, (5,5), (5+instr_w, 5+instr_h), (50,50,50), -1)
                cv2.putText(frame, f"{selected_chord}", (12, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255,255,255), 2)

            cv2.putText(frame, f"Selected chord: {selected_chord}   (press 1..7 or a..g)", (10, h-20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 2)

            cv2.imshow("Virtual Guitar Prototype (press q to quit)", frame)

            # Keyboard handling: 'q' quits, chord keys change the selection.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            if key in CHORD_KEYS:
                selected_chord = CHORD_KEYS[key]
                print("Selected chord:", selected_chord)
finally:
    # Always free the camera and close the window, including when the kernel is
    # interrupted or an error occurs mid-loop; otherwise the webcam stays
    # locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()


Controls: Press 1..7 or a..g to select chord A..G. Press 'q' to quit.


I0000 00:00:1759239786.599019   27915 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1759239786.601511   32787 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.7-0ubuntu0.24.04.1), renderer: AMD Radeon Vega 3 Graphics (radeonsi, raven2, ACO, DRM 3.61, 6.14.0-29-generic)
W0000 00:00:1759239786.623168   32783 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1759239786.649673   32783 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Selected chord: A
