In [1]:
from ultralytics import YOLO
from glob import glob
import cv2
import numpy as np
from midiutil import MIDIFile

model = YOLO('models/notes_detection/V2/train/weights/best.pt')

In [2]:
NUM = 5

def show_img(image, name):
    cv2.namedWindow(name, cv2.WINDOW_NORMAL)
    cv2.imshow(name, image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
def get_black_poses(arr):
    poses = []
    for i in range(len(arr)):
        if arr[i] in range(0, 21):
            poses.append(i)
    return poses    
    
detected_staffs_path = 'detected_staffs/'
staffs_file_names = glob(detected_staffs_path + '*.jpg')

In [3]:
staff = cv2.imread(staffs_file_names[NUM])
detected_notes = model(staff)

# Boxes
boxes = detected_notes[0].boxes.cpu().numpy()
boxes_coords = {}
for i, box in enumerate(boxes):
    cls = int(box.cls[0])
    xywh = box.xywh

    x = xywh[0, 0]
    y = xywh[0, 1]
    w = xywh[0, 2]
    h = xywh[0, 3]

    boxes_coords.update({(i, cls): [x, y, w, h]})
     
sorted_boxes_coords = dict(sorted(boxes_coords.items(), key=lambda item: item[1][0]))  

# Get lines
lines = []
for key, value in sorted_boxes_coords.items():
    if key[1] == 12:
        lines.append(value)

left_help_lines_coords = []
right_help_lines_coords = []

for line in lines:
    x = line[0]
    y = line[1]
    w = line[2]
    h = line[3]
    #img = cv2.imread(staffs_file_names[NUM], cv2.IMREAD_GRAYSCALE)
    staff_g = cv2.cvtColor(staff, cv2.COLOR_BGR2GRAY)
    
    #print('Img', type(img))
    print('Staff', type(staff))
    
    left_column = staff_g[int(y-h/2) : int(y+h/2), int(x-w/4)]
    right_column = staff_g[int(y-h/2) : int(y+h/2), int(x+w/4)]

    print('Left Column', type(left_column), left_column)
    
    left_line = get_black_poses(left_column)
    right_line = get_black_poses(right_column)
    
    # main lines
    if len(left_line) == 5:
        left_main_lines_coords = [(int(x+w/4), int(y-h/2 + pose)) for pose in left_line]
        left_help_lines_coords.append(left_main_lines_coords)

    if len(right_line) == 5:
        right_main_lines_coords = [(int(x+w/4), int(y-h/2 + pose)) for pose in right_line]
        right_help_lines_coords.append(right_main_lines_coords)
         
        
y_lines_coords = [item[1] for item in right_help_lines_coords[0]]        
print('Y:', y_lines_coords)        


note_line1_y = y_lines_coords[0]
note_line2_y = y_lines_coords[1]
note_line3_y = y_lines_coords[2]
note_line4_y = y_lines_coords[3]
note_line5_y = y_lines_coords[4]

h = note_line2_y - note_line1_y

# alpha
# midi_pitch: line_number
alphabet = {
    21: 26.5, 23: 26, 24: 25.5, 26: 25, 28: 24.5, 29: 24,
    31: 23.5, 33: 23, 35: 22.5, 36: 22, 38: 21.5, 40: 21,
    41: 20.5, 43: 20, 45: 19.5, 47: 19, 48: 18.5, 50: 18,
    52: 17.5, 53: 17, 55: 16.5, 57: 16, 59: 15.5, 60: 15,
    62: 14.5, 64: 14, 65: 13.5, 67: 13, 69: 12.5, 71: 12,
    72: 11.5, 74: 11, 76: 10.5, 77: 10, 79: 9.5, 81: 9, 
    83: 8.5,  84: 8, 86: 7.5, 88: 7, 89: 6.5, 91: 6,
    93: 5.5, 95: 5, 96: 4.5, 98: 4, 100: 3.5, 101: 3,
    103: 2.5, 105: 2, 107: 1.5, 108: 1
}

# find all clefs
clef_classes = [0, 1, 27]
clef_dict = {}
for key, value in sorted_boxes_coords.items():
    if key[1] in clef_classes:
        clef_dict[key] = value

# Notes
notes_classes = [10, 11, 14, 15, 17]
notes_elements = {}

for key, value in sorted_boxes_coords.items():
    if key[1] in notes_classes:
        notes_elements.update({key: value}) 

# Create all 26 lines
all_lines_y = {}
first_key = list(clef_dict.keys())[0][1]

h = y_lines_coords[1] - y_lines_coords[0]
all_keys = np.arange(1.5, 25.6, 0.5)

for key in notes_elements.keys():
    if first_key == 1:     
        # Bass-clef
        all_lines_y[1] = y_lines_coords[0] - 15*h
        all_lines_y[26.5] = y_lines_coords[4] + 6*h
    else:
        # NOT a Bass-clef
        all_lines_y[1] = y_lines_coords[0] - 9*h
        all_lines_y[26.5] = y_lines_coords[4] + 12*h
        
    for i, key in enumerate(all_keys):
        all_lines_y[key] = all_lines_y[1] + h*(i+1)/2   
        
all_lines_y = dict(sorted(all_lines_y.items()))   
numbered_all_lines_dict = {i: (key, value) for i, (key, value) in enumerate(all_lines_y.items(), start=0)}
  
# Get notes y-es
lines_y_values = np.array(list(all_lines_y.values()))

notes_classes = [10, 11, 14, 15, 17]
note_line_dict = {}

for key, value in notes_elements.items():
    if key[1] in notes_classes:
        line_ind = np.argmin(np.abs(lines_y_values-value[1])) 
        note_line_dict.update({key[0]: numbered_all_lines_dict[line_ind][0]})

# Get midi seq
midi_seq = []
for id, line_num in note_line_dict.items():
    for pitch, line_num2 in alphabet.items():
        if line_num == line_num2:
            midi_seq.append(pitch)
            
# generate midi file  
def create_midi(pitch_array, duration, output_file):
    midi = MIDIFile(1)

    track = 0
    time = 0

    midi.addTrackName(track, time, "Sample Track") 
    midi.addTempo(track, time, 120)

    channel = 0
    volume = 100

    for p in pitch_array:
        midi.addNote(track, channel, p, time, duration, volume)
        time += 1
        
    with open(output_file, "wb") as f:
        midi.writeFile(f)

duration = 960 
output_file = "output.mid"

create_midi(midi_seq, duration, output_file)

0: 64x640 1 bemol, 1 four, 3 lines, 17 quarter-notes, 4 quarter-rests, 1 rest, 1 treble, 44.2ms
Speed: 1.5ms preprocess, 44.2ms inference, 340.6ms postprocess per image at shape (1, 3, 64, 640)
Staff <class 'numpy.ndarray'>
Left Column <class 'numpy.ndarray'> [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255
 255 255 255]
Staff <class 'numpy.ndarray'>
Left Column <class 'numpy.ndarray'> [255 255 102   1 253 255 255 255 255 254 252 255 255 255 255 253 255 255 236   1 116 255 254 255 255 254 254 255 255 255 255 255 255 255 255 119   0 237 255 251 255 255 253 255 255 255 255 255 255 255 255 255   2 104 253 254 255 254 255 255 255 255 255 255 255 255 255 255 139   0 205 254 254 255 255]
Staff <class 'numpy.ndarray'>
Left Column

In [4]:
show_img(staff, 'ddd')