In [35]:
import cv2 as cv
import torch
import xml.etree.ElementTree as ET  
from collections import namedtuple
import os
from os import listdir
from os.path import isfile, join
from matplotlib import pyplot as plt

In [102]:
Glyph = namedtuple('Glyph', 'name bbox')
BBox = namedtuple('Bbox', 'xmin xmax ymin ymax')

STRIDE_RATIO = 0.8
BOUNDARY_EXTRA = 4

class MusicFile:
    def __init__(self, filename='', height=0, space=0, column=0, row=0, rot=0, model=[], staff_start=[]):
        self.filename = filename
        self.height = height
        self.space = space
        self.column = column
        self.row = row
        self.rot = rot
        self.model = model
        self.staff_start = staff_start
        
        self.width = self.height = 0
    
    def compute_kernel(self):
        self.kernel_size = file.space + file.height    
        self.stride = int(kernel_size * STRIDE_RATIO)
        self.n_strides_per_staff = int(len(file.model) / stride)
        self.boundary_adjust = BOUNDARY_EXTRA * (file.height + file.space)
        
        self.staffs = [[[]] * n_strides_per_staff] * len(self.staff_start)
        
    # Naive implementation because of the small search space
    # No point into implementing a k-d tree
    def locate(self, glyphs):
        staff_pos = [staff + 2 * (self.space + self.height) + int(self.height / 2) for staff in self.staff_start]
        for glyph in glyphs:
            x = int((glyph.bbox.xmin + glyph.bbox.xmax) / 2) - self.column
            y = int((glyph.bbox.ymin + glyph.bbox.ymax) / 2) - self.row
            
            min_dist = self.height
            closest_staff_idx = 0
            for i in range(len(staff_pos)):
                dist = abs(y - staff_pos[i])
                if min_dist > dist:
                    min_dist = dist
                    closest_staff_idx = i
            
            closest_kernel_idx = int(float(x) / len(self.model) * self.n_strides_per_staff)
            self.staffs[closest_staff_idx][closest_kernel_idx - 1].append(glyph)

In [103]:
# Function that parses the staff xml and returns a MusicFile object
def staff_xml(fn):
    tree = ET.parse(fn)
    root = tree.getroot()
    assert root.tag == 'AutoScore'
    
    filename = root[0].text
    height = int(root[1][0].text)
    space = int(root[1][1].text)
    column = int(root[1][2].text)
    row = int(root[1][3].text)
    rot = float(root[1][4].text)
    model = [float(gradient) for gradient in str.split(root[1][5].text)]
    staffs = [int(staff.text) for staff in root[2]]

    return MusicFile(filename, height, space, column, row, rot, model, staffs)

'''
Parsing all the staff description xml files for both datasets that were 
generated by deepscores.cc and muscima.cc binaries
'''
artificial_fn = '../datasets/Artificial/'
artificial_files = [f for f in listdir(artificial_fn) if isfile(join(artificial_fn, f))]
handwritten_fn = '../datasets/Handwritten/'
handwritten_files = [f for f in listdir(handwritten_fn) if isfile(join(handwritten_fn, f))]

artificial_files = [staff_xml(join(artificial_fn, f)) for f in artificial_files]
handwritten_files = [staff_xml(join(handwritten_fn, f)) for f in handwritten_files]

In [104]:
# Parsing both datasets' groundtruths
deepscores_fn = '../../data/DEEPSCORES/images_png/'
deepscores_gt_fn = '../../data/DEEPSCORES/xml_annotations/'
muscima_fn = '../../data/MUSCIMA/CvcMuscima-Distortions/'
muscima_gt_fn = '../../data/MUSCIMA/v1.0/data/cropobjects_manual/'

# Functions that parses DEEPSCORES dataset and returns an array of glyphs
def deepscores_gt(file):
    tree = ET.parse(join(deepscores_gt_fn, os.path.splitext(file.filename)[0] + '.xml'))
    root = tree.getroot()
    assert root.tag == 'annotation'
    
    width = height = 0
    glyphs = []
    for node in root:
        if node.tag == 'size':
            width = int(node[0].text)
            height = int(node[1].text)
            continue
            
        elif node.tag != 'object':
            continue
            
        assert width and height
        name = node[0].text
        xmin, xmax = int(width * float(node[1][0].text)), int(width * float(node[1][1].text))
        ymin, ymax = int(height * float(node[1][2].text)), int(height * float(node[1][3].text))
        bbox = BBox(xmin, xmax, ymin, ymax)
        glyph = Glyph(name, bbox)
        glyphs.append(glyph)
            
    return glyphs, width, height

for file in artificial_files: 
    file.compute_kernel()
    glyphs, width, height = deepscores_gt(file)
    file.width, file.height = width, height
    file.locate(glyphs)
    for g in file.staffs[0][0]:
        print(g.bbox.xmin)

1885
1885
1835
1835
1745
1912
1912
1912
1922
1912
1912
1922
1912
2010
2010
2102
2112
2102
2102
2102
2112
2102
2102
2018
2008
2008
2008
2018
2008
2008
2008
1771
1771
1771
1781
1771
1771
1771
1771
1771
1771
1781
1771
1771
1773
1745
1771
1773
2482
2482
2482
2492
2482
2482
2482
2492
2391
2391
1286
1286
1286
1286
2200
2193
2200
2199
2199
2193
2199
2209
2199
2199
2199
2209
2389
2389
2399
2389
2389
2389
2399
2389
2363
2363
2293
2293
2303
2293
2293
2293
2303
2293
755
755
755
691
600
817
817
817
541
541
503
498
503
503
503
503
505
541
896
896
896
923
923
923
923
896
909
923
923
923
923
923
923
2134
147
2134
147
526
526
526
526
526
526
526
526
147
147
526
147
94
94
147
261
261
261
261
261
382
147
147
147
147
158
147
158
147
147
147
1341
1346
1341
1253
1253
1253
1253
1346
1346
1346
1346
1346
1346
1346
1346
1346
1510
1510
1536
1536
1536
1536
1538
1630
1630
1630
1630
1538
1443
1443
1348
1443
1348
1443
924
896
924
987
896
896
896
987
1157
1157
1157
1157
1158
1158
1063
1063
1063
1063
1037
1037


IndexError: list index out of range