In [39]:
import music21
from fractions import Fraction

In [None]:
music_xml_path = "data/1/Qmb26rWhtURDr6GU4rRWqtNxLzGa9BWjXug5YvU2MF1f8R.mxl" # time signatures and key signatures changes + voices

# music_xml_path = "data/1/Qmb2VvLHxqG5UQgWXT5DjmZytgUAuQWW359JavSjxaEton.mxl" # triplets

# music_xml_path = "data/1/Qmb2EEueDXEq8G6WdxQhmttjPrjQLMPpYm8kDTEBsYD2kR.mxl" # parts

# music_xml_path = "data/1/Qmb2d6ptFFaeCxUJJiPLzXPKXJZgwGYLk2r9U7aBpL1rmC.mxl" # 

# music_xml_path = "data/1/Qmb2a9Fva12WXodCP6drWC8GsvhEf8ev3LUCpWMFWfEfzR.mxl" # basic
score = music21.converter.parse(music_xml_path)

In [None]:
tokens = ["<BOS>"]
for element in score.recurse():
    print(element)
    if isinstance(element, music21.stream.Part):
        tokens.append(f"PART_{element.id}")
    elif isinstance(element, music21.clef.Clef):
        tokens.append(f"CLEF_{element.sign}_{element.line}")
    elif isinstance(element, music21.meter.TimeSignature):
        tokens.append(f"TIME_SIG_{element.numerator}/{element.denominator}")
    elif isinstance(element, music21.key.KeySignature):
        if isinstance(element, music21.key.Key):
            # Key is a subclass of KeySignature, already handled above
            # but just in case ordering matters:
            tokens.append(f"KEY_{element.tonic.name}_{element.mode}")
        else:
            # Pure KeySignature: infer tonic, assume major
            # sharps > 0 = sharps, sharps < 0 = flats
            sharps = element.sharps
            key_obj = element.asKey(mode='major')
            tokens.append(f"KEY_{key_obj.tonic.name}_{key_obj.mode}")
    elif isinstance(element, music21.stream.Measure):
        tokens.append(f"BAR_{element.number}")
    elif isinstance(element, music21.note.Note):
        tokens.append(f"POS_BAR_{element.offset}")
        tokens.append(f"POS_ABS_{element.getOffsetInHierarchy(score)}")
        tokens.append(f"PITCH_{element.pitch.nameWithOctave}")
        tokens.append(f"DUR_{element.duration.quarterLength}")
    elif isinstance(element, music21.chord.Chord):
        pitches = element.pitches
        for pitch in pitches:
            tokens.append(f"POS_BAR_{element.offset}")
            tokens.append(f"POS_ABS_{element.getOffsetInHierarchy(score)}")
            tokens.append(f"PITCH_{pitch.nameWithOctave}")
            tokens.append(f"DUR_{element.duration.quarterLength}")
    elif isinstance(element, music21.note.Rest):
        tokens.append(f"REST_{element.duration.quarterLength}")
    elif isinstance(element, music21.stream.Voice):
        tokens.append(f"VOICE_{element.id}")
tokens.append("<EOS>")
print(tokens)

<music21.text.TextBox 'Sonate No....'>
<music21.text.TextBox 'Call, Leon...'>
<music21.metadata.Metadata object at 0x74f7a4959910>
<music21.stream.Part Classical Guitar>
P1: Classical Guitar: Classical Guitar
<music21.stream.Measure 1 offset=0.0>
<music21.expressions.TextExpression 'Adagio'>
<music21.expressions.TextExpression '\ue520'>
<music21.layout.SystemLayout>
<music21.clef.Treble8vbClef>
<music21.tempo.MetronomeMark grave Dotted Quarter=40>
<music21.key.KeySignature of no sharps or flats>
<music21.meter.TimeSignature 6/8>
<music21.stream.Voice 1>
<music21.chord.Chord A3 C4>
<music21.chord.Chord A3 C4>
<music21.chord.Chord A3 C4>
<music21.note.Note D>
<music21.note.Note C>
<music21.stream.Voice 4>
<music21.note.Note A>
<music21.note.Note A>
<music21.note.Note A>
<music21.spanner.Slur <music21.note.Note F><music21.note.Note E>>
<music21.spanner.Slur <music21.note.Note D><music21.note.Note C>>
<music21.spanner.Slur <music21.note.Note B><music21.note.Note A>>
<music21.spanner.Slur <

In [None]:
def detokenize(tokens):
    score = music21.stream.Score()
    current_part = None
    current_measure = None
    current_note = None
    current_pos_bar = None  # track current bar offset
    pending_notes = []      # buffer notes at same offset
    pending_pos = None      # offset those notes belong to

    def flush_pending(target):
        """Flush buffered notes as a Note or Chord into target."""
        nonlocal pending_notes, pending_pos, current_part
        if not pending_notes:
            print("flush_pending: no pending notes to flush")
            return
        if len(pending_notes) == 1:
            print("adding", pending_notes[0], "to", target)
            target.append(pending_notes[0])
        else:
            print("adding chord", pending_notes, "to", target)
            chord = music21.chord.Chord(pending_notes)
            chord.duration = pending_notes[0].duration  # all share same duration
            target.append(chord)
        pending_notes = []
        pending_pos = None

    for token in tokens:
        if token == "<BOS>":
            continue

        elif token.startswith("PART_"):
            target = current_measure if current_measure is not None else current_part
            flush_pending(target)
            part_id = token.split("_", 1)[1]
            current_part = music21.stream.Part(id=part_id)
            current_part.partName = part_id
            score.append(current_part)
            current_measure = None
            current_note = None
            current_pos_bar = None

        elif token.startswith("TIME_SIG_") and current_part is not None:
            sig = token.split("_", 2)[2]
            numerator, denominator = sig.split("/")
            ts = music21.meter.TimeSignature(f"{numerator}/{denominator}")
            target = current_measure if current_measure is not None else current_part
            target.append(ts)

        elif token.startswith("KEY_") and current_part is not None:
            parts = token.split("_", 2)
            key = music21.key.Key(parts[1], parts[2])
            target = current_measure if current_measure is not None else current_part
            target.append(key)

        elif token.startswith("CLEF_") and current_part is not None:
            target = current_measure if current_measure is not None else current_part
            flush_pending(target)
            _, sign, line = token.split("_")
            clef = music21.clef.clefFromString(sign + line)
            target = current_measure if current_measure is not None else current_part
            target.append(clef)

        elif token.startswith("BAR_") and current_part is not None:
            target = current_measure if current_measure is not None else current_part
            flush_pending(target)
            number = int(token.split("_", 1)[1])
            current_measure = music21.stream.Measure(number=number)
            current_part.append(current_measure)
            current_pos_bar = None

        elif token.startswith("POS_BAR_"):
            # New position: if it differs from pending, flush first
            new_pos = float(Fraction(token.split("_", 2)[2]))
            if pending_notes and new_pos != pending_pos:
                target = current_measure if current_measure is not None else current_part
                flush_pending(target)
            current_pos_bar = new_pos

        elif token.startswith("POS_ABS_"):
            continue

        elif token.startswith("PITCH_") and current_part is not None:
            pitch = token.split("_", 1)[1]
            current_note = music21.note.Note(pitch)

        elif token.startswith("DUR_") and current_part is not None:
            duration = float(Fraction(token.split("_", 1)[1]))
            if current_note is not None:
                current_note.duration.quarterLength = duration
                # Buffer the note; it will be flushed when offset changes
                pending_notes.append(current_note)
                pending_pos = current_pos_bar
                current_note = None

        elif token.startswith("REST_") and current_part is not None:
            target = current_measure if current_measure is not None else current_part
            flush_pending(target)            
            duration = float(Fraction(token.split("_", 1)[1]))           
            rest = music21.note.Rest()
            rest.duration.quarterLength = duration
            target = current_measure if current_measure is not None else current_part
            target.append(rest)
        
        elif token.startswith("VOICE_") and current_part is not None:
            target = current_measure if current_measure is not None else current_part
            flush_pending(target)            
            voice_id = token.split("_", 1)[1]
            voice = music21.stream.Voice(id=voice_id)
            target.append(voice)

        elif token == "<EOS>":
            target = current_measure if current_measure is not None else current_part
            flush_pending(target)                 

    for part in score.parts:
        measures = part.getElementsByClass(music21.stream.Measure)
        if measures:
            last_measure = measures[-1]
            last_measure.rightBarline = music21.bar.Barline(type='final')

    score.write("musicxml", fp="detokenized_score.mxl")
    return score

In [43]:
detokenized_score = detokenize(tokens)

flush_pending: no pending notes to flush
flush_pending: no pending notes to flush
flush_pending: no pending notes to flush
adding chord [<music21.note.Note A>, <music21.note.Note C>] to <music21.stream.Measure 1 offset=0.0>
adding chord [<music21.note.Note A>, <music21.note.Note C>] to <music21.stream.Measure 1 offset=0.0>
adding chord [<music21.note.Note A>, <music21.note.Note C>] to <music21.stream.Measure 1 offset=0.0>
adding <music21.note.Note D> to <music21.stream.Measure 1 offset=0.0>
adding <music21.note.Note C> to <music21.stream.Measure 1 offset=0.0>
adding <music21.note.Note A> to <music21.stream.Measure 1 offset=0.0>
adding <music21.note.Note A> to <music21.stream.Measure 1 offset=0.0>
adding <music21.note.Note A> to <music21.stream.Measure 1 offset=0.0>
adding chord [<music21.note.Note G#>, <music21.note.Note B>] to <music21.stream.Measure 2 offset=6.0>
adding chord [<music21.note.Note G#>, <music21.note.Note B>] to <music21.stream.Measure 2 offset=6.0>
adding chord [<music