In [1]:
from collections import namedtuple

import xml
from xml.etree import ElementTree
from pprint import pprint


In [3]:
# Load the score and show the document root followed by each of its direct children.
tree = ElementTree.parse('data/VocalSetScoresOld/excerpts_row.musicxml')
root = tree.getroot()

# Iterating an Element yields its direct children, so unpacking it lists them in order.
print(root, *root, sep="\n")

<Element 'score-partwise' at 0x00000246DE25C400>
<Element 'work' at 0x00000246DE25C450>
<Element 'identification' at 0x00000246DE25C4F0>
<Element 'defaults' at 0x00000246DE25C900>
<Element 'credit' at 0x00000246DE25CEF0>
<Element 'part-list' at 0x00000246DE25D040>
<Element 'part' at 0x00000246DE25D4A0>


In [4]:
# MIDI-style note numbers: C-1 is 0, so A4 is 69.
# Semitone offsets for MusicXML accidental names (whole-semitone values only).
accidental2note = {
    'flat': -1, 'natural': 0, 'sharp': 1,
    'flat-flat': -2, 'double-sharp': 2, 'sharp-sharp': 2,
    'natural-flat': -1, 'natural-sharp': 1,
    'triple-flat': -3, 'triple-sharp': 3,
}
# Semitone offset of each natural step above C within one octave.
step2note = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}


def pitch2note(step, octave, accidental='natural'):
    """Convert a pitch spelling into an integer note number (A4 -> 69).

    Args:
        step: Step letter, one of the keys of ``step2note`` ('C' ... 'B').
        octave: Octave number; anything accepted by ``int()``.
        accidental: Accidental name, one of the keys of ``accidental2note``.
            Defaults to 'natural'.

    Returns:
        ``step2note[step] + accidental2note[accidental] + (octave + 1) * 12``.

    Raises:
        ValueError: If ``step`` or ``accidental`` is not a known key, or if
            ``octave`` cannot be converted by ``int()``.
    """
    step = str(step)
    octave = int(octave)
    accidental = str(accidental)
    try:
        return step2note[step] + accidental2note[accidental] + (octave + 1) * 12
    except KeyError as err:
        # Report bad spellings as ValueError, keeping the original KeyError as the cause.
        raise ValueError(f"Step {step} or accidental {accidental} is not valid") from err

In [5]:
# Sanity check: A4 is 69, so A-sharp 4 should come out as 70.
pitch2note(step='A', octave=4, accidental='sharp')

70

In [6]:
# ARPABET phoneme inventory, split into vowels and consonants.
# The vowel list previously contained "IX" twice; the second entry sat where the
# standard ARPABET table lists "UX", so it is spelled "UX" here.
arpabet_vowels = {"AA", "AE", "AH", "AO", "AW", "AX", "AXR", "AY", "EH", "ER", "EY", "IH", "IX", "IY", "OW", "OY", "UH", "UW", "UX"}
arpabet_consonants = {"B", "CH", "D", "DH", "DX", "EL", "EM", "EN", "F", "G", "HH", "H", "JH", "K", "L", "M", "N", "NG", "NX", "P", "Q", "R", "S", "SH", "T", "TH", "V", "W", "WH", "Y", "Z", "ZH"}
arpabet = arpabet_vowels.union(arpabet_consonants)

# TODO: build a phoneme -> index mapping (arpabet2idx) here if one is needed.
# The bare `arpabet2idx` expression that stood here referenced a name this notebook
# never defines, which raises NameError on a fresh kernel and stops `Note` being created.

# One parsed score event: pitch number, duration, and a list of phonemes.
Note = namedtuple('Note', ["pitch", "duration", "lyric"])

In [7]:
# Build {part_id: {'name': ..., 'notes': [Note, ...]}} for every part declared in the score.
parts = {}
for part in root.findall('part-list/score-part'):
    parts[part.attrib['id']] = {'name': part.find('part-name').text}

for part_id in parts.keys():
    part = root.find(f'part[@id="{part_id}"]')
    measures = part.findall('measure')
    parsed_notes = []
    # Phonemes of the most recent pitched note; reused when a note has no lyric of its own.
    last_lyric = []
    for measure in measures:
        notes = measure.findall('note')
        for note in notes:
            duration = int(note.find("duration").text)
            is_rest = note.find("rest") is not None
            if is_rest:
                # Rests carry neither a pitch nor a lyric.
                parsed_notes.append(Note(None, duration, None))
                continue

            pitch = note.find("pitch")
            step = pitch.find("step").text
            octave = int(pitch.find("octave").text)
            # The sounding alteration lives in <pitch>/<alter>. <accidental> is only the
            # printed symbol, and is absent when the alteration comes from the key
            # signature or from an earlier accidental in the same bar.
            alter_element = pitch.find("alter")
            alter = 0
            if alter_element is not None and alter_element.text:
                alter_value = float(alter_element.text)
                # Keep whole-semitone alterations as ints so pitches stay integers.
                alter = int(alter_value) if alter_value.is_integer() else alter_value

            # Lyric line number 2 holds the space-separated phonemes.
            lyric = note.find("lyric[@number='2']")
            if lyric is not None:
                # Non-breaking spaces are treated like ordinary separators.
                lyric = lyric.find("text").text.replace("\xa0", " ").split(" ")
                for phoneme in lyric:
                    assert phoneme in arpabet, f"Unexpected phoneme {phoneme}"
            else:
                # No lyric on this note: continue the previous syllable.
                assert len(last_lyric) > 0, "Expected a lyric"
                lyric = []
                last_phoneme = last_lyric[-1]
                if last_phoneme in arpabet_consonants:
                    assert len(last_lyric) > 1, "Expected a phoneme before the final consonant"
                    # Pop it and carry it over, and repeat the vowel.
                    # last_lyric is the same list object stored in the previous Note,
                    # so the pop also removes the consonant from that Note.
                    # NOTE(review): with a multi-consonant ending the repeated phoneme
                    # is the preceding consonant, not a vowel; confirm this is intended.
                    lyric.append(last_lyric.pop(-1))
                    lyric.insert(0, last_lyric[-1])
                else:
                    # Just repeat the vowel
                    lyric.append(last_lyric[-1])
            last_lyric = lyric
            # Natural pitch from pitch2note, then shifted by the <alter> value.
            midi_pitch = pitch2note(step, octave) + alter
            parsed_notes.append(Note(midi_pitch, duration, lyric))
    parts[part_id]['notes'] = parsed_notes
pprint(parts)

{'P1': {'name': 'Voice',
        'notes': [Note(pitch=60, duration=12, lyric=['R', 'OW']),
                  Note(pitch=60, duration=12, lyric=['R', 'OW']),
                  Note(pitch=60, duration=9, lyric=['R', 'OW']),
                  Note(pitch=62, duration=3, lyric=['Y', 'AO', 'R']),
                  Note(pitch=64, duration=12, lyric=['B', 'OW', 'T']),
                  Note(pitch=64, duration=9, lyric=['JH', 'EH', 'N', 'T']),
                  Note(pitch=62, duration=3, lyric=['L', 'IY']),
                  Note(pitch=64, duration=9, lyric=['D', 'AW', 'N']),
                  Note(pitch=65, duration=3, lyric=['DH', 'AH']),
                  Note(pitch=67, duration=24, lyric=['S', 'T', 'R', 'IY', 'M']),
                  Note(pitch=72, duration=4, lyric=['M', 'EH']),
                  Note(pitch=72, duration=4, lyric=['R', 'AH']),
                  Note(pitch=72, duration=4, lyric=['L', 'IY']),
                  Note(pitch=67, duration=4, lyric=['M', 'EH']),
                  N

In [9]:
from utils.musicxml import parse_musicxml

# Parse the long-tones score, passing "AH" as the constant phoneme.
parse_musicxml(
    'data/VocalSetScoresOld/long_tones.musicxml',
    constant_phoneme="AH",
)

{'P1': {'name': 'Voice',
  'notes': [Note(pitch=60, duration=4, lyric=['AH']),
   Note(pitch=72, duration=4, lyric=['AH']),
   Note(pitch=77, duration=4, lyric=['AH'])]}}