In [1]:
import xmltodict
import os
import json
import pickle
from lxml import etree

# Text encoding shared by the file reader (load_data) and the lxml parser (xml_parser).
encoding = "utf-8"


def load_data(file_path):
    """Read a text file and return its contents with every '%20' removed.

    The file is opened in text mode using the module-level ``encoding``.

    Args:
        file_path: path of the file to read.

    Returns:
        The file contents as a string, with all '%20' occurrences deleted.
    """
    with open(file_path, "r", encoding=encoding) as handle:
        raw_text = handle.read()
    return raw_text.replace('%20', '')


def xml_parser(content):
    """Parse XML text with lxml and return the resulting root element.

    The parser is built with ``recover=True`` and the module-level
    ``encoding``.

    Args:
        content: the XML document to parse.

    Returns:
        The root element produced by ``etree.fromstring``.
    """
    return etree.fromstring(
        content,
        parser=etree.XMLParser(recover=True, encoding=encoding),
    )


def get_metadata(root):
    """Collect song metadata and the version string from a parsed XML tree.

    Each metadata field is looked up anywhere below ``root``; a field whose
    element is absent is stored as ``None``. The duration is read from
    ``duration`` when present, otherwise from ``section_duration``. The
    version is read only from a direct ``version`` child of ``root``.

    Args:
        root: root element of the parsed document.

    Returns:
        A ``(metadata, version)`` tuple: ``metadata`` is a dict of element
        texts (or ``None``), ``version`` is the version text (or ``None``).
    """
    def _text_of(path):
        # Text of the first element matching ``path``, or None when missing.
        node = root.find(path)
        return None if node is None else node.text

    metadata = {
        field: _text_of('.//' + field)
        for field in ('title', 'beats_in_measure', 'BPM', 'key', 'YouTubeID', 'mode')
    }

    # duration: fall back to section_duration when no duration element exists
    duration_node = root.find('.//duration')
    if duration_node is None:
        duration_node = root.find('.//section_duration')
    metadata['duration'] = None if duration_node is None else duration_node.text

    return metadata, _text_of('version')


def get_lead_sheet(root, version):
    """Extract the notes and chords of every ``segment`` element.

    Chords are searched under ``chords`` when the root tag is ``'super'``
    and under ``harmony`` otherwise. Each note/chord element is serialised
    and converted to a dict with ``xmltodict``.

    Args:
        root: root element of the parsed document.
        version: accepted for interface compatibility; not used here.

    Returns:
        A ``(segment_list, num_measures)`` tuple. ``segment_list`` holds one
        dict per segment with keys ``'notes'``, ``'chords'`` and
        ``'num_measure'`` (float); ``num_measures`` is the running total of
        the per-segment measure counts (``0`` when there are no segments).
    """
    # set chord tag according to the root element's tag
    chord_path = './/' + ('chords' if root.tag == 'super' else 'harmony') + '/chord'

    def _as_dicts(elements, key):
        # Convert each element to a dict and unwrap its single top-level key.
        return [xmltodict.parse(etree.tostring(element))[key] for element in elements]

    segment_list = []
    total_measures = 0

    for seg in root.findall('.//segment'):
        measures = float(seg.find('numMeasures').text)
        segment_list.append({
            'notes': _as_dicts(seg.findall('.//notes/note'), 'note'),
            'chords': _as_dicts(seg.findall(chord_path), 'chord'),
            'num_measure': measures,
        })
        total_measures += measures

    return segment_list, total_measures


def event_localization(measure_offset, start_beat_abs, duration):
    """Return the ``(on, off)`` positions of an event.

    Args:
        measure_offset: offset added to the event start.
        start_beat_abs: start of the event before the offset is applied.
        duration: length of the event.

    Returns:
        ``(measure_offset + start_beat_abs, measure_offset + start_beat_abs + duration)``.
    """
    onset = measure_offset + start_beat_abs
    return onset, onset + duration

def segments_parser(segments, mode, beats_in_measure):
    """Turn per-segment notes and chords into two flat tracks.

    Every chord and note is passed to ``proc_object`` together with the beat
    offset of its segment. That offset is the number of measures in the
    preceding segments (each truncated with ``int``) multiplied by
    ``float(beats_in_measure)``.

    Args:
        segments: iterable of dicts with ``'chords'``, ``'notes'`` and
            ``'num_measure'`` entries.
        mode: accepted for interface compatibility; not used here.
        beats_in_measure: beats per measure (anything ``float()`` accepts).

    Returns:
        A ``(melody_track, chord_track)`` tuple of lists.
    """
    melody_track, chord_track = [], []
    measures_elapsed = 0

    for segment in segments:
        offset_beats = measures_elapsed * float(beats_in_measure)

        # Chords are processed before notes within each segment.
        chord_track.extend(
            proc_object(chord, offset_beats, type_='chord') for chord in segment['chords']
        )
        melody_track.extend(
            proc_object(note, offset_beats, type_='note') for note in segment['notes']
        )

        measures_elapsed += int(segment['num_measure'])

    return melody_track, chord_track


def proc_xml(file_path, save_path=None, name='tab', save_type='pickle'):
    """Parse one XML file into a dict and optionally save it.

    The file is loaded, parsed, and split into metadata and segments; the
    segments are then flattened into melody and chord tracks.

    Args:
        file_path: path of the XML file to process.
        save_path: directory to write the result into. When falsy, nothing
            is written. The directory is created if it does not exist.
        name: output file name without extension.
        save_type: ``'pickle'`` or ``'json'``; selects the output format and
            the extension appended to ``name``.

    Returns:
        A dict with keys ``'version'``, ``'metadata'``, ``'tracks'``
        (``'melody'`` and ``'chord'``) and ``'num_measures'``.

    Raises:
        ValueError: if ``save_path`` is set and ``save_type`` is neither
            ``'pickle'`` nor ``'json'``.
        TypeError: if the document has no ``beats_in_measure`` element
            (``int(None)``).
    """
    content = load_data(file_path)
    root = xml_parser(content)
    metadata, version = get_metadata(root)
    segments, num_measures = get_lead_sheet(root, version)

    # A missing mode falls back to 1.
    mode = int(metadata['mode']) if metadata['mode'] is not None else 1
    beats_in_measure = int(metadata['beats_in_measure'])

    melody, chord = segments_parser(segments, mode, beats_in_measure)

    data = {
        'version': version,
        'metadata': metadata,
        'tracks': {
            'melody': melody,
            'chord': chord,
        },
        'num_measures': num_measures,
    }

    if save_path:
        os.makedirs(save_path, exist_ok=True)

        # Compare by value: identity (`is`) on string literals is
        # implementation-dependent and can silently fail to match.
        if save_type == 'pickle':
            file_name = os.path.join(save_path, name+'.pickle')
            with open(file_name, 'wb') as handle:
                pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        elif save_type == 'json':
            file_name = os.path.join(save_path, name+'.json')
            with open(file_name, 'w') as handle:
                json.dump(data, handle)
        else:
            raise ValueError('Unkown type for saving')

    return data


def traverse_dir(root_dir, extension='.xml'):
    """Recursively list the files under ``root_dir`` whose name ends with ``extension``.

    Args:
        root_dir: directory to walk.
        extension: required file-name suffix.

    Returns:
        A list of paths (directory joined with file name) in ``os.walk`` order.
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(root_dir)
        for filename in filenames
        if filename.endswith(extension)
    ]


def get_postfix_dirpath(filename, idx=-4):
    """Return the trailing components of a path, re-joined.

    The path is normalised, split on ``os.sep``, sliced with ``[idx:]`` and
    joined back together.

    Args:
        filename: path to shorten.
        idx: slice start applied to the list of components (default keeps
            the last four).

    Returns:
        The joined trailing components as a string.
    """
    components = os.path.normpath(filename).split(os.sep)
    return os.path.join('', *components[idx:])


def proc_dir(file_list, root):
    """Process every file in ``file_list`` and save each result as JSON under ``root``.

    A ``(index/total)`` progress line with a zero-based index is printed
    before each file.

    Args:
        file_list: sequence of XML file paths.
        root: directory passed to ``proc_xml`` as ``save_path``.
    """
    total = len(file_list)
    for position, file_path in enumerate(file_list):
        print('(%d/%d)' % (position, total))
        # NOTE(review): the output name is the input's base name, so inputs
        # from different directories that share a base name are written to
        # the same file under ``root``.
        proc_xml(
            file_path,
            save_path=root,
            name=os.path.basename(file_path),
            save_type='json',
        )


In [2]:
# mode = int(metadata['mode']) if metadata['mode'] is not None else 1
# beats_in_measure = int(metadata['beats_in_measure'])

# melody, chord = segments_parser(segments, mode, beats_in_measure)

In [3]:
from pathlib import Path

In [4]:
# Dataset locations, built as relative paths.
version = 'v7'
data_path = Path('data') / 'midi'
version_path = data_path.joinpath(version)
orig_path = version_path.joinpath('midi_sources')

In [5]:
from fastai.data_block import get_files

In [6]:
# Directory holding the Hooktheory sources, below ``orig_path``.
h_path = orig_path.joinpath('hooktheory')

In [7]:
# Recursively collect the .xml files below ``h_path`` and preview the first ten.
files = get_files(h_path, extensions=['.xml'], recurse=True)
files[:10]

[PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wayne-sharpe/yu-gi-oh-theme-song/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wayne-sharpe/yu-gi-oh-theme-song/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/what-a-day/kiefer/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/pre-chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/verse.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/verse.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/freedom/chorus.xml')]

In [8]:
# Loading from specific file
keywords = ['get-lucky', 'daft-punk']


def contains_keywords(f):
    """Return True when every entry of ``keywords`` is a substring of ``str(f)``."""
    for keyword in keywords:
        if keyword not in str(f):
            return False
    return True


search = [candidate for candidate in files if contains_keywords(candidate)]
search

[PosixPath('data/midi/v7/midi_sources/hooktheory/xml/d/daft-punk/get-lucky/outro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/d/daft-punk/get-lucky/pre-chorus-and-chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/d/daft-punk/get-lucky/verse.xml')]

In [11]:
import pprint

# Pick one of the matching files by its position in ``search``.
# NOTE(review): index 1 depends on the order in which ``search`` was built — confirm it is stable across runs.
file_path = search[1]
# raw_info = proc_xml(file_path)
# pprint.pprint(raw_info)



In [12]:
# Run the parsing steps one at a time on the selected file so the
# intermediate values (root, metadata, segments) stay available for inspection.
content = load_data(file_path)
root = xml_parser(content)
# NOTE: this rebinds the notebook-level name ``version`` to the value returned by get_metadata.
metadata, version = get_metadata(root)
segments, num_measures = get_lead_sheet(root, version)

In [13]:
len(segments)

3

In [14]:
segments

[{'notes': [OrderedDict([('start_beat_abs', '0'),
                ('start_measure', '1'),
                ('start_beat', '1'),
                ('note_length', '2.5'),
                ('scale_degree', '3'),
                ('octave', '0'),
                ('isRest', '0')]),
   OrderedDict([('start_beat_abs', '2.5'),
                ('start_measure', '1'),
                ('start_beat', '3.5'),
                ('note_length', '0.5'),
                ('scale_degree', '2'),
                ('octave', '0'),
                ('isRest', '0')]),
   OrderedDict([('start_beat_abs', '3'),
                ('start_measure', '1'),
                ('start_beat', '4'),
                ('note_length', '0.5'),
                ('scale_degree', '3'),
                ('octave', '0'),
                ('isRest', '0')]),
   OrderedDict([('start_beat_abs', '3.5'),
                ('start_measure', '1'),
                ('start_beat', '4.5'),
                ('note_length', '3'),
                ('scale_degree',

In [26]:
import music21

### Create config file

In [27]:
# Conversion options, stored as a plain dict (read with ``config[key]``).
config = {'sustain': True, 'sep_octave': True, 'default_octave': 4}

### Constants

In [16]:
# note to number
# 

### Classes

In [36]:
from dataclasses import dataclass
import dataclasses
from typing import Dict, Any, AnyStr, List, Sequence, TypeVar, Tuple, Optional, Union

In [18]:
@dataclass
class Base:
    """Common base for the notebook's dataclasses; adds dict-based construction."""

    @classmethod
    def parse(cls, d):
        """Build an instance of ``cls`` from a mapping.

        Only keys that match a dataclass field of ``cls`` are used; extra
        keys in ``d`` are ignored. A field that is absent from ``d`` takes
        its declared default, so defaults on subclasses are honoured. Values
        present in ``d`` are passed through unchanged, including ``None``.

        Args:
            d: mapping from field names to values.

        Returns:
            A new ``cls`` instance.

        Raises:
            KeyError: if a field without a default is missing from ``d``.
        """
        kwargs = {}
        for field in dataclasses.fields(cls):
            if not field.init:
                # Fields excluded from __init__ cannot be passed as kwargs.
                continue
            if field.name in d:
                kwargs[field.name] = d[field.name]
            elif (field.default is dataclasses.MISSING
                  and field.default_factory is dataclasses.MISSING):
                # Keep the original failure mode for required fields.
                raise KeyError(field.name)
        return cls(**kwargs)

In [127]:
@dataclass
class HMetadata(Base):
    """Song-level metadata record; every field is annotated as ``str``.

    Only ``title`` is required; the remaining fields declare string defaults.
    """
    title:str
    # NOTE(review): presumably the tempo in beats per minute — confirm.
    BPM:str='120'
    # Number of beats per measure, kept as text.
    beats_in_measure:str='4'
    # NOTE(review): encoding of the key value is not visible here — confirm against the source data.
    key:str='1'
    # NOTE(review): meaning of the mode value is not visible here — confirm against the source data.
    mode:str='1'

In [43]:
@dataclass
class HNote(Base):
    """One melody note, with every field kept as the text read from the XML."""
    start_beat_abs: str
    start_measure: str
    start_beat: str
    note_length: str
    scale_degree: str
    octave: str

    def to_m21(self)->music21.note.GeneralNote:
        """Convert this note to a music21 object.

        Returns:
            A ``music21.note.Rest`` when ``scale_degree`` is ``'rest'``,
            otherwise a ``music21.note.Note``. The octave of a note is this
            record's ``octave`` plus ``config['default_octave']``. Length and
            offset are taken from ``note_length`` and ``start_beat_abs``.

        NOTE(review): ``scale_degree`` is not mapped to a pitch here, so the
        note keeps music21's default pitch name; only the octave is set.
        """
        # The fields are strings; music21 needs numeric length and offset.
        length = float(self.note_length)
        if self.scale_degree == 'rest':
            n = music21.note.Rest(quarterLength=length)
        else:
            n = music21.note.Note(quarterLength=length)
            # ``config`` is a dict, so it must be indexed rather than accessed by attribute.
            n.octave = int(self.octave) + config['default_octave']
        n.offset = float(self.start_beat_abs)
        return n

In [58]:
@dataclass
class HChord(Base):
    """One chord: its pitch composition plus timing fields kept as text."""
#     sd: str
#     fb: str
#     sec: str
    composition:List[int]
    chord_duration:str
    start_measure:str
    start_beat:str
    start_beat_abs:str
#     sus: str = None
#     borrowed:str = None
#     alternate:str = None

    def to_m21(self)->music21.chord.Chord:
        """Convert this chord to a ``music21.chord.Chord``.

        The chord is built from ``composition`` (converted to plain ints),
        with its length taken from ``chord_duration`` and its offset from
        ``start_beat_abs``.

        NOTE(review): how music21 maps the integer values to pitches and
        octaves should be confirmed against the intended voicing.
        """
        # music21 needs plain ints, not numpy object-array elements.
        c = music21.chord.Chord([int(p) for p in self.composition])
        c.quarterLength = float(self.chord_duration)
        c.offset = float(self.start_beat_abs)
        return c

    @classmethod
    def parse(cls, d, mode=None, key_offset=None):
        """Build an ``HChord`` from a mapping that contains its fields.

        Args:
            d: mapping with ``composition``, ``chord_duration``,
                ``start_measure``, ``start_beat`` and ``start_beat_abs``.
            mode: currently unused.
            key_offset: currently unused.
                NOTE(review): ``mode``/``key_offset`` are presumably meant for
                deriving ``composition`` from a roman-numeral chord; that
                conversion is not implemented here — confirm.

        Returns:
            A new ``HChord`` whose ``composition`` is a list of ints.

        Raises:
            KeyError: if ``d`` lacks any of the fields listed above.
        """
        kwargs = {f.name: d[f.name] for f in dataclasses.fields(cls)}
        kwargs['composition'] = [int(p) for p in kwargs['composition']]
        return cls(**kwargs)

In [59]:
@dataclass
class HPart(Base):
    """One segment of a song: its notes and its chords."""
    notes: List[HNote]
    chords: List[HChord]

    @classmethod
    def parse(cls, d, metadata):
        """Build an ``HPart`` from a segment dict.

        Args:
            d: mapping with optional ``'notes'`` and ``'chords'`` lists; a
                missing entry is treated as an empty list.
            metadata: mapping of song metadata; its ``'mode'`` and ``'key'``
                entries (``None`` when absent) are forwarded to
                ``HChord.parse``.

        Returns:
            A new ``HPart``.
        """
        mode = metadata.get('mode')
        # NOTE(review): the raw 'key' value is forwarded as ``key_offset``;
        # confirm whether HChord.parse expects it converted first.
        key = metadata.get('key')
        ns = [HNote.parse(n) for n in d.get('notes', [])]
        cs = [HChord.parse(c, mode, key) for c in d.get('chords', [])]
        return cls(notes=ns, chords=cs)
        

In [60]:
@dataclass
class HSong(Base):
    """A whole song: its metadata plus one ``HPart`` per segment."""
    metadata: HMetadata
    parts: List[HPart]

    @classmethod
    def parse(cls, metadata, segments):
        """Build an ``HSong`` from a metadata mapping and a list of segment dicts.

        The raw ``metadata`` mapping is parsed into ``HMetadata`` and is also
        handed, unparsed, to ``HPart.parse`` for every segment.
        """
        song_metadata = HMetadata.parse(metadata)
        song_parts = []
        for segment in segments:
            song_parts.append(HPart.parse(segment, metadata))
        return cls(metadata=song_metadata, parts=song_parts)

    def to_stream(self):
        """Placeholder for a music21 stream conversion; currently returns ``None``."""
        # music21 stream
        return None

In [61]:
# Build the song object from the metadata and segments parsed in the cells above.
s = HSong.parse(metadata, segments)

In [62]:
# translator
class Translator:
    """Namespace for converting between ``HSong`` and array form (not implemented yet)."""

    @classmethod
    def to_numpy(cls, s:HSong):
        """Placeholder: takes an ``HSong``; currently returns ``None``."""
        return None

    @classmethod
    def to_song(cls, arr):
        """Placeholder: takes an array; currently returns ``None``."""
        return None

In [63]:
from src import roman_to_symbol

In [64]:
c = s.parts[0].chords[0]

In [67]:
# Turn the chord into a plain dict and add the extra keys, with hand-picked
# sample values, before handing it to the chord parser.
c_d = dataclasses.asdict(c)
c_d.update(isRest=False, event_on=2, event_off=4, event_duration=4)

In [74]:
c

HChord(sd='1', fb='7', sec=None, chord_duration='4', start_measure='1', start_beat='1', start_beat_abs='0', sus=None, borrowed=None, alternate=None)

In [76]:
from src import to_pianoroll

In [106]:
parsed_c = roman_to_symbol.chord_parser(c_d, '2', 2); parsed_c

OrderedDict([('root', 2),
             ('bass', 2),
             ('comp_vec',
              array([2, None, 5, None, 9, 12, None, None, None], dtype=object)),
             ('composition', array([2, 5, 9, 12], dtype=object)),
             ('quality', 'm'),
             ('chord_type', 7),
             ('chord_mode', -2),
             ('isRest', False),
             ('event_on', 2),
             ('event_off', 4),
             ('event_duration', 4),
             ('inv', 0),
             ('sus', None),
             ('alter', None),
             ('emb', []),
             ('alter_map', None),
             ('symbol', 'dm7')])

In [78]:
parsed_c['comp_vec']

array([0, None, 4, None, 7, 11, None, None, None], dtype=object)

In [97]:
parsed_c['composition']

array([2, 5, 9, 12], dtype=object)

In [95]:
music21.chord.Chord([0,4,7,11])

<music21.chord.Chord C E G B>

In [124]:
out = parsed_c['composition'].astype(int).tolist()

In [125]:
music21.chord.Chord(out)

<music21.chord.Chord D F A C0>

In [98]:
music21.chord.Chord(parsed_c['composition'].tolist())

ChordException: Could not process input argument 2

In [82]:
to_pianoroll.voicing(parsed_c)

OrderedDict([('root', 0),
             ('bass', 0),
             ('comp_vec',
              array([0, None, 4, None, 7, 11, None, None, None], dtype=object)),
             ('composition', array([0, 4, 7, 11], dtype=object)),
             ('quality', 'maj'),
             ('chord_type', 7),
             ('chord_mode', 0),
             ('isRest', False),
             ('event_on', 2),
             ('event_off', 4),
             ('event_duration', 4),
             ('inv', 0),
             ('sus', None),
             ('alter', None),
             ('emb', []),
             ('alter_map', None),
             ('symbol', 'Cmaj7')])

In [89]:
comp = to_pianoroll.to_chromagram(parsed_c['composition'])

In [90]:
comp

array([2, 5, 9, 0], dtype=object)

In [None]:
music21.chord.composition