In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import xmltodict
import os
import json
import pickle
from lxml import etree

encoding = "utf-8"


def load_data(file_path):
    with open(file_path, "r", encoding=encoding) as f:
        content = f.read()
    content = content.replace('%20', '')
    return content


def xml_parser(content):
    parser = etree.XMLParser(recover=True, encoding=encoding)
    root = etree.fromstring(content, parser=parser)
    return root


def get_metadata(root):
    meta_list = ['title', 'beats_in_measure', 'BPM', 'key', 'YouTubeID', 'mode']

    metadata = dict()
    for e in meta_list:
        tag = root.find('.//' + e)
        metadata[e] = tag.text if tag is not None else None

    # duration
    tag = root.find('.//duration')
    tag = root.find('.//section_duration') if tag is None else tag
    metadata['duration'] = tag.text if tag is not None else None

    version = root.find('version')
    version = version.text if version is not None else None
    return metadata, version


def get_lead_sheet(root, version):
    segments_tag = root.findall('.//segment')

    # set chord tag according to version
    chord_tag = 'chords' if root.tag == 'super' else 'harmony'

    segment_list = []
    num_measures = 0

    for segment in segments_tag:
        num_measure = float(segment.find('numMeasures').text)

        # melody
        note_tags = segment.findall('.//notes/note')
        note_list = [xmltodict.parse(etree.tostring(n))['note'] for n in note_tags] if note_tags else []

        # chord
        chord_tags = segment.findall('.//' + chord_tag + '/chord')
        chord_list = [xmltodict.parse(etree.tostring(c))['chord'] for c in chord_tags] if chord_tags else []

        segment_list.append({'notes': note_list, 'chords': chord_list, 'num_measure': num_measure})
        num_measures += num_measure

    return segment_list, num_measures


def event_localization(measure_offset, start_beat_abs, duration):
    event_on = measure_offset + start_beat_abs
    event_off = measure_offset + start_beat_abs + duration
    return event_on, event_off

def segments_parser(segments, mode, beats_in_measure):
    measure_counter = 0

    chord_track = []
    melody_track = []

    for sidx, segment in enumerate(segments):
        measure_offset = measure_counter * float(beats_in_measure)

        for chord in segment['chords']:
            chord_track.append(proc_object(chord, measure_offset, type_='chord'))
        for note in segment['notes']:
            melody_track.append(proc_object(note, measure_offset, type_='note'))

        measure_counter += int(segment['num_measure'])

    return melody_track, chord_track

def traverse_dir(root_dir, extension='.xml'):
    file_list = []
    for root, dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith(extension):
                file_list.append(os.path.join(root, file))

    return file_list


def get_postfix_dirpath(filename, idx=-4):
    path = os.path.normpath(filename)
    dir_list = path.split(os.sep)
    new_path = ''

    for d in dir_list[idx:]:
        new_path = os.path.join(new_path, d)

    return new_path


def proc_dir(file_list, root):
    num_file = len(file_list)
    for fidx in range(num_file):
        print('(%d/%d)' % (fidx, num_file))
        file_path = file_list[fidx]
        save_path = root
        name = os.path.basename(file_path)
        proc_xml(file_path, save_path=save_path, name=name, save_type='json')


In [3]:
# mode = int(metadata['mode']) if metadata['mode'] is not None else 1
# beats_in_measure = int(metadata['beats_in_measure'])

# melody, chord = segments_parser(segments, mode, beats_in_measure)

In [4]:
from pathlib import Path

In [5]:
version = 'v7'
data_path = Path('data/midi')
version_path = data_path/version
orig_path = version_path/'midi_sources'

In [6]:
from fastai.data_block import get_files

In [7]:
h_path = orig_path/'hooktheory'

In [8]:
files = get_files(h_path, extensions=['.xml'], recurse=True); files[:10]

[PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wayne-sharpe/yu-gi-oh-theme-song/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wayne-sharpe/yu-gi-oh-theme-song/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/what-a-day/kiefer/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/pre-chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/verse.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/verse.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/freedom/chorus.xml')]

In [9]:
# Loading from specific file
# keywords = ['get-lucky', 'daft-punk', 'pre-chorus']
# keywords = ['skrillex', 'scary']
keywords = ['idina', 'intro', 'let']
def contains_keywords(f): return all([k in str(f) for k in keywords])
search = [f for f in files if contains_keywords(f)]; search

[PosixPath('data/midi/v7/midi_sources/hooktheory/xml/i/idina-menzel/let-it-go/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/i/idina-menzel/let-it-go/intro-and-verse.xml')]

In [10]:
import pprint

file_path = search[0]
# raw_info = proc_xml(file_path)
# pprint.pprint(raw_info)



In [11]:
content = load_data(file_path)
root = xml_parser(content)
metadata, version = get_metadata(root)
segments, num_measures = get_lead_sheet(root, version)

In [12]:
import music21

### Create config file

In [13]:
config = dict(sustain=True, sep_octave=True, note_octave=4, chord_octave=2, ts='4/4', ks=0, bpm=120)

### Constants

### Classes

In [14]:
MODE_TO_KEYOFFSET = {
    '1': 0,
    '2': 2,
    '3': 4,
    '4': 5,
    '5': 7,
    '6': 9,
    '7': 11
#     '5': -5,
#     '6': -3,
#     '7': -1
}

In [15]:
PITCH_TO_SD = {
    0: '1',
    1: '1#',
    2: '2',
    3: '2#',
    4: '3',
    5: '4',
    6: '4#',
    7: '5',
    8: '5#',
    9: '6',
    10:'6#',
    11:'7',
}

SD_TO_PITCH = {v:k for k,v in PITCH_TO_SD.items()}

In [16]:
from dataclasses import dataclass
import dataclasses
from typing import Dict, Any, AnyStr, List, Sequence, TypeVar, Tuple, Optional, Union

In [17]:
def parse(cls, d):
    cls_keys = cls.__dataclass_fields__.keys()
    kwargs = {key:d[key] for key in cls_keys}
    return cls(**kwargs)

@dataclass
class Base:
    @classmethod
    def from_dict(cls, d):
        cls_keys = cls.__dataclass_fields__.keys()
        kwargs = {key:d[key] for key in cls_keys}
        return cls(**kwargs)
    
    @classmethod
    def parse(cls, d):
        return cls.from_dict(d)

In [18]:
@dataclass
class HMetadata(Base):
    title:str
    BPM:str='120'
    beats_in_measure:str='4'
    key:str='C'
    mode:str='1'

In [19]:
@dataclass
class HNote(Base):
    start_beat_abs: str
    start_measure: str
    start_beat: str
    note_length: str
    scale_degree: str
    octave: str
        
    def to_m21(self)->music21.note.Note:
#         if self.scale_degree == 'rest': return None, None
#             n = music21.note.Rest(quarterLength=note_length)
        pitch = self.pitch() + 12*(int(self.octave)+config.note_octave)
        n = music21.note.Note(pitch, quarterLength=note_length)
        return n, self.start_beat_abs
    
    def pitch(self):
        return SD_TO_PITCH[self.scale_degree]
    
    @classmethod
    def parse(cls, d, mode, key_offset):
        if key_offset > 5: key_offset = key_offset-12
        parsed = roman_to_symbol.hnote_parser(d, mode, key_offset)
        pitch = parsed['pitch']
        scale_degree = PITCH_TO_SD[int((pitch+12) % 12)]
        octave = (pitch // 12)
        m = {
            'scale_degree': scale_degree,
            'octave': octave
        }
        return cls.from_dict({**d, **m})

In [20]:
@dataclass
class HChord(Base):
    # ht relative
    sd:str
    fb:str
    sus:str
    
    # ht tempo
    chord_duration:str
    start_measure:str
    start_beat:str
    start_beat_abs:str
        
    # abs
    symbol:str
    quality:str
    composition:List[int]
        
    def to_m21(self)->music21.chord.Chord:
        c = music21.chord.Chord(self.composition, quarterLength=self.note_length)
        c.octave = config.chord_octave
        return c, self.start_beat_abs
    
    @classmethod
    def parse(cls, d, mode, key_offset):
        parsed = roman_to_symbol.hchord_parser(d, mode, key_offset)
        parsed['composition'] = parsed['composition'].astype(int).tolist()
        return cls.from_dict({**d, **parsed})

In [21]:
def default_stream(cls=music21.stream.Stream, ts='4/4', bpm=120, ks=0)
    # (AS) TODO: use config ts or metadata
    s = cls()
    s.append(music21.instrument.Piano())
    s.append(music21.meter.TimeSignature(ts))
    s.append(music21.tempo.MetronomeMark(number=bpm))
    s.append(music21.key.KeySignature(ks))
    return s

SyntaxError: invalid syntax (<ipython-input-21-f289284bb8df>, line 1)

In [22]:
@dataclass
class HPart(Base):
    notes: List[HNote]
    chords: List[HChord]
        
    @classmethod
    def parse(cls, d, metadata):
        mode = metadata['mode']
        key_offset = MODE_TO_KEYOFFSET.get(mode, 0)
        ns = [HNote.parse(n, mode, key_offset) for n in d.get('notes', []) if n['scale_degree'] != 'rest']
        cs = [HChord.parse(c, mode, key_offset) for c in d.get('chords', []) if c['sd'] != 'rest']
        return cls(notes=ns, chords=cs)
    
    def to_m21(self)->music21.stream.Stream:
#         s = music21.stream.Stream
        pc = music21.stream.Part()
        pn = music21.stream.Part()
        
        cm21 = [c.to_m21() for c in self.chords]
        for c,d in cm21: pc.insert(d,c)
            
        nm21 = [n.to_m21() for n in self.notes]
        for n,d in nm21: pn.insert(d,n)
            
        return nm21, cm21
        
    def min_pitch(self):
        return min([n.pitch for n in self.notes])

In [23]:
@dataclass
class HSong(Base):
    metadata: HMetadata
    parts: List[HPart]
    
    @classmethod
    def parse(cls, metadata, segments):
        m = HMetadata.parse(metadata)
        ps = [HPart.parse(s, metadata) for s in segments]
        return cls(metadata=m, parts=ps)
    
    def to_stream(self):
        # music21 stream
        return None

In [24]:
# s = HSong.parse(metadata, segments)

In [25]:
# translator
class Translator():
    @classmethod
    def to_numpy(cls, s:HSong):
        pass
    @classmethod
    def to_song(cls, arr):
        pass

In [26]:
from src import roman_to_symbol
from src import to_pianoroll
from collections import defaultdict
from midi_data import keyc_offset

In [27]:
# parsed = [roman_to_symbol.hchord_parser(c, metadata['mode'], 7) for c in segments[0]['chords']]

Skrillex  
G minor -> 9 offset -> C major

minor = 5, G->C=

Daft Punk  
B dorian -> 2 offset -> C major

Let it go  
F minor -> 9 offset -> C major

Game of thrones  
C minor -> 9 offset -> C major

In [51]:
# Loading from specific file
# keywords = ['get-lucky', 'daft-punk', 'pre-chorus']
# keywords = ['skrillex', 'scary']
keywords = ['idina', 'intro', 'let']
# keywords = ['game-of-thrones', 'intro', 'ramin']
# keywords = ['kiss-from-a-rose', 'seal']
def contains_keywords(f): return all([k in str(f) for k in keywords])
search = [f for f in files if contains_keywords(f)]; search

[PosixPath('data/midi/v7/midi_sources/hooktheory/xml/i/idina-menzel/let-it-go/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/i/idina-menzel/let-it-go/intro-and-verse.xml')]

In [52]:
file_path = search[0]
content = load_data(file_path)
root = xml_parser(content)
metadata, version = get_metadata(root)
segments, num_measures = get_lead_sheet(root, version)

In [53]:
key_offset = MODE_TO_KEYOFFSET.get(metadata['mode'], 0)
# key_offset = roman_to_symbol.get_key_offset(metadata['key']); key_offset # this is offset to key not to c

In [54]:
parsed = [roman_to_symbol.hchord_parser(c, metadata['mode'], key_offset) for c in segments[0]['chords']]

In [55]:
s = HSong.parse(metadata, segments)

In [57]:
s.parts[0].notes

[HNote(start_beat_abs='0', start_measure='1', start_beat='1', note_length='0.5', scale_degree='7', octave=0.0),
 HNote(start_beat_abs='0.5', start_measure='1', start_beat='1.5', note_length='0.5', scale_degree='1', octave=1.0),
 HNote(start_beat_abs='1', start_measure='1', start_beat='2', note_length='0.5', scale_degree='3', octave=0.0),
 HNote(start_beat_abs='1.5', start_measure='1', start_beat='2.5', note_length='1', scale_degree='7', octave=0.0),
 HNote(start_beat_abs='2.5', start_measure='1', start_beat='3.5', note_length='0.5', scale_degree='1', octave=1.0),
 HNote(start_beat_abs='4', start_measure='2', start_beat='1', note_length='0.5', scale_degree='7', octave=0.0),
 HNote(start_beat_abs='4.5', start_measure='2', start_beat='1.5', note_length='0.5', scale_degree='1', octave=1.0),
 HNote(start_beat_abs='5', start_measure='2', start_beat='2', note_length='0.5', scale_degree='3', octave=0.0),
 HNote(start_beat_abs='5.5', start_measure='2', start_beat='2.5', note_length='1', scale_d

In [43]:
%debug

> [0;32m<ipython-input-20-94ac2517c786>[0m(27)[0;36mparse[0;34m()[0m
[0;32m     24 [0;31m    [0;34m@[0m[0mclassmethod[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     25 [0;31m    [0;32mdef[0m [0mparse[0m[0;34m([0m[0mcls[0m[0;34m,[0m [0md[0m[0;34m,[0m [0mmode[0m[0;34m,[0m [0mkey_offset[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     26 [0;31m        [0mparsed[0m [0;34m=[0m [0;34m[[0m[0mroman_to_symbol[0m[0;34m.[0m[0mhchord_parser[0m[0;34m([0m[0md[0m[0;34m,[0m [0mmode[0m[0;34m,[0m [0mkey_offset[0m[0;34m)[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 27 [0;31m        [0mparsed[0m[0;34m[[0m[0;34m'composition'[0m[0;34m][0m [0;34m=[0m [0mparsed[0m[0;34m[[0m[0;34m'composition'[0m[0;34m][0m[0;34m.[0m[0mastype[0m[0;34m([0m[0mint[0m[0;34m)[0m[0;34m.[0m[0mtolist[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     28 [0;31m        [0;32mreturn[0m [0mcls[0m

In [62]:
%debug

> [0;32m<ipython-input-49-0d71b7202dfa>[0m(24)[0;36mparse[0;34m()[0m
[0;32m     22 [0;31m        [0;32mif[0m [0mkey_offset[0m [0;34m>[0m [0;36m5[0m[0;34m:[0m [0mkey_offset[0m [0;34m=[0m [0mkey_offset[0m[0;34m-[0m[0;36m12[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     23 [0;31m        [0mparsed[0m [0;34m=[0m [0mroman_to_symbol[0m[0;34m.[0m[0mhnote_parser[0m[0;34m([0m[0md[0m[0;34m,[0m [0mmode[0m[0;34m,[0m [0mkey_offset[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 24 [0;31m        [0mpitch[0m [0;34m=[0m [0mparsed[0m[0;34m[[0m[0;34m'pitch'[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     25 [0;31m        [0mscale_degree[0m [0;34m=[0m [0mPITCH_TO_SD[0m[0;34m[[0m[0mint[0m[0;34m([0m[0;34m([0m[0mpitch[0m[0;34m+[0m[0;36m12[0m[0;34m)[0m [0;34m%[0m [0;36m12[0m[0;34m)[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     26 [0;31m        [0moctave[0m [0;34m=[0m [0;34m([0m[0mpitch

In [45]:
[(p['symbol'], p['chord_type']) for p in parsed]

[('am', 5),
 ('Fmaj7', 7),
 ('G sus2', 5),
 ('dm sus4', 5),
 ('dm', 5),
 ('am', 5),
 ('Fmaj7', 7),
 ('G sus2', 5),
 ('D sus4', 5),
 ('D', 5)]

In [46]:
segments[0]['chords']

[OrderedDict([('sd', '1'),
              ('fb', None),
              ('sec', None),
              ('sus', None),
              ('pedal', None),
              ('alternate', None),
              ('borrowed', None),
              ('chord_duration', '4'),
              ('start_measure', '1'),
              ('start_beat', '1'),
              ('start_beat_abs', '0'),
              ('isRest', '0')]),
 OrderedDict([('sd', '6'),
              ('fb', '7'),
              ('sec', None),
              ('emb', None),
              ('sus', None),
              ('pedal', None),
              ('alternate', None),
              ('borrowed', None),
              ('chord_duration', '4'),
              ('start_measure', '2'),
              ('start_beat', '1'),
              ('start_beat_abs', '4'),
              ('isRest', '0')]),
 OrderedDict([('sd', '7'),
              ('fb', None),
              ('sec', None),
              ('emb', None),
              ('sus', 'sus2'),
              ('pedal', None),
   

In [47]:
parsed # no offset

[OrderedDict([('root', 9),
              ('bass', 9),
              ('comp_vec',
               array([9, None, 12, None, 16, None, None, None, None], dtype=object)),
              ('composition', array([9, 12, 16], dtype=object)),
              ('quality', 'm'),
              ('chord_type', 5),
              ('chord_mode', -3),
              ('inv', 0),
              ('sus', None),
              ('alter', None),
              ('emb', []),
              ('alter_map', None),
              ('symbol', 'am')]),
 OrderedDict([('root', 5),
              ('bass', 5),
              ('comp_vec',
               array([5, None, 9, None, 12, 16, None, None, None], dtype=object)),
              ('composition', array([5, 9, 12, 16], dtype=object)),
              ('quality', 'maj'),
              ('chord_type', 7),
              ('chord_mode', -3),
              ('inv', 0),
              ('sus', None),
              ('alter', None),
              ('emb', []),
              ('alter_map', None),
      

In [59]:
parsed

[OrderedDict([('root', 9),
              ('bass', 21),
              ('comp_vec',
               array([21, None, 24, None, 28, None, None, None, None], dtype=object)),
              ('composition', array([21, 24, 28], dtype=object)),
              ('quality', 'm'),
              ('chord_type', 5),
              ('chord_mode', -3),
              ('inv', 0),
              ('sus', None),
              ('alter', None),
              ('emb', []),
              ('alter_map', None),
              ('symbol', 'am')]),
 OrderedDict([('root', 5),
              ('bass', 29),
              ('comp_vec',
               array([29, None, 33, None, 36, 40, None, None, None], dtype=object)),
              ('composition', array([29, 33, 36, 40], dtype=object)),
              ('quality', 'maj'),
              ('chord_type', 7),
              ('chord_mode', -3),
              ('inv', 0),
              ('sus', None),
              ('alter', None),
              ('emb', []),
              ('alter_map', None)

In [54]:
parsed

[OrderedDict([('root', 9),
              ('bass', 9),
              ('comp_vec',
               array([9, None, 12, None, 16, None, None, None, None], dtype=object)),
              ('composition', array([9, 12, 16], dtype=object)),
              ('quality', 'm'),
              ('chord_type', 5),
              ('chord_mode', -3),
              ('inv', 0),
              ('sus', None),
              ('alter', None),
              ('emb', []),
              ('alter_map', None),
              ('symbol', 'am')]),
 OrderedDict([('root', 5),
              ('bass', 17),
              ('comp_vec',
               array([17, None, 21, None, 24, 28, None, None, None], dtype=object)),
              ('composition', array([17, 21, 24, 28], dtype=object)),
              ('quality', 'maj'),
              ('chord_type', 7),
              ('chord_mode', -3),
              ('inv', 0),
              ('sus', None),
              ('alter', None),
              ('emb', []),
              ('alter_map', None),
 

In [45]:
segments[0]['notes']

[OrderedDict([('start_beat_abs', '0'),
              ('start_measure', '1'),
              ('start_beat', '1'),
              ('note_length', '0.5'),
              ('scale_degree', '2'),
              ('octave', '1'),
              ('isRest', '0')]),
 OrderedDict([('start_beat_abs', '0.5'),
              ('start_measure', '1'),
              ('start_beat', '1.5'),
              ('note_length', '0.5'),
              ('scale_degree', '3'),
              ('octave', '1'),
              ('isRest', '0')]),
 OrderedDict([('start_beat_abs', '1'),
              ('start_measure', '1'),
              ('start_beat', '2'),
              ('note_length', '0.5'),
              ('scale_degree', '5'),
              ('octave', '0'),
              ('isRest', '0')]),
 OrderedDict([('start_beat_abs', '1.5'),
              ('start_measure', '1'),
              ('start_beat', '2.5'),
              ('note_length', '1'),
              ('scale_degree', '2'),
              ('octave', '1'),
              ('isRest'

In [43]:
parsed_p = [roman_to_symbol.hnote_parser(c, metadata['mode'], key_offset) for c in segments[0]['notes']]

In [44]:
[p['pitch'] for p in parsed_p if p is not None]

[23.0,
 24.0,
 16.0,
 23.0,
 24.0,
 23.0,
 24.0,
 16.0,
 24.0,
 23.0,
 14.0,
 21.0,
 23.0,
 14.0,
 21.0,
 23.0,
 14.0,
 19.0,
 17.0,
 23.0,
 24.0,
 16.0,
 23.0,
 24.0,
 24.0,
 23.0,
 16.0,
 24.0,
 23.0,
 14.0,
 21.0,
 23.0,
 14.0,
 21.0,
 23.0,
 14.0,
 19.0,
 18.0]

In [46]:
parsed_c['comp_vec']

NameError: name 'parsed_c' is not defined

In [47]:
parsed_c['composition']

NameError: name 'parsed_c' is not defined

In [48]:
music21.chord.Chord([0,4,7,11])

<music21.chord.Chord C E G B>

In [49]:
out = parsed_c['composition'].astype(int).tolist()

music21.chord.Chord(out)

NameError: name 'parsed_c' is not defined

In [51]:
comp = to_pianoroll.to_chromagram(parsed_c['composition'])

NameError: name 'parsed_c' is not defined

In [90]:
comp

array([2, 5, 9, 0], dtype=object)

In [None]:
music21.chord.composition