In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import xmltodict
import os
import json
import pickle
from lxml import etree

encoding = "utf-8"


def load_data(file_path):
    """Read a text file and return its contents with every '%20' removed.

    The file is decoded using the module-level ``encoding`` setting.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The file contents with all '%20' substrings deleted.
    """
    with open(file_path, "r", encoding=encoding) as handle:
        raw_text = handle.read()
    return raw_text.replace('%20', '')


def xml_parser(content):
    """Parse XML text and return its root element.

    The lxml parser is created with ``recover=True`` and the module-level
    ``encoding``, then handed to ``etree.fromstring``.

    Args:
        content: The XML document text.

    Returns:
        Whatever ``etree.fromstring`` returns for ``content`` (the root node).
    """
    lenient_parser = etree.XMLParser(recover=True, encoding=encoding)
    return etree.fromstring(content, parser=lenient_parser)


def get_metadata(root):
    """Collect song metadata and the version string from a parsed XML tree.

    Args:
        root: Parsed XML root element (anything exposing ``find``).

    Returns:
        tuple: ``(metadata, version)``. ``metadata`` maps 'title',
        'beats_in_measure', 'BPM', 'key', 'YouTubeID', 'mode' and 'duration'
        to the text of the first matching descendant tag, or None when the
        tag is absent. 'duration' falls back to a 'section_duration' tag.
        ``version`` is the text of the direct child 'version', or None.
    """
    def _first_text(*tag_names):
        # Return the text of the first listed tag found anywhere below root.
        for tag_name in tag_names:
            node = root.find('.//' + tag_name)
            if node is not None:
                return node.text
        return None

    wanted = ('title', 'beats_in_measure', 'BPM', 'key', 'YouTubeID', 'mode')
    metadata = {tag_name: _first_text(tag_name) for tag_name in wanted}

    # duration: prefer <duration>, otherwise <section_duration>
    metadata['duration'] = _first_text('duration', 'section_duration')

    version_node = root.find('version')
    version = None if version_node is None else version_node.text
    return metadata, version


def get_lead_sheet(root, version):
    """Extract notes and chords of every <segment> below ``root``.

    Args:
        root: Parsed XML root element.
        version: Accepted but not consulted; the chord container tag is
            chosen from ``root.tag`` instead.

    Returns:
        tuple: ``(segment_list, num_measures)``. Each entry of
        ``segment_list`` is a dict with 'notes' and 'chords' (lists of dicts
        produced by xmltodict) and 'num_measure' (float). ``num_measures`` is
        the sum of all 'num_measure' values (0 when there are no segments).
    """
    # chords live under <chords> when the root tag is 'super', else <harmony>
    chord_container = 'chords' if root.tag == 'super' else 'harmony'
    chord_path = './/' + chord_container + '/chord'

    def _to_dicts(elements, key):
        # serialise each element and convert it to a plain dict
        return [xmltodict.parse(etree.tostring(el))[key] for el in elements]

    segment_list = []
    for seg in root.findall('.//segment'):
        measures = float(seg.find('numMeasures').text)
        melody = _to_dicts(seg.findall('.//notes/note'), 'note')
        harmony = _to_dicts(seg.findall(chord_path), 'chord')
        segment_list.append({'notes': melody, 'chords': harmony, 'num_measure': measures})

    num_measures = sum(entry['num_measure'] for entry in segment_list)
    return segment_list, num_measures


def event_localization(measure_offset, start_beat_abs, duration):
    """Return the absolute (on, off) positions of an event.

    Args:
        measure_offset: Offset added to the event's start.
        start_beat_abs: Start of the event relative to that offset.
        duration: Length of the event.

    Returns:
        tuple: ``(event_on, event_off)`` where ``event_on`` is
        ``measure_offset + start_beat_abs`` and ``event_off`` is
        ``event_on + duration``.
    """
    onset = measure_offset + start_beat_abs
    return onset, onset + duration

def segments_parser(segments, mode, beats_in_measure):
    """Flatten per-segment notes and chords into two tracks.

    Every chord and note is passed to ``proc_object`` together with the beat
    offset of its segment. NOTE(review): ``proc_object`` is not defined in
    the visible part of this notebook; it must be provided elsewhere.

    Args:
        segments: Iterable of dicts with 'chords', 'notes' and 'num_measure'.
        mode: Accepted but not used here.
        beats_in_measure: Beats per measure; converted with ``float``.

    Returns:
        tuple: ``(melody_track, chord_track)`` lists of ``proc_object``
        results, in segment order.
    """
    melody_track, chord_track = [], []
    measures_so_far = 0

    for segment in segments:
        # beat position where this segment starts
        offset = measures_so_far * float(beats_in_measure)

        chord_track.extend(
            proc_object(chord, offset, type_='chord') for chord in segment['chords'])
        melody_track.extend(
            proc_object(note, offset, type_='note') for note in segment['notes'])

        # only whole measures advance the running offset
        measures_so_far += int(segment['num_measure'])

    return melody_track, chord_track


def proc_xml(file_path, save_path=None, name='tab', save_type='pickle'):
    """Parse one XML lead sheet and optionally save the result.

    Args:
        file_path: Path of the XML file to read.
        save_path: Directory to write the result into. Nothing is written
            when this is falsy. The directory is created if needed.
        name: Output file name without the '.pickle' / '.json' suffix.
        save_type: 'pickle' or 'json'.

    Returns:
        dict: ``{'version', 'metadata', 'tracks': {'melody', 'chord'},
        'num_measures'}``.

    Raises:
        ValueError: If ``save_path`` is given and ``save_type`` is neither
            'pickle' nor 'json'.
    """
    content = load_data(file_path)
    root = xml_parser(content)
    metadata, version = get_metadata(root)
    segments, num_measures = get_lead_sheet(root, version)

    # a missing mode tag defaults to mode 1
    mode = int(metadata['mode']) if metadata['mode'] is not None else 1
    beats_in_measure = int(metadata['beats_in_measure'])

    melody, chord = segments_parser(segments, mode, beats_in_measure)

    data = {
        'version': version,
        'metadata': metadata,
        'tracks': {
            'melody': melody,
            'chord': chord,
        },
        'num_measures': num_measures,
    }

    if save_path:
        os.makedirs(save_path, exist_ok=True)

        # Compare by value: `is` tests object identity and only happens to
        # work for interned literals, so an equal string built at runtime
        # would wrongly fall through to the ValueError.
        if save_type == 'pickle':
            file_name = os.path.join(save_path, name+'.pickle')
            with open(file_name, 'wb') as handle:
                pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        elif save_type == 'json':
            file_name = os.path.join(save_path, name+'.json')
            with open(file_name, 'w') as handle:
                json.dump(data, handle)
        else:
            raise ValueError('Unkown type for saving')

    return data


def traverse_dir(root_dir, extension='.xml'):
    """Recursively list files under ``root_dir`` whose name ends with ``extension``.

    Args:
        root_dir: Directory to walk.
        extension: Required file-name suffix.

    Returns:
        list: Joined paths in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(dir_path, file_name)
        for dir_path, _subdirs, file_names in os.walk(root_dir)
        for file_name in file_names
        if file_name.endswith(extension)
    ]


def get_postfix_dirpath(filename, idx=-4):
    """Return the trailing components of a path, re-joined.

    The path is normalised, split on ``os.sep``, sliced with ``[idx:]`` and
    joined again.

    Args:
        filename: Path to shorten.
        idx: Slice start applied to the list of components (default keeps
            the last four).

    Returns:
        str: The joined tail of the path ('' when the slice is empty).
    """
    components = os.path.normpath(filename).split(os.sep)
    # the leading '' keeps the call valid when the slice is empty
    return os.path.join('', *components[idx:])


def proc_dir(file_list, root):
    """Run ``proc_xml`` on every file and save each result as JSON in ``root``.

    Prints a 1-based progress counter ``(i/N)`` before each file.

    NOTE(review): the output name is the input's base name (extension
    included) and every file is written directly into ``root``, so inputs
    that share a base name in different folders overwrite each other's
    '<name>.json'. Confirm whether a per-file sub-directory was intended.

    Args:
        file_list: Sequence of XML file paths.
        root: Directory that receives the JSON files.

    Returns:
        None
    """
    num_file = len(file_list)
    # start=1 so the last file reports (N/N) instead of (N-1/N)
    for position, file_path in enumerate(file_list, start=1):
        print('(%d/%d)' % (position, num_file))
        name = os.path.basename(file_path)
        proc_xml(file_path, save_path=root, name=name, save_type='json')


In [3]:
# mode = int(metadata['mode']) if metadata['mode'] is not None else 1
# beats_in_measure = int(metadata['beats_in_measure'])

# melody, chord = segments_parser(segments, mode, beats_in_measure)

In [4]:
from pathlib import Path

In [5]:
# Dataset layout: data/midi/<version>/midi_sources
# NOTE: `version` is rebound further down by `metadata, version = get_metadata(root)`,
# so it only holds 'v7' until that cell runs.
version = 'v7'
data_path = Path('data/midi')
version_path = data_path/version
orig_path = version_path/'midi_sources'

In [6]:
from fastai.data_block import get_files

In [7]:
h_path = orig_path/'hooktheory'

In [8]:
files = get_files(h_path, extensions=['.xml'], recurse=True); files[:10]

[PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wayne-sharpe/yu-gi-oh-theme-song/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wayne-sharpe/yu-gi-oh-theme-song/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/what-a-day/kiefer/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/pre-chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/verse.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/whiteflame/senbonzakura/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/verse.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/chorus.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/last-christmas/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/w/wham/freedom/chorus.xml')]

In [139]:
# Select the files whose path mentions every keyword.
# Other keyword sets that were tried:
# keywords = ['get-lucky', 'daft-punk', 'pre-chorus']
# keywords = ['skrillex', 'scary']
keywords = ['idina', 'intro', 'let']


def contains_keywords(f):
    """Return True when every entry of the global `keywords` occurs in str(f)."""
    path_text = str(f)
    return all(word in path_text for word in keywords)


search = list(filter(contains_keywords, files))
search

[PosixPath('data/midi/v7/midi_sources/hooktheory/xml/i/idina-menzel/let-it-go/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/i/idina-menzel/let-it-go/intro-and-verse.xml')]

In [140]:
import pprint

file_path = search[0]
# raw_info = proc_xml(file_path)
# pprint.pprint(raw_info)



In [141]:
# Parse the selected file step by step, using the same four calls that
# proc_xml starts with, so the intermediate objects stay available.
content = load_data(file_path)
root = xml_parser(content)
# NOTE: this rebinds `version` (previously the dataset version string).
metadata, version = get_metadata(root)
segments, num_measures = get_lead_sheet(root, version)

In [142]:
import music21

### Create config file

In [143]:
# Notebook-wide options kept in a plain dict: entries are read as
# config['default_octave'], not as attributes (config.default_octave).
config = dict(sustain=True, sep_octave=True, default_octave=4)

### Constants

### Classes

In [17]:
from dataclasses import dataclass
import dataclasses
from typing import Dict, Any, AnyStr, List, Sequence, TypeVar, Tuple, Optional, Union

In [18]:
@dataclass
class Base:
    """Common base for the notebook's dataclasses; adds dict-based construction."""

    @classmethod
    def parse(cls, d):
        """Build an instance of ``cls`` from the mapping ``d``.

        Only keys that match a dataclass field are used; extra keys are
        ignored. A field that declares a default may be absent from ``d``
        (the default then applies). Indexing every field unconditionally
        would make declared defaults unreachable.

        Args:
            d: Mapping from field name to value.

        Returns:
            An instance of ``cls``.

        Raises:
            KeyError: If a field without a default is missing from ``d``.
        """
        kwargs = {}
        for f in dataclasses.fields(cls):
            has_default = (f.default is not dataclasses.MISSING
                           or f.default_factory is not dataclasses.MISSING)
            if has_default and f.name not in d:
                continue  # let the dataclass default fill this field
            kwargs[f.name] = d[f.name]
        return cls(**kwargs)

In [19]:
@dataclass
class HMetadata(Base):
    """Song-level metadata; every field is annotated and defaulted as a string.

    Instances are created from a metadata dict through the inherited
    ``parse`` classmethod (see ``HSong.parse``).
    """
    # only `title` has no default
    title:str
    BPM:str='120'
    beats_in_measure:str='4'
    key:str='C'
    mode:str='1'

In [20]:
@dataclass
class HNote(Base):
    """One melody event as parsed from the XML; all fields are strings."""
    start_beat_abs: str
    start_measure: str
    start_beat: str
    note_length: str
    scale_degree: str
    octave: str

    def to_m21(self)->Union[music21.note.Note, music21.note.Rest]:
        """Convert this event to a music21 ``Note`` or ``Rest``.

        A ``scale_degree`` of 'rest' yields a Rest; anything else yields a
        Note whose octave is ``int(octave) + config['default_octave']``.
        Duration and offset come from ``note_length`` and ``start_beat_abs``
        converted to float.

        NOTE(review): the pitch name is not derived from ``scale_degree``
        here, so every Note keeps music21's default pitch name with only the
        octave adjusted. Confirm where the degree-to-pitch mapping belongs.
        """
        # the fields are strings; music21 durations/offsets are numeric
        length = float(self.note_length)
        if self.scale_degree == 'rest':
            n = music21.note.Rest(quarterLength=length)
        else:
            n = music21.note.Note(quarterLength=length)
            # config is a plain dict, so index it rather than use attributes
            n.octave = int(self.octave) + config['default_octave']
        n.offset = float(self.start_beat_abs)
        return n

In [21]:
@dataclass
class HChord(Base):
    """One chord event: its pitch composition plus timing fields (strings)."""
    composition:List[int]
    chord_duration:str
    start_measure:str
    start_beat:str
    start_beat_abs:str

    def to_m21(self)->music21.chord.Chord:
        """Convert this event to a music21 ``Chord``.

        The chord is built from the integer values in ``composition``;
        duration and offset come from ``chord_duration`` and
        ``start_beat_abs`` converted to float.

        NOTE(review): the integers are passed to music21 unchanged. Confirm
        that its interpretation of small integers matches what
        ``composition`` is meant to encode.
        """
        c = music21.chord.Chord([int(p) for p in self.composition])
        c.quarterLength = float(self.chord_duration)
        c.offset = float(self.start_beat_abs)
        return c

    @classmethod
    def parse(cls, d, mode=None, key_offset=None):
        """Build an HChord from a dict holding this class's field names.

        Args:
            d: Mapping with the HChord fields.
            mode: Accepted for call compatibility; currently unused.
            key_offset: Accepted for call compatibility; currently unused.

        Returns:
            HChord

        NOTE(review): presumably ``mode``/``key_offset`` were meant to drive
        a roman-numeral-to-composition conversion. Confirm and wire in.
        """
        # delegate to the field-driven construction inherited from Base
        return super().parse(d)

In [22]:
@dataclass
class HPart(Base):
    """One segment of a song: its melody notes and its chords."""
    notes: List[HNote]
    chords: List[HChord]

    @classmethod
    def parse(cls, d, metadata):
        """Build an HPart from a segment dict.

        Args:
            d: Mapping with optional 'notes' and 'chords' lists of dicts; a
                missing or None entry is treated as an empty list.
            metadata: Song metadata; accepted but not used here.

        Returns:
            HPart

        NOTE(review): ``HChord.parse`` is called with the chord dict only,
        so it has to accept that call form.
        """
        ns = [HNote.parse(n) for n in d.get('notes') or []]
        cs = [HChord.parse(c) for c in d.get('chords') or []]
        return cls(notes=ns, chords=cs)
        

In [23]:
@dataclass
class HSong(Base):
    """A whole song: its metadata plus the list of parsed parts."""
    metadata: HMetadata
    parts: List[HPart]

    @classmethod
    def parse(cls, metadata, segments):
        """Build an HSong from a metadata dict and a list of segment dicts.

        Args:
            metadata: Mapping handed to ``HMetadata.parse`` and to each
                ``HPart.parse`` call.
            segments: Iterable of segment dicts, one per part.

        Returns:
            HSong
        """
        song_meta = HMetadata.parse(metadata)
        song_parts = []
        for segment in segments:
            song_parts.append(HPart.parse(segment, metadata))
        return cls(metadata=song_meta, parts=song_parts)

    def to_stream(self):
        """Placeholder for a music21 stream export; currently returns None."""
        return None

In [58]:
# s = HSong.parse(metadata, segments)

In [25]:
# translator
class Translator():
    """Conversion hooks between HSong and array form (not implemented yet)."""

    @classmethod
    def to_numpy(cls, s:HSong):
        """Stub for the HSong-to-array conversion; returns None."""
        return None

    @classmethod
    def to_song(cls, arr):
        """Stub for the array-to-HSong conversion; returns None."""
        return None

In [68]:
from src import roman_to_symbol
from src import to_pianoroll
from collections import defaultdict
from midi_data import keyc_offset

In [144]:
key_offset = keyc_offset(metadata['key'], metadata['mode'])-1; key_offset

3

In [145]:
metadata

{'title': 'Let It Go',
 'beats_in_measure': '4',
 'BPM': '137',
 'key': 'F',
 'YouTubeID': 'moSFlvxnbgk',
 'mode': '6',
 'duration': '16.81'}

In [146]:
# parsed = [roman_to_symbol.hchord_parser(c, metadata['mode'], 7) for c in segments[0]['chords']]

Skrillex  
G minor -> 9 offset -> C major

minor = 5, G->C=

Daft Punk  
B dorian -> 2 offset -> C major

Let it go  
F minor -> 9 offset -> C major

Game of thrones  
C minor -> 9 offset -> C major

In [298]:
# Offset for each mode number, keyed by the mode as a string ('1'..'7').
# Presumably semitones relative to C major, per the notes above — confirm.
MODE_TO_KEYOFFSET = dict(zip('1234567', (0, 2, 4, 5, 7, 9, 11)))

In [309]:
# Select the files whose path mentions every keyword (same search as the
# earlier keyword cell, repeated before re-parsing).
# Other keyword sets that were tried:
# keywords = ['get-lucky', 'daft-punk', 'pre-chorus']
# keywords = ['skrillex', 'scary']
# keywords = ['game-of-thrones', 'intro', 'ramin']
# keywords = ['kiss-from-a-rose', 'seal']
keywords = ['idina', 'intro', 'let']


def contains_keywords(f):
    """Return True when every entry of the global `keywords` occurs in str(f)."""
    path_text = str(f)
    return all(word in path_text for word in keywords)


search = list(filter(contains_keywords, files))
search

[PosixPath('data/midi/v7/midi_sources/hooktheory/xml/i/idina-menzel/let-it-go/intro.xml'),
 PosixPath('data/midi/v7/midi_sources/hooktheory/xml/i/idina-menzel/let-it-go/intro-and-verse.xml')]

In [310]:
# Re-parse the first search hit; this repeats the load/parse steps run
# earlier in the notebook so the cells below start from fresh objects.
file_path = search[0]
content = load_data(file_path)
root = xml_parser(content)
# NOTE: rebinds `version` to the value returned by get_metadata.
metadata, version = get_metadata(root)
segments, num_measures = get_lead_sheet(root, version)

In [314]:
key_offset = MODE_TO_KEYOFFSET.get(metadata['mode'], 0)
# key_offset = roman_to_symbol.get_key_offset(metadata['key']); key_offset # this is offset to key not to c

In [315]:
parsed = [roman_to_symbol.hchord_parser(c, metadata['mode'], key_offset) for c in segments[0]['chords']]

In [316]:
[(p['symbol'], p['chord_type']) for p in parsed]

[('am', 5),
 ('Fmaj7', 7),
 ('G sus2', 5),
 ('dm sus4', 5),
 ('dm', 5),
 ('am', 5),
 ('Fmaj7', 7),
 ('G sus2', 5),
 ('D sus4', 5),
 ('D', 5)]

In [317]:
parsed_p = [roman_to_symbol.hnote_parser(c, metadata['mode'], key_offset) for c in segments[0]['notes']]

In [325]:
[p['pitch'] for p in parsed_p if p is not None]

[23.0,
 24.0,
 16.0,
 23.0,
 24.0,
 23.0,
 24.0,
 16.0,
 24.0,
 23.0,
 14.0,
 21.0,
 23.0,
 14.0,
 21.0,
 23.0,
 14.0,
 19.0,
 17.0,
 23.0,
 24.0,
 16.0,
 23.0,
 24.0,
 24.0,
 23.0,
 16.0,
 24.0,
 23.0,
 14.0,
 21.0,
 23.0,
 14.0,
 21.0,
 23.0,
 14.0,
 19.0,
 18.0]

In [324]:
parsed_c['comp_vec']

NameError: name 'parsed_c' is not defined

In [97]:
parsed_c['composition']

array([2, 5, 9, 12], dtype=object)

In [95]:
music21.chord.Chord([0,4,7,11])

<music21.chord.Chord C E G B>

In [124]:
out = parsed_c['composition'].astype(int).tolist()

In [125]:
music21.chord.Chord(out)

<music21.chord.Chord D F A C0>

In [89]:
comp = to_pianoroll.to_chromagram(parsed_c['composition'])

In [90]:
comp

array([2, 5, 9, 0], dtype=object)

In [None]:
music21.chord.composition