In [1]:
# Notebook setup: reload imported modules automatically when their source changes
# (autoreload mode 2) and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
sys.path.insert(0, '../../')
from src.encode_data import *
from src.midi_data import *
from src.data_sources import process_all, arr2csv
from src.midi_transform import *
from src.fastai_data import *
from src.unilm import S2SFileProcessor, S2SPreloader

In [3]:
import traceback
import time

## Standardize and reformat raw MIDI files before encoding to text
- Transform key to C major
- Remove unused instruments
- Combine multiple tracks with the same instrument into a single part
- Melody, Piano, String

### Load MIDI data

In [4]:
# Dataset version and root directories. The metadata CSV, the source MIDI files
# and the encoded output used in the cells below are all resolved under `version_path`.
version = 'v16'
data_path = Path('data/midi')
version_path = data_path/version

In [5]:
import pandas as pd

In [5]:
# Output location for the encoded files, relative to `version_path`.
# The two commented-out lines are a disabled alternative configuration.
# out_dir = 'midi_encode'
# duet_only = False
# SAMPLE_FREQ comes from the star imports above; the displayed paths show it resolves to 4 here.
sf_path = f'sf{SAMPLE_FREQ}'
out_dir = Path(f'{sf_path}/s2s_encode')
# NOTE(review): `duet_only` is not referenced in the cells visible here — confirm it is still needed.
duet_only = True

In [6]:
# `source_dir` is both the name of the metadata CSV and the path component that
# `process_metadata` swaps for `out_dir` when deriving output file names.
source_dir = 'midi_sources'
source_csv = version_path/'metadata'/f'{source_dir}.csv'
# The output CSV is named after the last component of `out_dir` and lives inside it.
out_csv = version_path/out_dir/f'{out_dir.name}.csv'
out_csv.parent.mkdir(parents=True, exist_ok=True)
# Display both paths as the cell output.
source_csv, out_csv

(PosixPath('data/midi/v16/metadata/midi_sources.csv'),
 PosixPath('data/midi/v16/sf4/s2s_encode/s2s_encode.csv'))

In [7]:
# Thresholds forwarded to `compress_midi_file` inside `transform_midi`.
# num_comps = 2 # note, duration
cutoff = 5 # max instruments
min_variation = 3 # minimum number of different midi notes played
# max_dur = 128

### Encoding MIDI to NumPy

In [8]:
# Load the source metadata table and preview the first rows.
# Each row carries a `midi` column that `process_metadata` resolves against `version_path`.
df = pd.read_csv(source_csv); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,ht_time_signature,ht_offset,midi,section,parts,ht_bpm,title,midi_title,artist,song_url,genres,source,ht_key,md5,mxl,ht_mode
0,4.0,0.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,chorus,"intro,chorus",128.0,yu-gi-oh-theme-song,yu-gi-oh3,wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,,hooktheory,C,bf1f29e5ff84e3e93e37fb873bfb590e,,1.0
1,3.0,0.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,intro,"intro,chorus",85.0,yu-gi-oh-theme-song,yu-gi-oh,wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,,hooktheory,C,055f80ad67f64edb14a85ca8fbfe8c29,,1.0
2,4.0,-5.0,midi_sources/hooktheory/pianoroll/w/what-a-day...,chorus,chorus,96.0,kiefer,kiefer,what-a-day,https://www.hooktheory.com/theorytab/view/what...,Jazz,hooktheory,D,197f96f5d181f6ce1e2c5ab04ac1ff87,,6.0
3,4.0,-5.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,pre-chorus,"verse,pre-chorus,chorus",152.0,senbonzakura,senbonzakura - pre-Pre-Chorus,whiteflame,https://www.hooktheory.com/theorytab/view/whit...,"J-Pop,Pop",hooktheory,D,9e7ce13a35f1314423a9a6d5a5287a4a,,6.0
4,4.0,-5.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,verse,"verse,pre-chorus,chorus",152.0,senbonzakura,Senbonzakura,whiteflame,https://www.hooktheory.com/theorytab/view/whit...,"J-Pop,Pop",hooktheory,D,d5aaf79d0989222f1362f9f46c540a27,,6.0


In [9]:
# Convert the table to a list of plain dicts (one per row) and show how many there are.
all_records = df.to_dict(orient='records'); len(all_records)

197182

In [10]:
def part_enc(chordarr, part, input_path):
    """Encode a single part of a chord array and validate the result.

    Args:
        chordarr: 3-D chord array; the second axis indexes parts.
        part: index of the part to extract along the second axis.
        input_path: source file path, used only in log messages.

    Returns:
        The encoded array for the part, or None when the part is empty or
        `is_valid_npenc` rejects it.
    """
    # Slice with part:part+1 so the part axis is kept (length 1) for the encoder.
    partarr = chordarr[:,part:part+1,:]
    # Part 3. Chord array to numpy
    npenc = chordarr2npenc(partarr)

    # An empty encoding would make npenc[-1, -1] raise IndexError; skip it explicitly
    # instead of letting the error surface as an unexplained failure upstream.
    if len(npenc) == 0:
        print('Part is empty. Skipping:', input_path)
        return None

    # Clamp the duration of the final entry to the maximum allowed note duration.
    if npenc[-1, -1] > MAX_NOTE_DUR:
        print('Part is shorter than song. Trimming end:', input_path)
        npenc[-1, -1] = MAX_NOTE_DUR
    if not is_valid_npenc(npenc, min_notes=8, input_path=input_path): return None

    return npenc

In [11]:
def process_metadata(metadata):
    """Encode the MIDI file referenced by one metadata record and save it as .npy.

    Args:
        metadata: dict-like record with a `midi` entry holding a path relative
            to `version_path`.

    Returns:
        None when the record has no string `midi` path. Otherwise a copy of the
        record; the copy gains a `numpy` key (output path relative to
        `version_path`) when an encoded file exists or was just written. If the
        input file is missing or `transform_midi` rejects it, the copy is
        returned without the `numpy` key.
    """
    # Records whose `midi` value is not a string (for instance a missing value) are dropped.
    if not isinstance(metadata.get('midi'), str): return None

    result = metadata.copy()

    # Part 1. Compress tracks/instruments
    input_path = version_path/metadata['midi']
    if not input_path.exists():
        print('Input path does not exist:', input_path, metadata)
        return result

    # Mirror the source tree under the output directory, swapping the extension for .npy.
    out_file = Path(str(input_path).replace(f'/{source_dir}/', f'/{out_dir}/'))
    out_file = out_file.with_suffix('.npy')

    # Only encode when no output exists yet, so an interrupted run can be resumed.
    if not out_file.exists():
        npenc = transform_midi(input_path)
        if npenc is None: return result
        # Create the directory only when something is written, so rejected songs
        # do not leave empty directory trees behind.
        out_file.parent.mkdir(parents=True, exist_ok=True)
        np.save(out_file, npenc)

    result['numpy'] = str(out_file.relative_to(version_path))
    return result

In [12]:
def transform_midi(midi_file):
    """Convert a MIDI file into per-part encoded arrays for a two-part song.

    Uses the notebook-level `min_variation` and `cutoff` settings when
    compressing the file.

    Args:
        midi_file: path of the MIDI file to encode.

    Returns:
        A NumPy array holding one encoded array per part, or None when the file
        cannot be parsed, does not have 1 or 2 piano tracks, does not reduce to
        exactly two parts, or any part fails validation. When the parts differ
        in length the result is a 1-D object array of shape (num_parts,).
    """
    input_path = midi_file

    try:
        if num_piano_tracks(input_path) not in [1, 2]: return None
        input_file = compress_midi_file(input_path, min_variation=min_variation, cutoff=cutoff) # remove non note tracks and standardize instruments
        if not input_file: return None
    except Exception as e:
        # ignore badly formatted midi errors
        if 'badly form' in str(e) or 'out of range' in str(e): return None
        print('Error parsing midi', input_path, e)
        return None

    # Part 2. Compress rests and long notes
    stream = file2stream(input_file) # 1.
    try:
        chordarr = stream2chordarr(stream) # 2. max_dur = quarter_len * sample_freq (4). 128 = 8 bars
    except Exception as e:
        print('Could not encode to chordarr:', input_path, e)
        return None

    chordarr = shorten_chordarr_rests(trim_chordarr_rests(chordarr))

    _,num_parts,_ = chordarr.shape
    if num_parts != 2: return None

    # Encode every part before checking, so each part still emits its own log lines.
    parts = [part_enc(chordarr, i, input_path) for i in range(num_parts)]
    if any(p is None for p in parts): return None

    # Parts with identical shapes stack into a regular array, as before.
    if len({p.shape for p in parts}) == 1: return np.array(parts)

    # Parts usually differ in length. Building a ragged array with np.array(parts)
    # raises ValueError on recent NumPy, so fill an object array explicitly.
    packed = np.empty(len(parts), dtype=object)
    for i, p in enumerate(parts): packed[i] = p
    return packed

In [13]:
# Manual check on a single file: run the first two conversion steps by hand
# so the intermediate chord array can be inspected in the next cell.
# transform_midi(piano_file)
midi_mxl_file = version_path/'midi_sources/from_mxl/musescore/data/49143.mid'
input_file = midi_mxl_file
stream = file2stream(input_file) # 1.
chordarr = stream2chordarr(stream)

In [14]:
# Shape of the chord array; `transform_midi` reads the second axis as the number of parts.
chordarr.shape

(1021, 2, 128)

In [15]:
# Full pipeline on the same file. `transform_midi` returns None for rejected files,
# in which case `.shape` would raise AttributeError.
transform_midi(midi_mxl_file).shape

(2,)

In [16]:
def try_process_metadata(metadata):
    """Best-effort wrapper around `process_metadata` for batch runs.

    Args:
        metadata: record passed through to `process_metadata`.

    Returns:
        Whatever `process_metadata` returns, or None if it raised. The failure
        is logged with the record's `midi` path instead of being dropped
        silently, so skipped files can be traced afterwards.
    """
    try:
        return process_metadata(metadata)
    except Exception as e:
        # Still best-effort: report which record failed and why, then carry on.
        midi = metadata.get('midi') if isinstance(metadata, dict) else metadata
        print('Error processing metadata:', midi, repr(e))
        return None

In [17]:
# # sanity check
# Run the un-wrapped `process_metadata` on 10 randomly chosen records so that any
# exception surfaces here rather than being caught by `try_process_metadata`.
# The sample is not seeded, so a different subset is drawn on every run.
import random
for r in random.sample(all_records, 10):
    process_metadata(r)

In [18]:
def timeout_func(data, seconds):
    """Log a record whose processing hit the time limit.

    Args:
        data: metadata record; its `midi` entry (if any) is printed.
        seconds: the timeout value to report.
    """
    midi_path = data.get('midi')
    print("Timeout:", seconds, midi_path)

In [19]:
# Run the exception-swallowing wrapper over every record via `process_all`.
# `timeout_func` is handed the record and the limit when an item times out
# (see the "Timeout: 300 ..." lines in the output; presumably seconds — confirm in process_all).
processed = process_all(try_process_metadata, all_records, timeout=300, timeout_func=timeout_func)

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/w/war/low-rider/intro-and-verse_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/w/willie-nelson/you-were-always-on-my-mind/verse_key_original.mid
Sequence too short: 7 data/midi/v16/midi_sources/hooktheory/pianoroll/w/willie-nelson/you-were-always-on-my-mind/verse_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/w/wowaka/rollin-girl/verse_key_original.mid
Sequence too short: 5 data/midi/v16/midi_sources/hooktheory/pianoroll/y/yasunori-mitsuda/black-omen/instrumental_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/y/yo-la-tengo/ohm/verse_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/y/yes/heart-of-the-sunrise/intro_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/mid

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/j/james-ingram/one-hundred-ways/intro-and-verse_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/j/jose-gonzales/step-out/intro_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jose-gonzales/step-out/intro_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/j/judee-sill/crayon-angels/verse_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/j/jelly-roll-morton/milenburg-joys/instrumental_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/j/john-tesh/roundball-rock/intro_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jay-hardway---mike-hawkins/freedom/chorus_key_original.mid
Sequence too short: 4 data/midi/v16/midi_source

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/z/zun/nightmare-diary/chorus_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/d/deftones/battle-axe/pre-chorus-and-chorus_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/d/drake/hold-on-were-going-home/verse_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/d/daft-punk/the-grid/instrumental_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/d/daft-punk/the-grid/instrumental_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/d/dooley-wilson/as-time-goes-by/verse_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/d/dawid-podsiadlo/trojkaty-i-kwadraty/chorus_key_original.mid
Part is shorter

Sequence too short: 5 data/midi/v16/midi_sources/hooktheory/pianoroll/n/nirvana/all-apologies/intro_key_original.mid
npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/hooktheory/pianoroll/n/nintendo/kirby-air-ride---machine-passage/verse_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/n/nintendo/kirby-air-ride---machine-passage/verse_key_original.mid
npenc exceeds max 161 duration: 362 data/midi/v16/midi_sources/hooktheory/pianoroll/i/inxs/never-tear-us-apart/verse-and-pre-chorus_key_original.mid
Sequence too short: 5 data/midi/v16/midi_sources/hooktheory/pianoroll/i/ikke-huftgold/dicke-titten-kartoffelsalat/intro_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/i/iq/leap-of-faith/intro-and-verse_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/h/homestuck/skies-of-skaia/chorus_key_original.mid
Sequence too short: 4 data/m

Sequence too short: 5 data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-lorax/let-it-grow/intro_key_original.mid
Sequence too short: 6 data/midi/v16/midi_sources/hooktheory/pianoroll/k/koji-kondo/delfino-plaza/intro_key_original.mid
Sequence too short: 5 data/midi/v16/midi_sources/hooktheory/pianoroll/k/king-gizzard-and-the-lizard-wizard/im-in-your-mind/intro_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/k/king-gizzard-and-the-lizard-wizard/robot-stop/solo_key_original.mid
npenc exceeds max 161 duration: 177 data/midi/v16/midi_sources/hooktheory/pianoroll/k/kanye-west/ultralight-beam/chorus_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/k/kshmr/kolkata/chorus_key_original.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/k/kyle-richards/terminal-velocity---ymir-theme/intro_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hoo

npenc exceeds max 161 duration: 832 data/midi/v16/midi_sources/freemidi/genre-pop/98 Degrees - The Hardest Thing.mid
npenc exceeds max 161 duration: 416 data/midi/v16/midi_sources/freemidi/genre-pop/R.E.M. - Orange Crush.mid
Error parsing midi data/midi/v16/midi_sources/freemidi/genre-pop/PSY - gangnam style KILLYURSLEF.mid ord() expected string of length 1, but int found
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/freemidi/genre-pop/Gin Blossoms - Follow You Down.mid
npenc exceeds max 161 duration: 1234 data/midi/v16/midi_sources/freemidi/genre-pop/Gin Blossoms - Follow You Down.mid
npenc exceeds max 161 duration: 193 data/midi/v16/midi_sources/freemidi/genre-pop/Jay-Z - Run This Town.mid
npenc exceeds max 161 duration: 325 data/midi/v16/midi_sources/freemidi/genre-pop/Geoff Moore - Said And Done.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/midiworld/named_midi/Berckmans_de_Oliveira_-_Berckmans_de_Oliveira_-_Tarde_da_noite.mid
Part is

npenc exceeds max 161 duration: 182 data/midi/v16/midi_sources/from_mxl/ecomp/2008/Duepree01.mid
npenc exceeds max 161 duration: 175 data/midi/v16/midi_sources/from_mxl/ecomp/2006/ChenC02.mid
npenc exceeds max 161 duration: 188 data/midi/v16/midi_sources/from_mxl/ecomp/2006/Cai03.mid
npenc exceeds max 161 duration: 195 data/midi/v16/midi_sources/from_mxl/ecomp/2006/Yarden06.mid
Timeout: 300 midi_sources/freemidi/genre-dance-eletric/Enigma - Variations.mid
npenc exceeds max 161 duration: 201 data/midi/v16/midi_sources/from_mxl/ecomp/2006/Shen06.mid
npenc exceeds max 161 duration: 296 data/midi/v16/midi_sources/from_mxl/ecomp/2006/Yarden01.mid
npenc exceeds max 161 duration: 327 data/midi/v16/midi_sources/from_mxl/ecomp/2006/Yarden01.mid
npenc exceeds max 161 duration: 298 data/midi/v16/midi_sources/from_mxl/ecomp/2006/Yarden10.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/ecomp/2006/Hao01.mid
npenc exceeds max 161 duration: 167 data/midi/v16/midi_sourc

npenc exceeds max 161 duration: 239 data/midi/v16/midi_sources/from_mxl/ecomp/2009/Sekino13.mid
npenc exceeds max 161 duration: 1298 data/midi/v16/midi_sources/from_mxl/ecomp/2015/LuoJ11.mid
npenc exceeds max 161 duration: 248 data/midi/v16/midi_sources/from_mxl/ecomp/2015/LiYZ08.mid
npenc exceeds max 161 duration: 211 data/midi/v16/midi_sources/from_mxl/ecomp/2009/Sekino04.mid
npenc exceeds max 161 duration: 180 data/midi/v16/midi_sources/from_mxl/ecomp/2015/LiYZ04.mid
npenc exceeds max 161 duration: 244 data/midi/v16/midi_sources/from_mxl/ecomp/2015/LiYZ04.mid
npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/from_mxl/ecomp/2015/LeeN05.mid
npenc exceeds max 161 duration: 168 data/midi/v16/midi_sources/from_mxl/ecomp/2015/JeonH05.mid
npenc exceeds max 161 duration: 166 data/midi/v16/midi_sources/from_mxl/ecomp/2015/LuM04.mid
npenc exceeds max 161 duration: 176 data/midi/v16/midi_sources/from_mxl/ecomp/2011/Lou04.mid
Part is shorter than song. Trimming end: data/midi/v16/m

npenc exceeds max 161 duration: 208 data/midi/v16/midi_sources/from_mxl/classic_piano/appass_3_format0.mid
npenc exceeds max 161 duration: 231 data/midi/v16/midi_sources/from_mxl/classic_piano/appass_1_format0.mid
npenc exceeds max 161 duration: 384 data/midi/v16/midi_sources/from_mxl/classical_archives/021/bl109_08.mid
npenc exceeds max 161 duration: 257 data/midi/v16/midi_sources/from_mxl/classical_archives/021/bl109_08.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/classic_piano/schub_d760_2_format0.mid
npenc exceeds max 161 duration: 176 data/midi/v16/midi_sources/from_mxl/classical_archives/021/kv_397.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/classical_archives/8/lempsb4.mid
npenc exceeds max 161 duration: 408 data/midi/v16/midi_sources/from_mxl/classical_archives/4/grostemp.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/classical_archives/4/faerie.mid
npenc exceeds max 161 d

npenc exceeds max 161 duration: 182 data/midi/v16/midi_sources/from_mxl/classical_archives/9/alk39n10.mid
npenc exceeds max 161 duration: 188 data/midi/v16/midi_sources/from_mxl/classical_archives/2/sacrevoc.mid
npenc exceeds max 161 duration: 196 data/midi/v16/midi_sources/from_mxl/classical_archives/2/chaccone.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/3553486.mid
npenc exceeds max 161 duration: 176 data/midi/v16/midi_sources/from_mxl/musescore/data/651366.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/1645036.mid
npenc exceeds max 161 duration: 252 data/midi/v16/midi_sources/from_mxl/musescore/data/5447831.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/3646181.mid
npenc exceeds max 161 duration: 258 data/midi/v16/midi_sources/from_mxl/musescore/data/2910611.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_m

npenc exceeds max 161 duration: 164 data/midi/v16/midi_sources/from_mxl/musescore/data/1627401.mid
npenc exceeds max 161 duration: 370 data/midi/v16/midi_sources/from_mxl/classical_archives/1/b92.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/1758346.mid
npenc exceeds max 161 duration: 196 data/midi/v16/midi_sources/from_mxl/musescore/data/54418.mid
npenc exceeds max 161 duration: 252 data/midi/v16/midi_sources/from_mxl/musescore/data/3273086.mid
npenc exceeds max 161 duration: 196 data/midi/v16/midi_sources/from_mxl/musescore/data/3273086.mid
npenc exceeds max 161 duration: 400 data/midi/v16/midi_sources/from_mxl/musescore/data/259941.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/259941.mid
npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/from_mxl/musescore/data/259941.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/1795106.mi

npenc exceeds max 161 duration: 174 data/midi/v16/midi_sources/from_mxl/musescore/data/160623.mid
npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/from_mxl/musescore/data/3808506.mid
npenc exceeds max 161 duration: 168 data/midi/v16/midi_sources/from_mxl/musescore/data/1503516.mid
npenc exceeds max 161 duration: 172 data/midi/v16/midi_sources/from_mxl/musescore/data/72176.mid
npenc exceeds max 161 duration: 312 data/midi/v16/midi_sources/from_mxl/musescore/data/4740426.mid
npenc exceeds max 161 duration: 272 data/midi/v16/midi_sources/from_mxl/musescore/data/439976.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/5195168.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/4087291.mid
npenc exceeds max 161 duration: 384 data/midi/v16/midi_sources/from_mxl/musescore/data/5051389.mid
npenc exceeds max 161 duration: 304 data/midi/v16/midi_sources/from_mxl/musescore/data/3984106.mid
Part

npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/from_mxl/musescore/data/4253946.mid
npenc exceeds max 161 duration: 224 data/midi/v16/midi_sources/from_mxl/musescore/data/105547.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/4820439.mid
npenc exceeds max 161 duration: 166 data/midi/v16/midi_sources/from_mxl/musescore/data/502456.mid
npenc exceeds max 161 duration: 420 data/midi/v16/midi_sources/from_mxl/musescore/data/2080561.mid
npenc exceeds max 161 duration: 164 data/midi/v16/midi_sources/from_mxl/musescore/data/1053251.mid
npenc exceeds max 161 duration: 228 data/midi/v16/midi_sources/from_mxl/musescore/data/1798726.mid
npenc exceeds max 161 duration: 190 data/midi/v16/midi_sources/from_mxl/musescore/data/123918.mid
npenc exceeds max 161 duration: 196 data/midi/v16/midi_sources/from_mxl/musescore/data/5428129.mid
npenc exceeds max 161 duration: 216 data/midi/v16/midi_sources/from_mxl/musescore/data/5485542.mid
npenc ex

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/5229221.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/4354151.mid
npenc exceeds max 161 duration: 273 data/midi/v16/midi_sources/from_mxl/musescore/data/972271.mid
npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/from_mxl/musescore/data/4988830.mid
npenc exceeds max 161 duration: 168 data/midi/v16/midi_sources/from_mxl/musescore/data/5286490.mid
npenc exceeds max 161 duration: 204 data/midi/v16/midi_sources/from_mxl/musescore/data/403246.mid
npenc exceeds max 161 duration: 392 data/midi/v16/midi_sources/from_mxl/musescore/data/2546131.mid
npenc exceeds max 161 duration: 224 data/midi/v16/midi_sources/from_mxl/musescore/data/3437061.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/from_mxl/musescore/data/657236.mid
npenc exceeds max 161 duration: 464 data/midi/v16/midi_sources/from_mxl/musescore/data/657236.mid

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/lmd_clean/Queen/Love of My Life.1.mid
npenc exceeds max 161 duration: 712 data/midi/v16/midi_sources/lmd_clean/Queen/Love of My Life.1.mid
npenc exceeds max 161 duration: 572 data/midi/v16/midi_sources/lmd_clean/Evans Bill/Blue in Green.mid
npenc exceeds max 161 duration: 416 data/midi/v16/midi_sources/lmd_clean/Queen/My Melancholy Blues.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/lmd_clean/Evans Bill/Israel.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/lmd_clean/Evans Bill/Israel.mid
Timeout: 300 midi_sources/from_mxl/musescore/data/2445146.mid
Error parsing midi data/midi/v16/midi_sources/lmd_clean/Pink Floyd/High Hopes.2.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v16/midi_sources/lmd_clean/Pink Floyd/Hey You.mid ord() expected string of length 1, but int found
Timeout: 300 midi_sources/from_mxl/musescore/data/189697.mid
Erro

npenc exceeds max 161 duration: 320 data/midi/v16/midi_sources/130k_reddit/P/P/puertorico_potpourri.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/P/P/Princess Witches - Fallen Tears.mid
npenc exceeds max 161 duration: 400 data/midi/v16/midi_sources/130k_reddit/P/P/Princess Witches - Fallen Tears.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/P/P/prgybess.mid ord() expected string of length 1, but int found
npenc exceeds max 161 duration: 239 data/midi/v16/midi_sources/130k_reddit/P/P/Pokemon Mystery Dungeon BlueRed Rescue Team - Title Screen.mid
npenc exceeds max 161 duration: 205 data/midi/v16/midi_sources/130k_reddit/P/P/PictFace.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/P/P/planetnintendo.mid
npenc exceeds max 161 duration: 208 data/midi/v16/midi_sources/130k_reddit/P/P/prism.mid
npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/130k_reddit/P/P/Pokemon BlackWhite - Battle Elit

npenc exceeds max 161 duration: 416 data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/jazz/brazilian_suite-Michel-Pertrucciani_dm.mid
npenc exceeds max 161 duration: 473 data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/jazz/stompin_at_the_savoy2_jc.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/holidays/halloween/the_murder_in_the_cemetery-Tom-Huck_sn.mid
npenc exceeds max 161 duration: 216 data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/holidays/christmas/o_tannenbaum_dwb.mid
npenc exceeds max 161 duration: 1312 data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/jazz/my_foolish_heart-pt_dm.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/gospel/battle_hymn_of_the_republic_sda.mid
npenc exceeds max 161 duration: 925 data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/gospel/battle_hymn_of_t

npenc exceeds max 161 duration: 956 data/midi/v16/midi_sources/130k_reddit/J/J/jessica_simpson-you_take_my_breath_away.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/J/J/J.D._-_Future_Breeze__Temple_of_Dreams_20070829101655.mid
Sequence too short: 1 data/midi/v16/midi_sources/130k_reddit/J/J/J.D._-_Future_Breeze__Temple_of_Dreams_20070829101655.mid
npenc exceeds max 161 duration: 256 data/midi/v16/midi_sources/130k_reddit/J/J/Josh & Wesz - Autunm Green 2:11-3:40.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/J/J/JingleRkShfl.mid
Sequence too short: 5 data/midi/v16/midi_sources/130k_reddit/J/J/JingleRkShfl.mid
npenc exceeds max 161 duration: 265 data/midi/v16/midi_sources/130k_reddit/J/J/jay_z-girls_best_friend.mid
npenc exceeds max 161 duration: 162 data/midi/v16/midi_sources/130k_reddit/J/J/jai.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/J/J/jabba2.mid cannot handle midi file format: <built-in 

npenc exceeds max 161 duration: 412 data/midi/v16/midi_sources/130k_reddit/Classical Archives - The Greats (MIDI)/Schumann/Papillons op2.mid
npenc exceeds max 161 duration: 1080 data/midi/v16/midi_sources/130k_reddit/Classical Archives - The Greats (MIDI)/Classical Piano Midis/Tarrega/Recuerdos de la Alhambra.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/Classical Archives - The Greats (MIDI)/Classical Piano Midis/Schubert/Ave Maria.mid
npenc exceeds max 161 duration: 211 data/midi/v16/midi_sources/130k_reddit/Classical Archives - The Greats (MIDI)/Classical Piano Midis/Ginastera/No. 6.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/Classical Archives - The Greats (MIDI)/Classical Piano Midis/Chopin/19 Polish Songs, for Solo Voice and Piano accomplements, No.10.mid
npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/130k_reddit/Classical Archives - The Greats (MIDI)/Classical Piano Midis/Chopin/19 Poli

npenc exceeds max 161 duration: 212 data/midi/v16/midi_sources/130k_reddit/C/C/C-emeraude.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/C/C/closcred2.mid cannot handle midi file format: <built-in function format>
npenc exceeds max 161 duration: 372 data/midi/v16/midi_sources/130k_reddit/C/C/calypso5.mid
npenc exceeds max 161 duration: 372 data/midi/v16/midi_sources/130k_reddit/C/C/calypso12.mid
npenc exceeds max 161 duration: 424 data/midi/v16/midi_sources/130k_reddit/C/C/csrag.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/C/C/cherish08.mid
npenc exceeds max 161 duration: 416 data/midi/v16/midi_sources/130k_reddit/C/C/cherish08.mid
npenc exceeds max 161 duration: 336 data/midi/v16/midi_sources/130k_reddit/C/C/celtic05.mid
npenc exceeds max 161 duration: 208 data/midi/v16/midi_sources/130k_reddit/C/C/chpn_op33_4.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/C/C/CarelessWhisper2.mid
Sequence too s

npenc exceeds max 161 duration: 256 data/midi/v16/midi_sources/130k_reddit/V/V/VIOLINES.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/3/3day_day.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/D/D/Dark_Forces.mid cannot handle midi file format: <built-in function format>
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/D/D/Donkey Kong Country 2 Diddys Kong Quest - Ice.mid
npenc exceeds max 161 duration: 332 data/midi/v16/midi_sources/130k_reddit/D/D/deck_the.mid
npenc exceeds max 161 duration: 304 data/midi/v16/midi_sources/130k_reddit/D/D/Donkey Kong Country - Jungle Swing.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/D/D/DirtySouthThomasGold_-_AlivefeatKateElsworth__FrozenRay_20130204104834.mid
npenc exceeds max 161 duration: 258 data/midi/v16/midi_sources/130k_reddit/D/D/Dance Nation - Sunshine (Wippenberg Remix).mid
npenc exceeds max 161 duration: 276 data/midi/v

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/Classical_Piano_piano-midi.de_MIDIRip/moszkowski/mos_op36_6.mid
Sequence too short: 1 data/midi/v16/midi_sources/130k_reddit/Classical_Piano_piano-midi.de_MIDIRip/moszkowski/mos_op36_6.mid
npenc exceeds max 161 duration: 242 data/midi/v16/midi_sources/130k_reddit/Classical_Piano_piano-midi.de_MIDIRip/tchaikovsky/ty_november.mid
npenc exceeds max 161 duration: 166 data/midi/v16/midi_sources/130k_reddit/Classical_Piano_piano-midi.de_MIDIRip/liszt/liz_rhap02.mid
npenc exceeds max 161 duration: 416 data/midi/v16/midi_sources/130k_reddit/Classical_Piano_piano-midi.de_MIDIRip/schubert/schubert_D935_4.mid
npenc exceeds max 161 duration: 174 data/midi/v16/midi_sources/130k_reddit/G/G/gesubambino.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/G/G/Gareth_Emery_-_Exposure__OCEANIC_20090710120335.mid
Sequence too short: 5 data/midi/v16/midi_sources/130k_reddit/G/G/Gareth_Emery_-_Exp

Could not encode to chordarr: data/midi/v16/midi_sources/130k_reddit/I/I/i_ran.mid 
npenc exceeds max 161 duration: 216 data/midi/v16/midi_sources/130k_reddit/H/H/hmcello1.mid
npenc exceeds max 161 duration: 296 data/midi/v16/midi_sources/130k_reddit/I/I/indreams.mid
npenc exceeds max 161 duration: 276 data/midi/v16/midi_sources/130k_reddit/I/I/ibelieve03.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/I/I/imcazyu3.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/I/I/if_we_hold_on_together2.mid
npenc exceeds max 161 duration: 208 data/midi/v16/midi_sources/130k_reddit/I/I/if_we_hold_on_together2.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/I/I/icantcry.mid ord() expected string of length 1, but int found
npenc exceeds max 161 duration: 192 data/midi/v16/midi_sources/130k_reddit/I/I/illbelovingyoualways.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/H/H/hmmd-601.m

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/W/W/Waterfal.mid
npenc exceeds max 161 duration: 258 data/midi/v16/midi_sources/130k_reddit/W/W/WastedPenguinz_-_Blinded__DJWORK_20121008074547.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/W/W/whiteshade.mid ord() expected string of length 1, but int found
npenc exceeds max 161 duration: 201 data/midi/v16/midi_sources/130k_reddit/W/W/Welcome to NHK - Odoru Akachan Ningen.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/W/W/way_to_san_jose.mid
npenc exceeds max 161 duration: 518 data/midi/v16/midi_sources/130k_reddit/W/W/WIDOR.MID
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/W/W/waitress.mid
npenc exceeds max 161 duration: 174 data/midi/v16/midi_sources/130k_reddit/W/W/waitress.mid
npenc exceeds max 161 duration: 464 data/midi/v16/midi_sources/130k_reddit/W/W/WindBeneathMyWings6.mid
npenc exceeds max 161 duration: 464 data

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/R/R/Rsm-Nacht.mid
npenc exceeds max 161 duration: 180 data/midi/v16/midi_sources/130k_reddit/R/R/Rsm-Nacht.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/R/R/Robbie Rivera - In Too Deep (Nasty Dub).mid
npenc exceeds max 161 duration: 288 data/midi/v16/midi_sources/130k_reddit/R/R/Robbie Rivera - In Too Deep (Nasty Dub).mid
npenc exceeds max 161 duration: 634 data/midi/v16/midi_sources/130k_reddit/R/R/runaway-5.mid
npenc exceeds max 161 duration: 290 data/midi/v16/midi_sources/130k_reddit/R/R/runaway-5.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/R/R/RCM1.MID
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/130k_reddit/R/R/ROWFUN.MID
Sequence too short: 1 data/midi/v16/midi_sources/130k_reddit/R/R/ROWFUN.MID
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/w/willie-nelson/you

Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/b/big-giant-circles/sevcon/intro-and-verse_key_cmajor.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/b/big-wild/venice-venture/instrumental_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/b/bic-runga/sway/verse_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/b/bo-en/my-time/bridge_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/b/bruce-rowland/jessicas-theme/intro-and-verse_key_cmajor.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/b/baauer/harlem-shake/verse_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/b/backstreet-boys/show-me-the-meaning-of-being-lonely/verse_key_cmajor.mid
npenc exceeds max 161 duration: 192 data/midi/v

Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/e/exid/ddd/intro_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/e/elvis-costello/big-boys/verse_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/e/eva-cassidy/fields-of-gold/intro-and-verse_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/e/elvis-presley/you-were-always-on-my-mind/verse_key_cmajor.mid
Sequence too short: 7 data/midi/v16/midi_sources/hooktheory/pianoroll/e/elvis-presley/you-were-always-on-my-mind/verse_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/n/niall-horan/this-town/pre-chorus_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/n/niall-horan/this-town/verse_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v1

npenc exceeds max 161 duration: 316 data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-script/you-wont-feel-a-thing/intro_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/t/traditional/frere-jacque/chorus_key_cmajor.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/t/traditional/frere-jacque/chorus_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/t/tyler-the-creator/ifhy/intro_key_cmajor.mid
Sequence too short: 5 data/midi/v16/midi_sources/hooktheory/pianoroll/t/tyler-the-creator/ifhy/intro_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-sing-team/satisfied-in-you/verse_key_cmajor.mid
Sequence too short: 6 data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-dispatch/how-deep-the-fathers-love-for-us/intro_key_cmajor.mid
Part is shorter than song. Trimming end: data/midi/v16/midi_sources/ho

In [20]:
# Write the `processed` records to `out_csv`, then display how many there are.
arr2csv(processed, out_csv)
len(processed)

196518

In [21]:
# Reload the metadata CSV written to `out_csv`.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk; the truncated warning in this cell's saved output
# looks like the mixed-type DtypeWarning that chunked inference produces.
df = pd.read_csv(out_csv, low_memory=False)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,artist,section,ht_key,numpy,ht_offset,genres,title,midi,midi_title,source,parts,mxl,song_url,ht_time_signature,ht_mode,ht_bpm,md5
0,wayne-sharpe,chorus,C,sf4/s2s_encode/hooktheory/pianoroll/w/wayne-sh...,0.0,,yu-gi-oh-theme-song,midi_sources/hooktheory/pianoroll/w/wayne-shar...,yu-gi-oh3,hooktheory,"intro,chorus",,https://www.hooktheory.com/theorytab/view/wayn...,4.0,1.0,128.0,bf1f29e5ff84e3e93e37fb873bfb590e
1,wayne-sharpe,intro,C,,0.0,,yu-gi-oh-theme-song,midi_sources/hooktheory/pianoroll/w/wayne-shar...,yu-gi-oh,hooktheory,"intro,chorus",,https://www.hooktheory.com/theorytab/view/wayn...,3.0,1.0,85.0,055f80ad67f64edb14a85ca8fbfe8c29
2,what-a-day,chorus,D,sf4/s2s_encode/hooktheory/pianoroll/w/what-a-d...,-5.0,Jazz,kiefer,midi_sources/hooktheory/pianoroll/w/what-a-day...,kiefer,hooktheory,chorus,,https://www.hooktheory.com/theorytab/view/what...,4.0,6.0,96.0,197f96f5d181f6ce1e2c5ab04ac1ff87
3,whiteflame,pre-chorus,D,sf4/s2s_encode/hooktheory/pianoroll/w/whitefla...,-5.0,"J-Pop,Pop",senbonzakura,midi_sources/hooktheory/pianoroll/w/whiteflame...,senbonzakura - pre-Pre-Chorus,hooktheory,"verse,pre-chorus,chorus",,https://www.hooktheory.com/theorytab/view/whit...,4.0,6.0,152.0,9e7ce13a35f1314423a9a6d5a5287a4a
4,whiteflame,verse,D,sf4/s2s_encode/hooktheory/pianoroll/w/whitefla...,-5.0,"J-Pop,Pop",senbonzakura,midi_sources/hooktheory/pianoroll/w/whiteflame...,Senbonzakura,hooktheory,"verse,pre-chorus,chorus",,https://www.hooktheory.com/theorytab/view/whit...,4.0,6.0,152.0,d5aaf79d0989222f1362f9f46c540a27


In [22]:
# Number of rows whose `numpy` column holds a string (i.e. a recorded path).
sum(1 for f in df['numpy'].values if isinstance(f, str))

53314

In [23]:
from collections import Counter

In [24]:
# Show only the rows that have a non-null `numpy` entry.
df.loc[df['numpy'].notnull()]

Unnamed: 0,artist,section,ht_key,numpy,ht_offset,genres,title,midi,midi_title,source,parts,mxl,song_url,ht_time_signature,ht_mode,ht_bpm,md5
0,wayne-sharpe,chorus,C,sf4/s2s_encode/hooktheory/pianoroll/w/wayne-sh...,0.0,,yu-gi-oh-theme-song,midi_sources/hooktheory/pianoroll/w/wayne-shar...,yu-gi-oh3,hooktheory,"intro,chorus",,https://www.hooktheory.com/theorytab/view/wayn...,4.0,1.0,128.0,bf1f29e5ff84e3e93e37fb873bfb590e
2,what-a-day,chorus,D,sf4/s2s_encode/hooktheory/pianoroll/w/what-a-d...,-5.0,Jazz,kiefer,midi_sources/hooktheory/pianoroll/w/what-a-day...,kiefer,hooktheory,chorus,,https://www.hooktheory.com/theorytab/view/what...,4.0,6.0,96.0,197f96f5d181f6ce1e2c5ab04ac1ff87
3,whiteflame,pre-chorus,D,sf4/s2s_encode/hooktheory/pianoroll/w/whitefla...,-5.0,"J-Pop,Pop",senbonzakura,midi_sources/hooktheory/pianoroll/w/whiteflame...,senbonzakura - pre-Pre-Chorus,hooktheory,"verse,pre-chorus,chorus",,https://www.hooktheory.com/theorytab/view/whit...,4.0,6.0,152.0,9e7ce13a35f1314423a9a6d5a5287a4a
4,whiteflame,verse,D,sf4/s2s_encode/hooktheory/pianoroll/w/whitefla...,-5.0,"J-Pop,Pop",senbonzakura,midi_sources/hooktheory/pianoroll/w/whiteflame...,Senbonzakura,hooktheory,"verse,pre-chorus,chorus",,https://www.hooktheory.com/theorytab/view/whit...,4.0,6.0,152.0,d5aaf79d0989222f1362f9f46c540a27
5,whiteflame,chorus,D,sf4/s2s_encode/hooktheory/pianoroll/w/whitefla...,-5.0,"J-Pop,Pop",senbonzakura,midi_sources/hooktheory/pianoroll/w/whiteflame...,Senbonzakura,hooktheory,"verse,pre-chorus,chorus",,https://www.hooktheory.com/theorytab/view/whit...,4.0,6.0,152.0,e0c189ee753b30c4758d85211f13c189
6,wham,verse,Db,sf4/s2s_encode/hooktheory/pianoroll/w/wham/las...,-1.0,Holiday,last-christmas,midi_sources/hooktheory/pianoroll/w/wham/last-...,Last Christmas Verse,hooktheory,"intro,verse,chorus",,https://www.hooktheory.com/theorytab/view/wham...,4.0,1.0,108.0,38e38402443506e326b76536e8e327a0
7,wham,chorus,Db,sf4/s2s_encode/hooktheory/pianoroll/w/wham/las...,-1.0,Holiday,last-christmas,midi_sources/hooktheory/pianoroll/w/wham/last-...,Last Christmas Chorus,hooktheory,"intro,verse,chorus",,https://www.hooktheory.com/theorytab/view/wham...,4.0,1.0,108.0,75d0251177c8c1fa9a02821299fa5ba8
8,wham,intro,Db,sf4/s2s_encode/hooktheory/pianoroll/w/wham/las...,-1.0,Holiday,last-christmas,midi_sources/hooktheory/pianoroll/w/wham/last-...,Last Christmas Intro,hooktheory,"intro,verse,chorus",,https://www.hooktheory.com/theorytab/view/wham...,4.0,1.0,108.0,83d2a800f40aeca07e30e4718cda8fe5
9,wham,chorus,C,sf4/s2s_encode/hooktheory/pianoroll/w/wham/fre...,0.0,,freedom,midi_sources/hooktheory/pianoroll/w/wham/freed...,Freedom Chorus,hooktheory,chorus,,https://www.hooktheory.com/theorytab/view/wham...,4.0,1.0,128.0,60fa29cfec107df27b053cf9708823d5
11,wg-snuffy-walden,instrumental,G,sf4/s2s_encode/hooktheory/pianoroll/w/wg-snuff...,5.0,,west-wing-suite,midi_sources/hooktheory/pianoroll/w/wg-snuffy-...,snuffy,hooktheory,instrumental,,https://www.hooktheory.com/theorytab/view/wg-s...,4.0,1.0,86.0,a856dff6c54398544c217104d047abe0


In [25]:
# Per-source tally of the rows that have a non-null `numpy` entry.
Counter(df.loc[df['numpy'].notnull(), 'source'].values)

Counter({'hooktheory': 17761,
         'freemidi': 51,
         'midiworld': 66,
         'ecomp': 2357,
         'cprato': 124,
         'classical_piano': 319,
         'classical_archives': 2400,
         'musescore': 6911,
         'wikifonia': 33,
         'lmd': 128,
         'reddit': 5273,
         'hooktheory_c': 17891})

In [26]:
# Total number of rows with a non-null `numpy` entry.
len(df.loc[df['numpy'].notnull(), 'source'])

53314

In [27]:
# Per-source tally of rows with a non-null `numpy` entry
# (same computation as the earlier Counter cell).
Counter(df.dropna(subset=['numpy'])['source'].values)

Counter({'hooktheory': 17761,
         'freemidi': 51,
         'midiworld': 66,
         'ecomp': 2357,
         'cprato': 124,
         'classical_piano': 319,
         'classical_archives': 2400,
         'musescore': 6911,
         'wikifonia': 33,
         'lmd': 128,
         'reddit': 5273,
         'hooktheory_c': 17891})

In [28]:
# Total rows with a non-null `numpy` entry (same computation as the earlier len cell).
len(df.dropna(subset=['numpy'])['source'].values)

53314

In [29]:
# Per-source tally over every row, whether or not it has a `numpy` entry.
Counter(df['source'].values)

Counter({'hooktheory': 19882,
         'freemidi': 5166,
         'midiworld': 4107,
         'ecomp': 2507,
         'cprato': 312,
         'classical_piano': 329,
         'classical_archives': 14641,
         'musescore': 10928,
         'wikifonia': 6346,
         'lmd': 13565,
         'reddit': 98659,
         'hooktheory_c': 20076})

## Convert encoded files to databunches (per source group)

In [30]:
def get_files(csv, base_path=None):
    """Return the existing files referenced by the `numpy` column of `csv`.

    Args:
        csv: DataFrame with a `numpy` column of paths relative to the base
            directory. Non-string entries (e.g. NaN for rows without a
            recorded path) are skipped.
        base_path: directory the relative paths are resolved against.
            Defaults to the notebook-level `version_path`, which is what the
            function always used before this parameter existed.

    Returns:
        A list of `Path` objects, in row order, for entries that exist on disk.
    """
    root = version_path if base_path is None else Path(base_path)
    candidates = (root/f for f in csv['numpy'].values if isinstance(f, str))
    return [p for p in candidates if p.exists()]

In [31]:
def create_databunch(files, cache_name, batch_size=32, load_cached=False):
    """Build a databunch from `files` and cache it, or reload it from the cache.

    Reads the notebook-level `out_path` as the databunch root directory.

    Args:
        files: items handed to `MusicItemList`.
        cache_name: cache location passed to `save` / `MusicDataBunch.load`.
        batch_size: batch size passed as `bs`.
        load_cached: when True and `out_path/<cache_name>/train_ids.npy`
            exists, reload the cached databunch instead of rebuilding it.

    Returns:
        The loaded or newly built databunch.
    """
    cached_ids = out_path/f'{cache_name}/train_ids.npy'
    if load_cached and cached_ids.exists():
        # Pass the same preloader as the build path below (and as the direct
        # `MusicDataBunch.load` calls later in this notebook); previously the
        # cached branch omitted it, so a reloaded databunch was not configured
        # the same way as a freshly built one.
        data = MusicDataBunch.load(out_path, bs=batch_size, cache_name=cache_name,
                                   preloader_cls=S2SPreloader)
    else:
        ps = [S2SFileProcessor()]
        # 1% of the items go to the validation split; fixed seed keeps it stable.
        data = (MusicItemList(items=files, path=out_path, processor=ps)
                .split_by_rand_pct(0.01, seed=6)
                .label_const(label_cls=LMLabelList))
#         data.x._bunch = MusicDataBunch
        data = data.databunch(bs=batch_size, preloader_cls=S2SPreloader)
        data.save(cache_name)
    return data

In [32]:
# Root directory for the databunches; `create_databunch` reads this global.
out_path = version_path.joinpath(out_dir)

In [33]:
# Alias for the metadata frame loaded from `out_csv`; the cells below filter it by `source`.
# NOTE(review): this name would shadow the stdlib `csv` module if it were imported.
csv = df

In [34]:
# Databunch built from rows whose source is 'hooktheory', cached under 'tmp/hook'.
hook_csv = csv[csv['source'].isin(['hooktheory'])]
hook_files = get_files(hook_csv)
len(hook_files)
hook_data = create_databunch(files=hook_files, cache_name='tmp/hook')

In [35]:
# Databunch built from rows whose source is 'hooktheory_c', cached under 'tmp/hook_c'.
# This rebinds hook_csv / hook_files / hook_data.
hook_csv = csv[csv['source'].isin(['hooktheory_c'])]
hook_files = get_files(hook_csv)
len(hook_files)
hook_data = create_databunch(files=hook_files, cache_name='tmp/hook_c')

In [36]:
# Databunch built from the source group listed below, cached under 'tmp/lq'.
lq_csv = csv[csv['source'].isin([
    'reddit',
    'classical_piano',
    'ecomp',
    'midiworld',
    'freemidi',
    'lmd',
    'cprato',
    'wikifonia',
    'classical_archives',
])]
lq_files = get_files(lq_csv)
len(lq_files)
lq_data = create_databunch(files=lq_files, cache_name='tmp/lq')

In [37]:
# Databunch built from the 'hooktheory' and 'musescore' rows, cached under 'tmp/hq'.
hq_csv = csv[csv['source'].isin(['hooktheory', 'musescore'])]
hq_files = get_files(hq_csv)
len(hq_files)
hq_data = create_databunch(files=hq_files, cache_name='tmp/hq')

In [38]:
# Size of the training dataset behind `hook_data`.
# NOTE(review): `hook_data` is assigned by both the 'tmp/hook' and 'tmp/hook_c'
# cells above, so this reports whichever of those cells ran last.
len(hook_data.train_dl.dl.dataset)

17264

In [39]:
# Databunch built from every row of `csv` (no source filter), cached under 'tmp/all'.
all_files = get_files(csv)
len(all_files)
all_data = create_databunch(files=all_files, cache_name='tmp/all')

In [40]:
import random

# Small databunch for quick experiments, cached under 'tmp/sample'.
# A dedicated, fixed-seed generator makes the draw reproducible across kernel
# restarts (seed 6 matches the split seed used in `create_databunch`).
# The size is capped because random.sample raises ValueError when asked for
# more items than the population contains.
sample_files = random.Random(6).sample(all_files, min(1000, len(all_files)))
sample_data = create_databunch(sample_files, cache_name='tmp/sample')

## Load data

In [41]:
# Reload the cached 'tmp/hook_c' databunch, passing `single_tfm`
# (`to_single_stream` bound to `vocab`) as the only entry of `train_tfms`.
single_tfm = partial(to_single_stream, vocab=vocab)
load_data = MusicDataBunch.load(
    path=out_path,
    cache_name='tmp/hook_c',
    preloader_cls=S2SPreloader,
    train_tfms=[single_tfm],
)



Tried: 0,1,2,3,4...
  warn(warn_msg)


In [42]:
# Reload the same 'tmp/hook_c' cache without `train_tfms`; this rebinds `load_data`.
load_data = MusicDataBunch.load(
    path=out_path,
    cache_name='tmp/hook_c',
    preloader_cls=S2SPreloader,
)

In [49]:
# Pull a single batch from the reloaded databunch; the saved output shows a pair of tensors.
load_data.one_batch()

(tensor([[  6, 622,  88,  ..., 143,   8, 143],
         [  5, 622,  55,  ..., 161,  51, 161],
         [  5, 622,  65,  ..., 149,  61, 149],
         ...,
         [  5, 622,  66,  ..., 185,  75, 185],
         [  5, 622,  64,  ..., 173,  61, 173],
         [  5, 622,  63,  ..., 185,  60, 185]]),
 tensor([[  3,   5, 622,  ..., 167,  78, 155],
         [  3,   6, 622,  ..., 143,   8, 143],
         [  3,   6, 622,  ..., 140,   8, 140],
         ...,
         [  3,   6, 622,  ..., 143,  87, 143],
         [  3,   6, 622,  ..., 149,   8, 149],
         [  3,   6, 622,  ..., 149,  75, 197]]))

In [None]:
# ps = [S2SFileProcessor()]

# single_tfm = partial(to_single_stream, vocab=vocab)
# data = (MusicItemList(items=hook_files[:100], path=out_path, processor=ps, tfms=[single_tfm])
#         .split_by_rand_pct(0.01, seed=6)
#         .label_const(label_cls=LMLabelList))
# data.x._bunch = MusicDataBunch

In [78]:
# data.x.tfms = [single_tfm]

In [None]:
# Build a small debugging databunch from the first 100 `hook_files`.
# The original cell called `data.databunch(...)` on a `data` object that is only
# created in the commented-out cell above, so it failed on a fresh kernel and,
# because it rebound `data` to its own result, could not be re-run.
# Constructing the labelled item list here, with the same steps as
# `create_databunch`, makes the cell self-contained and idempotent.
data = (MusicItemList(items=hook_files[:100], path=out_path, processor=[S2SFileProcessor()])
        .split_by_rand_pct(0.01, seed=6)
        .label_const(label_cls=LMLabelList)
        .databunch(bs=4, preloader_cls=S2SPreloader, train_tfms=[single_tfm]))

In [80]:
# Fetch the first item of the training dataset behind `data` for manual inspection.
out = data.train_dl.dl.dataset[0]

In [None]:
# Pull a single batch from the debugging databunch `data`.
data.one_batch()