In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import sys
sys.path.insert(0, '../../')
from src.encode_data import *
from src.midi_data import *
from src.data_sources import process_all, arr2csv
from src.midi_transform import *
from src.fastai_data import *

Failed to load FluidSynth. Must install if you want to convert to wav files.


In [4]:
import traceback
import time

## Standardize and reformat raw MIDI files before encoding to text
- Transpose the key to C major
- Remove unused instruments
- Combine multiple tracks with the same instrument into a single part
- Parts: Melody, Piano, String

### Load MIDI data

In [4]:
# Dataset version; selects the subdirectory of data/midi that this notebook works in.
version = 'v16'
data_path = Path('data/midi')
version_path = data_path/version  # root directory for the metadata CSV, the source files and the encoded output

In [5]:
import pandas as pd

In [6]:
# SAMPLE_FREQ is not defined in this notebook — presumably it comes from one of the
# `src` star imports (TODO confirm).
sf_path = f'sf{SAMPLE_FREQ}'
out_dir = Path(f'{sf_path}/midi_encode')  # output directory, relative to version_path
duet_only = False  # read by transform_midi: when True, files whose num_piano_tracks(...) is not 1 or 2 are skipped
# Alternative configuration: separate output directory, duet filter enabled.
# out_dir = Path(f'{sf_path}/piano_duet')
# duet_only = True

In [7]:
# source_csv: metadata table read as input; out_csv: table written once encoding has finished.
source_dir = 'midi_sources'  # also the path component that process_metadata swaps for out_dir
source_csv = version_path/'metadata'/f'{source_dir}.csv'
out_csv = version_path/out_dir/f'{out_dir.name}.csv'
out_csv.parent.mkdir(parents=True, exist_ok=True)  # make sure the output directory exists
source_csv, out_csv

(PosixPath('data/midi/v16/metadata/midi_sources.csv'),
 PosixPath('data/midi/v16/sf4/midi_encode/midi_encode.csv'))

In [8]:
# Filtering thresholds; transform_midi forwards cutoff and min_variation to compress_midi_file.
# num_comps = 2 # note, duration
cutoff = 5 # max instruments
min_variation = 3 # minimum number of different midi notes played
# max_dur = 128

### Encode MIDI to NumPy

In [9]:
# Load the source metadata table and preview its first rows.
# NOTE(review): the recorded output begins with the tail of a warning message —
# presumably pandas' mixed-dtype warning; if so, consider passing explicit dtypes (TODO confirm).
df = pd.read_csv(source_csv); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,ht_time_signature,ht_offset,midi,section,parts,ht_bpm,title,midi_title,artist,song_url,genres,source,ht_key,md5,mxl,ht_mode
0,4.0,0.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,chorus,"intro,chorus",128.0,yu-gi-oh-theme-song,yu-gi-oh3,wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,,hooktheory,C,bf1f29e5ff84e3e93e37fb873bfb590e,,1.0
1,3.0,0.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,intro,"intro,chorus",85.0,yu-gi-oh-theme-song,yu-gi-oh,wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,,hooktheory,C,055f80ad67f64edb14a85ca8fbfe8c29,,1.0
2,4.0,-5.0,midi_sources/hooktheory/pianoroll/w/what-a-day...,chorus,chorus,96.0,kiefer,kiefer,what-a-day,https://www.hooktheory.com/theorytab/view/what...,Jazz,hooktheory,D,197f96f5d181f6ce1e2c5ab04ac1ff87,,6.0
3,4.0,-5.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,pre-chorus,"verse,pre-chorus,chorus",152.0,senbonzakura,senbonzakura - pre-Pre-Chorus,whiteflame,https://www.hooktheory.com/theorytab/view/whit...,"J-Pop,Pop",hooktheory,D,9e7ce13a35f1314423a9a6d5a5287a4a,,6.0
4,4.0,-5.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,verse,"verse,pre-chorus,chorus",152.0,senbonzakura,Senbonzakura,whiteflame,https://www.hooktheory.com/theorytab/view/whit...,"J-Pop,Pop",hooktheory,D,d5aaf79d0989222f1362f9f46c540a27,,6.0


In [10]:
# One dict per table row (the input format of process_metadata); the cell displays how many there are.
all_records = df.to_dict(orient='records'); len(all_records)

197182

In [11]:
def process_metadata(metadata):
    """Encode the MIDI file referenced by one metadata record and cache it as a .npy file.

    Parameters
    ----------
    metadata : dict
        One record of the source table. Only the ``'midi'`` key is read; it is
        joined onto ``version_path`` to locate the input file.

    Returns
    -------
    dict or None
        ``None`` when ``metadata['midi']`` is missing or not a string.
        Otherwise a shallow copy of ``metadata``. The copy gains a ``'numpy'``
        key (output path relative to ``version_path``) when an encoded file
        already exists or was just written; it is returned without that key
        when the input file is missing or ``transform_midi`` returns ``None``.

    Notes
    -----
    Reads the notebook globals ``version_path``, ``source_dir`` and ``out_dir``.
    An existing output file is reused and never re-encoded.
    """
    midi_rel = metadata.get('midi')
    if not isinstance(midi_rel, str): return None

    result = metadata.copy()
    input_path = version_path/midi_rel
    if not input_path.exists():
        print('Input path does not exist:', input_path, metadata)
        return result

    # Mirror the input location under out_dir and swap the extension for .npy
    out_file = Path(str(input_path).replace(f'/{source_dir}/', f'/{out_dir}/')).with_suffix('.npy')
    if not out_file.exists():
        npenc = transform_midi(input_path)
        if npenc is None: return result
        # Create the directory only once there is something to save, so that
        # rejected files do not leave empty directories behind.
        out_file.parent.mkdir(parents=True, exist_ok=True)
        np.save(out_file, npenc)

    result['numpy'] = str(out_file.relative_to(version_path))
    return result

In [12]:
def transform_midi(midi_file):
    """Convert one MIDI file into its numpy encoding, or return None if it is unusable.

    Parameters
    ----------
    midi_file : path-like
        Location of the MIDI file to encode.

    Returns
    -------
    The value produced by ``chordarr2npenc``, or ``None`` when
    * ``duet_only`` is set and ``num_piano_tracks`` is not 1 or 2,
    * ``compress_midi_file`` returns ``None`` or raises,
    * the file cannot be loaded or converted to a chord array, or
    * ``is_valid_npenc`` rejects the encoding.

    Notes
    -----
    Reads the notebook globals ``duet_only``, ``min_variation`` and ``cutoff``.
    Failures are printed and reported as ``None`` rather than raised, except
    for errors whose message contains 'badly form' or 'out of range', which
    are skipped without printing.
    """
    # Part 1: Filter out midi tracks (drums, repetitive instruments, etc.)
    try:
        if duet_only and num_piano_tracks(midi_file) not in [1, 2]: return None
        # remove non note tracks and standardize instruments
        input_file = compress_midi_file(midi_file, min_variation=min_variation, cutoff=cutoff)
        if input_file is None: return None
    except Exception as e:
        # ignore badly formatted midi errors
        if 'badly form' in str(e) or 'out of range' in str(e): return None
        print('Error parsing midi', midi_file, e)
        return None

    # Part 2. Compress rests and long notes
    try:
        # Loading sits inside the try as well, so a file that cannot be read is
        # reported with its path just like one that cannot be converted.
        stream = file2stream(input_file)
        chordarr = stream2chordarr(stream) # max_dur = quarter_len * sample_freq (4). 128 = 8 bars
    except Exception as e:
        print('Could not encode to chordarr:', midi_file, e)
        print(traceback.format_exc())
        return None

    # Part 3. Compress song rests - Don't want songs with really long pauses
    # (this happens because we filter out midi tracks).
    # NOTE: an earlier version skipped songs from which more than 500 rest
    # steps were removed; that check is currently disabled.
    chordarr = shorten_chordarr_rests(trim_chordarr_rests(chordarr))

    # Part 4. Chord array to numpy
    npenc = chordarr2npenc(chordarr)
    if not is_valid_npenc(npenc, input_path=midi_file):
        return None

    return npenc

In [None]:
def try_process_metadata(metadata):
    """Best-effort wrapper around process_metadata for bulk runs.

    Returns whatever ``process_metadata`` returns. If it raises, the failure
    is reported on a single line and ``None`` is returned, so one bad record
    cannot abort the whole batch.
    """
    try:
        return process_metadata(metadata)
    except Exception as e:
        # One short line per failure: a full traceback per file is too noisy
        # for a bulk run, but dropping the error entirely makes failed records
        # impossible to diagnose.
        midi = metadata.get('midi') if isinstance(metadata, dict) else metadata
        print('Error processing record:', midi, repr(e))
        return None

In [None]:
# # sanity check
import random
for r in random.sample(all_records, 10):
    process_metadata(r)

In [None]:
def timeout_func(data, seconds):
    """Report a record that hit the processing time limit.

    Handed to process_all as its ``timeout_func`` callback. Prints the limit
    and the record's ``'midi'`` entry; returns nothing.
    """
    midi_path = data.get('midi')
    print("Timeout:", seconds, midi_path)

In [None]:
# Encode every record through the error-tolerant wrapper. `timeout` is forwarded to
# process_all together with the callback that reports records hitting it —
# presumably a per-record limit in seconds (TODO confirm in src.data_sources).
timeout = 500
processed = process_all(try_process_metadata, all_records, timeout=timeout, timeout_func=timeout_func)

Sequence too short: 29 data/midi/v16/midi_sources/hooktheory/pianoroll/w/willie-nelson/on-the-road-again/verse_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/w/willie-nelson/on-the-road-again/bridge_key_original.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/w/weebl/donkeys/intro_key_original.mid
Sequence too short: 25 data/midi/v16/midi_sources/hooktheory/pianoroll/w/wolfgang-gartner/undertaker/intro_key_original.mid
Sequence too short: 17 data/midi/v16/midi_sources/hooktheory/pianoroll/w/womack-and-womack/teardrops/intro-and-verse_key_original.mid
Sequence too short: 19 data/midi/v16/midi_sources/hooktheory/pianoroll/w/wolfgang-amadeus-mozart/confutatis-from-requiem/solo-3_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/w/wilco/elt/intro_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/w/wan-wan/sky-filling-departed-spirit---lightning-wor

Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/y/yes/ive-seen-all-good-people/bridge_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/y/yes/ive-seen-all-good-people/outro_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/y/ylvis/massachusetts/intro_key_original.mid
Sequence too short: 30 data/midi/v16/midi_sources/hooktheory/pianoroll/y/ylvis/massachusetts/pre-chorus_key_original.mid
Sequence too short: 31 data/midi/v16/midi_sources/hooktheory/pianoroll/j/janet-jackson/nasty/intro_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jackson-browne/doctor-my-eyes/verse_key_original.mid
Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jackson-browne/doctor-my-eyes/chorus_key_original.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/j/julia-holter/feel-you/pre-chorus_key_original.mid
Sequence too short: 3

Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/b/bear-mccreary/roslin-and-adama/bridge_key_original.mid
Sequence too short: 29 data/midi/v16/midi_sources/hooktheory/pianoroll/b/boards-of-canada/olson/outro_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/b/boards-of-canada/cold-earth/verse_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/b/boards-of-canada/an-eagle-in-your-mind/instrumental_key_original.mid
Sequence too short: 18 data/midi/v16/midi_sources/hooktheory/pianoroll/b/boards-of-canada/new-seeds/outro_key_original.mid
Sequence too short: 17 data/midi/v16/midi_sources/hooktheory/pianoroll/b/boards-of-canada/new-seeds/chorus_key_original.mid
Sequence too short: 19 data/midi/v16/midi_sources/hooktheory/pianoroll/b/boards-of-canada/corsair/verse_key_original.mid
Sequence too short: 29 data/midi/v16/midi_sources/hooktheory/pianoroll/b/boards-of-canada/music-is-math/intro-and-verse

Sequence too short: 23 data/midi/v16/midi_sources/hooktheory/pianoroll/d/david-bowie/fame/intro_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/d/digitalism/blitz/intro_key_original.mid
Sequence too short: 25 data/midi/v16/midi_sources/hooktheory/pianoroll/d/daft-punk/voyager/intro_key_original.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/d/dean-blunt/demon/verse_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/d/dirty-projectors/impregnable-question/pre-chorus_key_original.mid
Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/d/deadmau5/i-remember/intro_key_original.mid
Sequence too short: 8 data/midi/v16/midi_sources/hooktheory/pianoroll/d/david-bowie/starman/pre-chorus_key_original.mid
Sequence too short: 21 data/midi/v16/midi_sources/hooktheory/pianoroll/d/disney/something-there---beauty-and-the-beast/intro_key_original.mid
Sequence too short: 26 data/mi

Sequence too short: 26 data/midi/v16/midi_sources/hooktheory/pianoroll/c/capcom/okami---yami/intro_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/c/capcom/okami---yami/intro-and-verse_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/c/ceoma-feat-the-larx/love-is-more--rob-mayth-remix-/bridge_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/c/cat-stevens/wild-world/chorus_key_original.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/c/captain-hollywood-project/odyssey-of-emotions/instrumental_key_original.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/c/camera-obscura/if-looks-could-kill/verse_key_original.mid
Sequence too short: 26 data/midi/v16/midi_sources/hooktheory/pianoroll/c/chris-de-burgh/lady-in-red/intro_key_original.mid
Sequence too short: 26 data/midi/v16/midi_sources/hooktheory/pianoroll/c/coda/bloody-st

Sequence too short: 21 data/midi/v16/midi_sources/hooktheory/pianoroll/p/paul-mccartney/maybe-im-amazed/intro_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/p/pr3/overlooked-memories/intro_key_original.mid
Sequence too short: 27 data/midi/v16/midi_sources/hooktheory/pianoroll/p/paris-hilton/my-bff/intro_key_original.mid
Sequence too short: 18 data/midi/v16/midi_sources/hooktheory/pianoroll/p/parov-stelar/the-mojo-radio-gang/intro_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/p/panic-at-the-disco/do-you-know-what-im-seeing/intro-and-verse_key_original.mid
Sequence too short: 31 data/midi/v16/midi_sources/hooktheory/pianoroll/p/post-malone/rockstar/chorus_key_original.mid
Sequence too short: 26 data/midi/v16/midi_sources/hooktheory/pianoroll/p/pablo-acenso/little-monsters/chorus_key_original.mid
Sequence too short: 10 data/midi/v16/midi_sources/hooktheory/pianoroll/t/this-town-needs-guns/gibbon/intro_key_o

Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/k/koji-kondo/athletic-bgm-super-mario-bros-3/intro_key_original.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/k/koji-kondo/deku-palace---legend-of-zelda-majoras-mask/bridge_key_original.mid
Sequence too short: 22 data/midi/v16/midi_sources/hooktheory/pianoroll/k/koji-kondo/delfino-plaza/intro_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/k/koji-kondo/super-mario-bros-3---hammer-bros-battle-theme/intro_key_original.mid
Sequence too short: 30 data/midi/v16/midi_sources/hooktheory/pianoroll/k/koji-kondo/super-mario-world---athletic/intro_key_original.mid
Sequence too short: 31 data/midi/v16/midi_sources/hooktheory/pianoroll/k/koji-kondo/link-to-the-past---minigame-theme/intro_key_original.mid
Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/k/knife-party/destroy-them-with-lazers/bridge_key_original.mid
Sequence too short: 16 da

Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/q/queen/we-will-rock-you/chorus_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/a/ashlee-simpson/pieces-of-me/pre-chorus_key_original.mid
Sequence too short: 18 data/midi/v16/midi_sources/hooktheory/pianoroll/a/alex-lloyd/amazing/intro_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/a/asgeir/king-and-cross/intro_key_original.mid
Sequence too short: 4 data/midi/v16/midi_sources/hooktheory/pianoroll/a/axwell-and-ingrosso/something-new-/intro_key_original.mid
Sequence too short: 25 data/midi/v16/midi_sources/hooktheory/pianoroll/a/a-lovely-war/autumn-leaves-us-blue/chorus_key_original.mid
Sequence too short: 23 data/midi/v16/midi_sources/hooktheory/pianoroll/a/akito-nakatsuka/zelda-ii---the-adventure-of-link---village/intro_key_original.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/a/andrew-lloyd-webber/music-of

Error parsing midi data/midi/v16/midi_sources/lmd_clean/Wonder Stevie/Sir Duke.1.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v16/midi_sources/lmd_clean/Rednex/The Ultimate Rednex Mega Mix, Part 3: Cotton Eye Joe.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v16/midi_sources/lmd_clean/Jackson Michael/The Girl Is Mine.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v16/midi_sources/lmd_clean/Jackson Michael/Thriller.3.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v16/midi_sources/lmd_clean/Jackson Michael/The Girl Is Mine.3.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v16/midi_sources/lmd_clean/Jackson Michael/Don't Stop 'Til You Get Enough.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v16/midi_sources/lmd_clean/Los Del Rio/Macarena.1.mid ord() expected string of length 1, b

Sequence too short: 24 data/midi/v16/midi_sources/130k_reddit/L/L/Lisa_Miskovsky_-_Still_Alive_(Mirrors_Edge_Theme)__Frozen_Ray_20090312010426.mid
Sequence too short: 6 data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/resets/res_gm.mid
Sequence too short: 13 data/midi/v16/midi_sources/130k_reddit/Guitar_midkar.com_MIDIRip/MidiStudio/RBarclay/1A_reset_File_Resetter.mid
Sequence too short: 22 data/midi/v16/midi_sources/130k_reddit/M/M/Mario Lopez - Free Your Mind (Pulstate Melody).mid
Sequence too short: 24 data/midi/v16/midi_sources/130k_reddit/M/M/Mauro Picotto - Lizard.mid
Sequence too short: 22 data/midi/v16/midi_sources/130k_reddit/M/M/MarioLopez_-_FreeYourMind__Js14_20130129031339.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/M/M/MILES.Children.mid ord() expected string of length 1, but int found
Sequence too short: 21 data/midi/v16/midi_sources/130k_reddit/M/M/Mario Kart Double Dash - Countdown to Race Start.mid
Error parsing midi data/midi/v16/midi

Sequence too short: 12 data/midi/v16/midi_sources/130k_reddit/A/A/ATB_-_Mysterious_Skies__MX25_20090820033608.mid
Sequence too short: 20 data/midi/v16/midi_sources/130k_reddit/A/A/Alvaro & Lange Frans - Geef Alle Hoeren In De Disco Wat Te Drinken.mid
Sequence too short: 16 data/midi/v16/midi_sources/130k_reddit/A/A/Axwell_-_Heart_is_King__John_20110615105948.mid
Sequence too short: 18 data/midi/v16/midi_sources/130k_reddit/A/A/Afrojack ft. The Partysquad - Drop Down.mid
Sequence too short: 20 data/midi/v16/midi_sources/130k_reddit/A/A/Axwell_ft_Sebastian_Ingrosso_-_Its_True_(Roog__Prom_Remix)___DaveC_20101218112854.mid
Sequence too short: 28 data/midi/v16/midi_sources/130k_reddit/A/A/Afrojack - Ghettoblaster.mid
Sequence too short: 31 data/midi/v16/midi_sources/130k_reddit/A/A/alien03.mid
Sequence too short: 21 data/midi/v16/midi_sources/130k_reddit/A/A/aaliyah.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/A/A/animals-house_of_the_rising_sun.mid ord() expected string of

Sequence too short: 24 data/midi/v16/midi_sources/130k_reddit/S/S/Sidekick_-_Deep_Fear__Skez_20090626161633.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/S/S/sw2.mid cannot handle midi file format: <built-in function format>
Sequence too short: 29 data/midi/v16/midi_sources/130k_reddit/S/S/short07.mid
Sequence too short: 17 data/midi/v16/midi_sources/130k_reddit/S/S/shred_for_sathan.mid
Sequence too short: 24 data/midi/v16/midi_sources/130k_reddit/S/S/Svenson And Gielen - Answer The Question.mid
Sequence too short: 17 data/midi/v16/midi_sources/130k_reddit/S/S/snc7034.mid
Sequence too short: 23 data/midi/v16/midi_sources/130k_reddit/S/S/swamp.mid
Sequence too short: 20 data/midi/v16/midi_sources/130k_reddit/S/S/Supaboyz - Master Flat.mid
Sequence too short: 12 data/midi/v16/midi_sources/130k_reddit/S/S/Super Mario Bros - 1 Up.mid
Error parsing midi data/midi/v16/midi_sources/130k_reddit/S/S/songwhiteshade.mid ord() expected string of length 1, but int found
Error parsin

Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jerry-lee-lewis/whole-lotta-shakin-goin-on/verse_key_cmajor.mid
Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jason-becker/altitudes/solo-1_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jack-johnson/crying-shame/intro_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jack-johnson/constellations/verse_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/j/john-tesh/roundball-rock/intro_key_cmajor.mid
Sequence too short: 8 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jack-white/freedom-at-21/bridge_key_cmajor.mid
Sequence too short: 25 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jill-scott/cross-my-mind/intro_key_cmajor.mid
Sequence too short: 30 data/midi/v16/midi_sources/hooktheory/pianoroll/j/jorge-ben/errare-humanum-est/verse_key_cmajor.mid
Sequence too short: 4 d

Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/b/bill-brown/windows-xp-startup-sound/intro_key_cmajor.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/b/bubbles-and-the-shitrockers/liquor-and-whores/verse_key_cmajor.mid
Sequence too short: 12 data/midi/v16/midi_sources/hooktheory/pianoroll/b/bubbles-and-the-shitrockers/liquor-and-whores/intro_key_cmajor.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/b/bubbles-and-the-shitrockers/liquor-and-whores/solo_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/b/beach-house/holy-dances/pre-chorus_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/b/beach-house/the-traveller/verse_key_cmajor.mid
Sequence too short: 17 data/midi/v16/midi_sources/hooktheory/pianoroll/b/beach-house/the-traveller/chorus_key_cmajor.mid
Sequence too short: 8 data/midi/v16/midi_sources/hooktheory/pianoroll/b/birds-of-tokyo/

Sequence too short: 30 data/midi/v16/midi_sources/hooktheory/pianoroll/s/star-control-ii-soundtrack/hyperspace/intro_key_cmajor.mid
Sequence too short: 31 data/midi/v16/midi_sources/hooktheory/pianoroll/s/schoolhouse-rock/my-hero-zero/verse_key_cmajor.mid
Sequence too short: 29 data/midi/v16/midi_sources/hooktheory/pianoroll/s/soundprank/a-single-many---shingo-nakamura-remix/chorus_key_cmajor.mid
Sequence too short: 18 data/midi/v16/midi_sources/hooktheory/pianoroll/s/stevie-ray-vaughn/lenny/intro-and-verse_key_cmajor.mid
Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/s/singularity/horizon/intro_key_cmajor.mid
Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/s/soul-asylum/runaway-train/pre-chorus_key_cmajor.mid
Sequence too short: 19 data/midi/v16/midi_sources/hooktheory/pianoroll/s/sara-bareilles/bottle-it-up/pre-chorus_key_cmajor.mid
Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/s/sufjan-stevens/jupiter-winter/int

Sequence too short: 29 data/midi/v16/midi_sources/hooktheory/pianoroll/e/evanescence/going-under/pre-chorus_key_cmajor.mid
Sequence too short: 25 data/midi/v16/midi_sources/hooktheory/pianoroll/e/elton-john/dont-let-the-sun-go-down-on-me/intro_key_cmajor.mid
Sequence too short: 27 data/midi/v16/midi_sources/hooktheory/pianoroll/e/eric-whitacre/fly-to-paradise/chorus_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/e/eminem/desperation---ft-jamie-n-commons/chorus_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/e/earth-wind-and-fire/sun-goddess/intro_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/e/eden/forever-over/verse_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/e/elvis-presley/hound-dog/verse_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/n/naomi/three-stars-no-match/intro_key_cmajor.mid
Sequ

Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-kinks/lola/intro_key_cmajor.mid
Sequence too short: 23 data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-shins/phantom-limb/bridge_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-white-stripes/the-denial-twist/chorus_key_cmajor.mid
Sequence too short: 28 data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-white-stripes/dead-leaves-and-the-dirty-ground/bridge_key_cmajor.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/t/tom-helsen/please/chorus-lead-out_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/t/thq/spongebob-bfbb---chum-bucket-lab/intro_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/t/the-white-stripes/ball-and-biscuit/verse_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/t/taylor-swift/enchanted/intro_key_cmajor.mi

Sequence too short: 31 data/midi/v16/midi_sources/hooktheory/pianoroll/k/kanye-west/all-mine/intro_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/k/kasabian/la-fee-verte/intro-and-verse_key_cmajor.mid
Sequence too short: 30 data/midi/v16/midi_sources/hooktheory/pianoroll/k/konami/super-castlevania-4---forest-of-monsters/intro-and-verse_key_cmajor.mid
Sequence too short: 15 data/midi/v16/midi_sources/hooktheory/pianoroll/k/kendrick-lamar/fear/instrumental_key_cmajor.mid
Sequence too short: 22 data/midi/v16/midi_sources/hooktheory/pianoroll/k/kaskade/4am-adam-k-and-soha-mix/intro_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/r/rage-against-the-machine/killing-in-the-name-of/intro_key_cmajor.mid
Sequence too short: 22 data/midi/v16/midi_sources/hooktheory/pianoroll/r/rodriguez/sugar-man/intro_key_cmajor.mid
Sequence too short: 23 data/midi/v16/midi_sources/hooktheory/pianoroll/r/rush/yyz/intro_key_cmajor.mid
Se

Sequence too short: 23 data/midi/v16/midi_sources/hooktheory/pianoroll/a/art-of-trance/blue-owl/intro_key_cmajor.mid
Sequence too short: 8 data/midi/v16/midi_sources/hooktheory/pianoroll/a/alan-braxe---fred-falke/rubicon/intro_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/a/absynthe-minded/end-of-the-line/intro-and-verse_key_cmajor.mid
Sequence too short: 12 data/midi/v16/midi_sources/hooktheory/pianoroll/a/audien/wayfarer/intro_key_cmajor.mid
Sequence too short: 27 data/midi/v16/midi_sources/hooktheory/pianoroll/a/a-tribe-called-quest/word-play/instrumental_key_cmajor.mid
Sequence too short: 24 data/midi/v16/midi_sources/hooktheory/pianoroll/a/alabama-shakes/gimme-all-your-love/bridge_key_cmajor.mid
Sequence too short: 16 data/midi/v16/midi_sources/hooktheory/pianoroll/a/alabama-shakes/this-feeling/verse_key_cmajor.mid
Sequence too short: 20 data/midi/v16/midi_sources/hooktheory/pianoroll/a/alabama-shakes/sound-and-color/verse_key_cmajor.mid
Seq

In [None]:
# Hand the processed records and the output path to arr2csv (presumably writes them
# as CSV — the next cell reads out_csv back), then display how many records were returned.
arr2csv(processed, out_csv); len(processed)

In [None]:
# Reload the result from disk and preview it.
# Note: this rebinds `df`, which until now held the source metadata table.
df = pd.read_csv(out_csv); df.head()

In [19]:
# Number of rows whose `midi` entry is a string.
sum(isinstance(value, str) for value in df.midi.values)

196531

In [20]:
# Number of rows whose `numpy` entry is a string.
sum(isinstance(value, str) for value in df.numpy.values)

164774

In [38]:
# Number of rows whose `numpy` entry is a string.
# NOTE(review): same count as the previous cell, yet the recorded outputs differ
# (164774 vs 112169) and the execution counts (20, 38) are not consecutive —
# presumably `df` was reloaded in between; re-run top to bottom to confirm.
sum(isinstance(value, str) for value in df.numpy.values)

112169

In [39]:
# Rows per `source` among records that have a `midi` entry.
has_midi = df.midi.notnull()
Counter(df.loc[has_midi, 'source'].values)

Counter({'hooktheory': 19882,
         'freemidi': 5168,
         'midiworld': 4108,
         'ecomp': 2508,
         'cprato': 312,
         'classical_piano': 329,
         'classical_archives': 14642,
         'musescore': 10933,
         'wikifonia': 6346,
         'lmd': 13565,
         'reddit': 98674,
         'hooktheory_c': 20076})

In [22]:
# Rows per `source` among records that have a `numpy` entry.
has_numpy = df.numpy.notnull()
Counter(df.loc[has_numpy, 'source'].values)

Counter({'hooktheory': 19404,
         'freemidi': 3974,
         'midiworld': 3460,
         'ecomp': 2508,
         'cprato': 308,
         'classical_piano': 329,
         'classical_archives': 14627,
         'musescore': 10891,
         'wikifonia': 6195,
         'lmd': 9175,
         'reddit': 74341,
         'hooktheory_c': 19562})