In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
sys.path.insert(0, '../../')
from src.encode_data import *
from src.midi_data import *
# from src.data_sources import process_parallel, transform_csv_row
from src.midi_transform import *
from concurrent.futures import ProcessPoolExecutor
from fastprogress.fastprogress import master_bar, progress_bar

In [19]:
import traceback
import time

In [4]:
from tqdm import tqdm

## Standardize and reformat raw midi files before encoding to text
- Transform key to C major
- Remove unused instruments
- Combine multiple tracks with the same instrument into a single part
- Melody, Piano, String

### Load midi data

In [5]:
version = 'v12'
data_path = Path('data/midi')
version_path = data_path/version

In [6]:
import pandas as pd

In [7]:
source_dir = 'midi_sources'
out_dir = 'midi_encode'
source_csv = version_path/'metadata'/f'{source_dir}.csv'
out_csv = version_path/out_dir/f'{out_dir}.csv'
out_csv.parent.mkdir(parents=True, exist_ok=True)
source_csv, out_csv

(PosixPath('data/midi/v12/metadata/midi_sources.csv'),
 PosixPath('data/midi/v12/midi_encode/midi_encode.csv'))

In [8]:
num_comps = 2 # note, duration
cutoff = 4 # max instruments
max_dur = 128

### Via Data Sources

In [9]:
import concurrent

In [10]:

def process_all(func, arr, total=None, max_workers=None, timeout=None):
    "Process array in parallel"
    if total is None: total = len(arr)
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        futures = [ex.submit(func,o) for i,o in enumerate(arr)]
        for f in progress_bar(concurrent.futures.as_completed(futures, timeout=timeout), total=total):
            res = f.result(timeout=timeout)
            if res is not None:
                results.append(res)
    return results

## Convert mxl to midi

In [11]:
df = pd.read_csv(source_csv); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,song_url,ht_bpm,ht_time_signature,midi,genres,artist,md5,section,ht_key,source,parts,midi_title,title,ht_offset,ht_mode,mxl
0,https://www.hooktheory.com/theorytab/view/wayn...,128.0,4.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,,wayne-sharpe,bf1f29e5ff84e3e93e37fb873bfb590e,chorus,C,hooktheory,"intro,chorus",yu-gi-oh3,yu-gi-oh-theme-song,0.0,1.0,
1,https://www.hooktheory.com/theorytab/view/wayn...,85.0,3.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,,wayne-sharpe,055f80ad67f64edb14a85ca8fbfe8c29,intro,C,hooktheory,"intro,chorus",yu-gi-oh,yu-gi-oh-theme-song,0.0,1.0,
2,https://www.hooktheory.com/theorytab/view/what...,96.0,4.0,midi_sources/hooktheory/pianoroll/w/what-a-day...,Jazz,what-a-day,197f96f5d181f6ce1e2c5ab04ac1ff87,chorus,D,hooktheory,chorus,kiefer,kiefer,-5.0,6.0,
3,https://www.hooktheory.com/theorytab/view/whit...,152.0,4.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,"J-Pop,Pop",whiteflame,9e7ce13a35f1314423a9a6d5a5287a4a,pre-chorus,D,hooktheory,"verse,pre-chorus,chorus",senbonzakura - pre-Pre-Chorus,senbonzakura,-5.0,6.0,
4,https://www.hooktheory.com/theorytab/view/whit...,152.0,4.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,"J-Pop,Pop",whiteflame,d5aaf79d0989222f1362f9f46c540a27,verse,D,hooktheory,"verse,pre-chorus,chorus",Senbonzakura,senbonzakura,-5.0,6.0,


In [12]:
all_records = df.to_dict(orient='records')

In [12]:
# all_records = df.loc[df.source.isin(['classical_archives'])].to_dict(orient='records'); len(all_records)

14671

In [14]:
# mxl_records = df.loc[df.source.isin(['ecomp', 'wikifonia', 'classical_piano', 'musescore'])].to_dict(orient='records')



In [13]:
def mxl2midi_func(metadata):
    start = time.time()
    result = metadata.copy()
    
    if not isinstance(metadata.get('mxl'), str): return result
    
    input_path = version_path/metadata['mxl']
    
    out_file = Path(str(metadata['mxl']).replace('midi_sources/', 'midi_sources/from_mxl/')).with_suffix('.mid')
    
    output_path = version_path/out_file
    
#     if not output_path.exists():
# #         return result
#         try:
#             output_path.parent.mkdir(parents=True, exist_ok=True)
#             stream = file2stream(input_path)
#             stream.write('midi', fp=output_path)
            
#             elapsed_time = time.time() - start
#             result['sec_elapsed_mxl2midi'] = elapsed_time
#             if elapsed_time > 60:
#                 print('Super long file:', elapsed_time, input_path)
#         except Exception:
#             print(traceback.format_exc())
#             return None
    result['midi'] = out_file
    return result

In [14]:
# # sanity check
# import random
# for r in random.sample(mxl_records, 500):
#     mxl2midi_func(r)
#     print(r)

In [15]:
# processed = process_all(mxl2midi_func, all_records, timeout=10)

In [14]:
processed = process_all(mxl2midi_func, all_records)

In [15]:
from src.data_sources import arr2csv
arr2csv(processed, out_csv)
df = pd.read_csv(out_csv); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,genres,artist,ht_key,midi_title,ht_mode,mxl,song_url,ht_offset,parts,title,ht_bpm,ht_time_signature,section,midi,source,md5
0,,wayne-sharpe,C,yu-gi-oh3,1.0,,https://www.hooktheory.com/theorytab/view/wayn...,0.0,"intro,chorus",yu-gi-oh-theme-song,128.0,4.0,chorus,midi_sources/hooktheory/pianoroll/w/wayne-shar...,hooktheory,bf1f29e5ff84e3e93e37fb873bfb590e
1,,wayne-sharpe,C,yu-gi-oh,1.0,,https://www.hooktheory.com/theorytab/view/wayn...,0.0,"intro,chorus",yu-gi-oh-theme-song,85.0,3.0,intro,midi_sources/hooktheory/pianoroll/w/wayne-shar...,hooktheory,055f80ad67f64edb14a85ca8fbfe8c29
2,Jazz,what-a-day,D,kiefer,6.0,,https://www.hooktheory.com/theorytab/view/what...,-5.0,chorus,kiefer,96.0,4.0,chorus,midi_sources/hooktheory/pianoroll/w/what-a-day...,hooktheory,197f96f5d181f6ce1e2c5ab04ac1ff87
3,"J-Pop,Pop",whiteflame,D,senbonzakura - pre-Pre-Chorus,6.0,,https://www.hooktheory.com/theorytab/view/whit...,-5.0,"verse,pre-chorus,chorus",senbonzakura,152.0,4.0,pre-chorus,midi_sources/hooktheory/pianoroll/w/whiteflame...,hooktheory,9e7ce13a35f1314423a9a6d5a5287a4a
4,"J-Pop,Pop",whiteflame,D,Senbonzakura,6.0,,https://www.hooktheory.com/theorytab/view/whit...,-5.0,"verse,pre-chorus,chorus",senbonzakura,152.0,4.0,verse,midi_sources/hooktheory/pianoroll/w/whiteflame...,hooktheory,d5aaf79d0989222f1362f9f46c540a27


### Midi formatting

In [11]:
df = pd.read_csv(out_csv); df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,genres,artist,ht_key,midi_title,ht_mode,mxl,song_url,ht_offset,parts,title,ht_bpm,ht_time_signature,section,midi,source,md5
0,,wayne-sharpe,C,yu-gi-oh3,1.0,,https://www.hooktheory.com/theorytab/view/wayn...,0.0,"intro,chorus",yu-gi-oh-theme-song,128.0,4.0,chorus,midi_sources/hooktheory/pianoroll/w/wayne-shar...,hooktheory,bf1f29e5ff84e3e93e37fb873bfb590e
1,,wayne-sharpe,C,yu-gi-oh,1.0,,https://www.hooktheory.com/theorytab/view/wayn...,0.0,"intro,chorus",yu-gi-oh-theme-song,85.0,3.0,intro,midi_sources/hooktheory/pianoroll/w/wayne-shar...,hooktheory,055f80ad67f64edb14a85ca8fbfe8c29
2,Jazz,what-a-day,D,kiefer,6.0,,https://www.hooktheory.com/theorytab/view/what...,-5.0,chorus,kiefer,96.0,4.0,chorus,midi_sources/hooktheory/pianoroll/w/what-a-day...,hooktheory,197f96f5d181f6ce1e2c5ab04ac1ff87
3,"J-Pop,Pop",whiteflame,D,senbonzakura - pre-Pre-Chorus,6.0,,https://www.hooktheory.com/theorytab/view/whit...,-5.0,"verse,pre-chorus,chorus",senbonzakura,152.0,4.0,pre-chorus,midi_sources/hooktheory/pianoroll/w/whiteflame...,hooktheory,9e7ce13a35f1314423a9a6d5a5287a4a
4,"J-Pop,Pop",whiteflame,D,Senbonzakura,6.0,,https://www.hooktheory.com/theorytab/view/whit...,-5.0,"verse,pre-chorus,chorus",senbonzakura,152.0,4.0,verse,midi_sources/hooktheory/pianoroll/w/whiteflame...,hooktheory,d5aaf79d0989222f1362f9f46c540a27


In [12]:
all_records = df.to_dict(orient='records'); len(all_records)

197426

In [13]:
mxl_records = df.loc[df.source.isin(['classical_archives', 'hooktheory_c'])].to_dict(orient='records')
# mxl_records = df.loc[df.source.isin(['ecomp', 'wikifonia', 'classical_piano', 'musescore'])].to_dict(orient='records')

In [14]:
def transform_func(metadata):
    result = metadata.copy()
    start = time.time()
    
    # Part 1. Compress tracks/instruments
    if not isinstance(metadata.get('midi'), str): return None
    
    input_path = version_path/metadata['midi']
    extension = input_path.suffix.lower()
    if not input_path.exists(): 
        print('Input path does not exist:', input_path, metadata)
        return None
    
    # Get outfile and check if it exists
    out_file = Path(str(input_path).replace(f'/{source_dir}/', f'/{out_dir}/'))
    out_file = out_file.with_suffix('.npy')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    if out_file.exists(): 
        result['numpy'] = str(out_file.relative_to(version_path))
        return result
    
#     # DEBUG
#     return None
    
    try: input_file = compress_midi_file(input_path, cutoff=cutoff) # remove non note tracks and standardize instruments
    except music21.midi.MidiException as e:
        print('Error parsing midi', input_path, e)
        return None
    if not input_file: return None
        
    # Part 2. Compress rests and long notes
    stream = file2stream(input_file) # 1.
    try:
        chordarr = stream2chordarr(stream, max_dur=max_dur, flat=False) # 2. max_dur = quarter_len * sample_freq (4). 128 = 8 bars
#     chordarr = stream2chordarr(stream, max_dur=max_dur, flat=(extension != '.mid')) # 2. max_dur = quarter_len * sample_freq (4). 128 = 8 bars
    except Exception as e:
        print('Could not encode to chordarr:', input_path, e)
#         print(traceback.format_exc())
        return None
    
    chord_trim = trim_chordarr_rests(chordarr)
    chord_short = shorten_chordarr_rests(chord_trim)
    delta_trim = chord_trim.shape[0] - chord_short.shape[0]
    if delta_trim > 300: 
        print(f'Removed {delta_trim} rests from {input_path}. Skipping song')
        return None
    chordarr = chord_short
    
    # Part 3. Chord array to numpy
    seq = chordarr2seq(chordarr)
    if len(seq) < 32:
        print('Sequence too short:', len(seq), input_path)
        return None
    
#     category = ENC_OFFSET + max_dur + sources.index(metadata['source']) + 1
    category = source2encidx(metadata['source'], max_dur=max_dur)
    npenc = seq2npenc(seq, num_comps=num_comps, category=category)
    if (npenc[1:,1] > max_dur+ENC_OFFSET).any(): 
        print('npenc exceeds max 128 duration:', input_path)
        return None
    
    if ((npenc[...,0] > ENC_OFFSET) & ((npenc[...,0] < 12+ENC_OFFSET) | (npenc[...,0] >= 127-12+ENC_OFFSET))).any(): 
        print('npenc out of note range 12 - 116:', input_path)
        return None
    
    
    np.save(out_file, npenc)
    
    elapsed_time = time.time() - start
    result['sec_elapsed_tfm'] = elapsed_time
    if elapsed_time > 60:
        print('Super long file:', elapsed_time, input_path)
    
    result['numpy'] = str(out_file.relative_to(version_path))
    return result

In [15]:
def try_transform_func(metadata):
    try:
        return transform_func(metadata)
    except Exception:
        print(traceback.format_exc())
        return None

In [None]:
# # sanity check
import random
for r in random.sample(mxl_records, 500):
    transform_func(r)
        

In [17]:
### AS TOD: Fix 'info channel is not channel 0' error

In [21]:
processed = process_all(try_transform_func, mxl_records)

Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/021/tmbksost.mid index 127 is out of bounds for axis 2 with size 127
npenc out of note range 12 - 116: data/midi/v12/midi_sources/from_mxl/classical_archives/021/tmdbshep.mid


Error parsing midi data/midi/v12/midi_sources/from_mxl/classical_archives/021/bl109_15.mid badly formated midi bytes, got: b''
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/021/tmpanwrs.mid index 127 is out of bounds for axis 2 with size 127
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/021/tmpchnem.mid index 127 is out of bounds for axis 2 with size 127
Error parsing midi data/midi/v12/midi_sources/from_mxl/classical_archives/021/3_pt_15.mid badly formated midi bytes, got: b''
npenc out of note range 12 - 116: data/midi/v12/midi_sources/from_mxl/classical_archives/021/tmhl4611.mid
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/021/tmslvdnc.mid index 127 is out of bounds for axis 2 with size 127
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/021/tm10es08.mid index 127 is out of bounds for axis 2 with size 127
Could not encode to chord

Super long file: 150.7940855026245 data/midi/v12/midi_sources/from_mxl/classical_archives/021/rachsona.mid
Super long file: 96.37234497070312 data/midi/v12/midi_sources/from_mxl/classical_archives/8/menop64a.mid
Removed 772 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/8/donjuan.mid. Skipping song
Super long file: 88.70435810089111 data/midi/v12/midi_sources/from_mxl/classical_archives/8/jpgsuite.mid
Super long file: 78.60069489479065 data/midi/v12/midi_sources/from_mxl/classical_archives/8/beet1c1t.mid
Super long file: 66.45050835609436 data/midi/v12/midi_sources/from_mxl/classical_archives/8/be-ps-08.mid
Super long file: 166.61050462722778 data/midi/v12/midi_sources/from_mxl/classical_archives/021/jg591_34.mid
Super long file: 105.88474631309509 data/midi/v12/midi_sources/from_mxl/classical_archives/8/wamk283.mid
Super long file: 179.26136660575867 data/midi/v12/midi_sources/from_mxl/classical_archives/021/rachson.mid
Super long file: 94.24394989013672 data/midi/v

Super long file: 159.05739331245422 data/midi/v12/midi_sources/from_mxl/classical_archives/4/goldsoni.mid
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/023/tmsk1304.mid index 127 is out of bounds for axis 2 with size 127
npenc out of note range 12 - 116: data/midi/v12/midi_sources/from_mxl/classical_archives/023/tmra2309.mid
Super long file: 74.08058834075928 data/midi/v12/midi_sources/from_mxl/classical_archives/4/syslmaca.mid
Super long file: 86.24698066711426 data/midi/v12/midi_sources/from_mxl/classical_archives/4/syblsy54.mid
Super long file: 166.9938097000122 data/midi/v12/midi_sources/from_mxl/classical_archives/4/lisztmef.mid
Super long file: 91.61569809913635 data/midi/v12/midi_sources/from_mxl/classical_archives/4/tchaik54.mid
Super long file: 86.80077385902405 data/midi/v12/midi_sources/from_mxl/classical_archives/4/lvbsym74.mid
Super long file: 74.12759709358215 data/midi/v12/midi_sources/from_mxl/classical_archives/4/sc-ps-01.mid
Supe

npenc out of note range 12 - 116: data/midi/v12/midi_sources/from_mxl/classical_archives/024/tmb11910.mid
Super long file: 147.597243309021 data/midi/v12/midi_sources/from_mxl/classical_archives/j/beetho92.mid
Removed 360 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/024/be13_1.mid. Skipping song
Super long file: 64.31943702697754 data/midi/v12/midi_sources/from_mxl/classical_archives/023/sinf2_4.mid
Super long file: 84.58487558364868 data/midi/v12/midi_sources/from_mxl/classical_archives/023/faustact.mid
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/024/tmb11909.mid index 127 is out of bounds for axis 2 with size 127
Super long file: 205.72830200195312 data/midi/v12/midi_sources/from_mxl/classical_archives/4/haydlms.mid
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/024/tmembro1.mid index 127 is out of bounds for axis 2 with size 127
Could not encode to chordarr: data/midi/v12/midi_sources

Super long file: 165.77092242240906 data/midi/v12/midi_sources/from_mxl/classical_archives/024/vorspi.mid
Super long file: 95.7142584323883 data/midi/v12/midi_sources/from_mxl/classical_archives/022/figale1.mid
Super long file: 87.07804536819458 data/midi/v12/midi_sources/from_mxl/classical_archives/022/st_m123.mid
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/0/handel2.mid index 775 is out of bounds for axis 0 with size 775
Removed 704 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/6/eroica-3.mid. Skipping song
Super long file: 65.06366109848022 data/midi/v12/midi_sources/from_mxl/classical_archives/6/dvv2.mid
Super long file: 180.39903378486633 data/midi/v12/midi_sources/from_mxl/classical_archives/024/syswd365.mid
Input path does not exist: data/midi/v12/midi_sources/from_mxl/classical_archives/0/bcpe554b.mid {'genres': 'classical', 'artist': 'bcpe554b', 'ht_key': nan, 'midi_title': nan, 'ht_mode': nan, 'mxl': 'midi_sources/c

Removed 308 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/0/brmsreq1.mid. Skipping song
Removed 712 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/0/brmsreq3.mid. Skipping song
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/0/mah7-1dv.mid index 127 is out of bounds for axis 2 with size 127
Super long file: 64.35889554023743 data/midi/v12/midi_sources/from_mxl/classical_archives/0/mhqop87a.mid
Removed 712 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/012/tallspem.mid. Skipping song
Super long file: 79.08790445327759 data/midi/v12/midi_sources/from_mxl/classical_archives/0/tch1811.mid
npenc exceeds max 128 duration: data/midi/v12/midi_sources/from_mxl/classical_archives/0/neptune.mid
Super long file: 74.46997499465942 data/midi/v12/midi_sources/from_mxl/classical_archives/0/moz24cm1.mid
Super long file: 127.63238143920898 data/midi/v12/midi_sources/from_mxl/classical_archives/0/mensy3-1.mid
S

Super long file: 68.25854682922363 data/midi/v12/midi_sources/from_mxl/classical_archives/012/msy5514.mid
Super long file: 94.38025999069214 data/midi/v12/midi_sources/from_mxl/classical_archives/012/suite.mid
Super long file: 75.86701273918152 data/midi/v12/midi_sources/from_mxl/classical_archives/012/stsan4_2.mid
Super long file: 233.25654864311218 data/midi/v12/midi_sources/from_mxl/classical_archives/0/schm_16.mid
Super long file: 79.46306157112122 data/midi/v12/midi_sources/from_mxl/classical_archives/012/k551_4.mid
Sequence too short: 17 data/midi/v12/midi_sources/from_mxl/classical_archives/011/and10.mid
Input path does not exist: data/midi/v12/midi_sources/from_mxl/classical_archives/011/06_scena.mid {'genres': 'classical', 'artist': '06_scena', 'ht_key': nan, 'midi_title': nan, 'ht_mode': nan, 'mxl': 'midi_sources/classical_archives/011/06_scena.mxl', 'song_url': nan, 'ht_offset': nan, 'parts': nan, 'title': '06_scena', 'ht_bpm': nan, 'ht_time_signature': nan, 'section': nan, 

Super long file: 257.2486262321472 data/midi/v12/midi_sources/from_mxl/classical_archives/011/tati_com.mid
Input path does not exist: data/midi/v12/midi_sources/from_mxl/classical_archives/5/beso23-3.mid {'genres': 'classical', 'artist': 'beso23-3', 'ht_key': nan, 'midi_title': nan, 'ht_mode': nan, 'mxl': 'midi_sources/classical_archives/5/beso23-3.mxl', 'song_url': nan, 'ht_offset': nan, 'parts': nan, 'title': 'beso23-3', 'ht_bpm': nan, 'ht_time_signature': nan, 'section': nan, 'midi': 'midi_sources/from_mxl/classical_archives/5/beso23-3.mid', 'source': 'classical_archives', 'md5': '0ad6210e95753e73b8539340e25f680a'}
Super long file: 75.2008593082428 data/midi/v12/midi_sources/from_mxl/classical_archives/013/beethhov.mid
Super long file: 68.25430941581726 data/midi/v12/midi_sources/from_mxl/classical_archives/5/islamey.mid
Super long file: 137.5250904560089 data/midi/v12/midi_sources/from_mxl/classical_archives/013/alassio3.mid
Super long file: 74.05513954162598 data/midi/v12/midi_sou

Super long file: 69.49792838096619 data/midi/v12/midi_sources/from_mxl/classical_archives/3/brhms73a.mid
npenc out of note range 12 - 116: data/midi/v12/midi_sources/from_mxl/classical_archives/014/tmgr62_4.mid
Sequence too short: 13 data/midi/v12/midi_sources/from_mxl/classical_archives/014/cn337_07.mid
Super long file: 110.87957882881165 data/midi/v12/midi_sources/from_mxl/classical_archives/3/gp_intro.mid
Removed 312 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/3/marchsla.mid. Skipping song
Super long file: 210.9466836452484 data/midi/v12/midi_sources/from_mxl/classical_archives/5/hunbatt.mid
Super long file: 184.112957239151 data/midi/v12/midi_sources/from_mxl/classical_archives/3/becvl1.mid
Super long file: 263.6833848953247 data/midi/v12/midi_sources/from_mxl/classical_archives/013/esposizi.mid
Super long file: 85.41096353530884 data/midi/v12/midi_sources/from_mxl/classical_archives/3/finasal.mid
Input path does not exist: data/midi/v12/midi_sources/from_mxl/

Super long file: 81.02022528648376 data/midi/v12/midi_sources/from_mxl/classical_archives/014/__24310_.mid
Super long file: 82.51382279396057 data/midi/v12/midi_sources/from_mxl/classical_archives/014/lizwierd.mid
Super long file: 245.7544093132019 data/midi/v12/midi_sources/from_mxl/classical_archives/3/bsfanctr.mid
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/014/tmcl37_1.mid index 127 is out of bounds for axis 2 with size 127
Removed 476 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/7/saturn.mid. Skipping song
Removed 336 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/014/hiawatha.mid. Skipping song
Removed 520 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/7/russludo.mid. Skipping song
Super long file: 82.52671384811401 data/midi/v12/midi_sources/from_mxl/classical_archives/014/le_carna.mid
Super long file: 281.7368257045746 data/midi/v12/midi_sources/from_mxl/classical_archives/3/slav

Super long file: 167.77457332611084 data/midi/v12/midi_sources/from_mxl/classical_archives/7/mthm26c.mid
Super long file: 63.81247591972351 data/midi/v12/midi_sources/from_mxl/classical_archives/031/sonati-2.mid
Super long file: 87.06405830383301 data/midi/v12/midi_sources/from_mxl/classical_archives/031/jk_dbl.mid
Super long file: 157.65871119499207 data/midi/v12/midi_sources/from_mxl/classical_archives/7/gotter.mid
Removed 436 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/9/dansemac.mid. Skipping song
Super long file: 249.0345675945282 data/midi/v12/midi_sources/from_mxl/classical_archives/014/besy31.mid
Removed 1896 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/9/hayq5021.mid. Skipping song
Super long file: 76.20379090309143 data/midi/v12/midi_sources/from_mxl/classical_archives/031/piano_co.mid
Super long file: 62.36291813850403 data/midi/v12/midi_sources/from_mxl/classical_archives/031/iv_alleg.mid
Super long file: 69.73712825775146 data/mid

Super long file: 87.06428456306458 data/midi/v12/midi_sources/from_mxl/classical_archives/9/dberg.mid
Super long file: 204.97495770454407 data/midi/v12/midi_sources/from_mxl/classical_archives/031/beethove.mid
Super long file: 82.62616276741028 data/midi/v12/midi_sources/from_mxl/classical_archives/004/borgia_2.mid
Input path does not exist: data/midi/v12/midi_sources/from_mxl/classical_archives/1/mazrka29.mid {'genres': 'classical', 'artist': 'mazrka29', 'ht_key': nan, 'midi_title': nan, 'ht_mode': nan, 'mxl': 'midi_sources/classical_archives/1/mazrka29.mxl', 'song_url': nan, 'ht_offset': nan, 'parts': nan, 'title': 'mazrka29', 'ht_bpm': nan, 'ht_time_signature': nan, 'section': nan, 'midi': 'midi_sources/from_mxl/classical_archives/1/mazrka29.mid', 'source': 'classical_archives', 'md5': 'd7382cc84cd1ff5e9c3b1518f9bd21c7'}
Removed 720 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/9/triple-1.mid. Skipping song
Super long file: 105.44620966911316 data/midi/v12/midi_s

Super long file: 216.92241525650024 data/midi/v12/midi_sources/from_mxl/classical_archives/1/sarka.mid
Removed 504 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/1/vysehrad.mid. Skipping song
Super long file: 61.16244649887085 data/midi/v12/midi_sources/from_mxl/classical_archives/1/dvqt14m4.mid
Removed 1072 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/1/phamlet.mid. Skipping song
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/2/ravmmlo5.mid index 127 is out of bounds for axis 2 with size 127
Input path does not exist: data/midi/v12/midi_sources/from_mxl/classical_archives/2/lisztson.mid {'genres': 'classical', 'artist': 'lisztson', 'ht_key': nan, 'midi_title': nan, 'ht_mode': nan, 'mxl': 'midi_sources/classical_archives/2/lisztson.mxl', 'song_url': nan, 'ht_offset': nan, 'parts': nan, 'title': 'lisztson', 'ht_bpm': nan, 'ht_time_signature': nan, 'section': nan, 'midi': 'midi_sources/from_mxl/classical_archiv

Removed 384 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/2/midsumgm.mid. Skipping song
Could not encode to chordarr: data/midi/v12/midi_sources/from_mxl/classical_archives/2/ravmmlo3.mid index 127 is out of bounds for axis 2 with size 127
Super long file: 83.59933423995972 data/midi/v12/midi_sources/from_mxl/classical_archives/2/rach2-1.mid
Super long file: 179.97132110595703 data/midi/v12/midi_sources/from_mxl/classical_archives/1/betviol1.mid
npenc out of note range 12 - 116: data/midi/v12/midi_sources/from_mxl/classical_archives/2/bwv169_1.mid
Super long file: 69.84988260269165 data/midi/v12/midi_sources/from_mxl/classical_archives/2/mo-ps-13.mid
Removed 428 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/2/verdirm1.mid. Skipping song
Super long file: 121.27730560302734 data/midi/v12/midi_sources/from_mxl/classical_archives/2/ds2pma.mid
Super long file: 178.9587061405182 data/midi/v12/midi_sources/from_mxl/classical_archives/1/gmb5m4.mid
Input 


Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-8ce624b49e4b>", line 32, in transform_func
    stream = file2stream(input_file) # 1.
  File "../../src/midi_data.py", line 78, in file2stream
    if isinstance(fp, music21.midi.MidiFile): return music21.midi.translate.midiFileToStream(fp)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 2115, in midiFileToStream
    **keywords)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 1905, in midiTracksToStreams
    p = s.getElementsByClass('Stream')[0]
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/stream/iterator.py", line 328, in __getitem__
    e = fe[k]
IndexError: list index out of range

Sequence too short: 25 data/midi/v12/midi_sources/hooktheory/pianoroll/y/yes/heart-of-the-sunrise/in


Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-8ce624b49e4b>", line 32, in transform_func
    stream = file2stream(input_file) # 1.
  File "../../src/midi_data.py", line 78, in file2stream
    if isinstance(fp, music21.midi.MidiFile): return music21.midi.translate.midiFileToStream(fp)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 2115, in midiFileToStream
    **keywords)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 1905, in midiTracksToStreams
    p = s.getElementsByClass('Stream')[0]
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/stream/iterator.py", line 328, in __getitem__
    e = fe[k]
IndexError: list index out of range

Sequence too short: 17 data/midi/v12/midi_sources/hooktheory/pianoroll/f/frankie-valli/the-night/int


Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-8ce624b49e4b>", line 32, in transform_func
    stream = file2stream(input_file) # 1.
  File "../../src/midi_data.py", line 78, in file2stream
    if isinstance(fp, music21.midi.MidiFile): return music21.midi.translate.midiFileToStream(fp)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 2115, in midiFileToStream
    **keywords)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 1905, in midiTracksToStreams
    p = s.getElementsByClass('Stream')[0]
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/stream/iterator.py", line 328, in __getitem__
    e = fe[k]
IndexError: list index out of range

Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transfor


Sequence too short: 29 data/midi/v12/midi_sources/hooktheory/pianoroll/n/neru/law-evading-rock/chorus-lead-out_key_cmajor.mid
Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-8ce624b49e4b>", line 32, in transform_func
    stream = file2stream(input_file) # 1.
  File "../../src/midi_data.py", line 78, in file2stream
    if isinstance(fp, music21.midi.MidiFile): return music21.midi.translate.midiFileToStream(fp)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 2115, in midiFileToStream
    **keywords)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 1905, in midiTracksToStreams
    p = s.getElementsByClass('Stream')[0]
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/stream/iterator.py", line 328, in __getitem__
    e = fe[k]
IndexError:


Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-8ce624b49e4b>", line 32, in transform_func
    stream = file2stream(input_file) # 1.
  File "../../src/midi_data.py", line 78, in file2stream
    if isinstance(fp, music21.midi.MidiFile): return music21.midi.translate.midiFileToStream(fp)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 2115, in midiFileToStream
    **keywords)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 1905, in midiTracksToStreams
    p = s.getElementsByClass('Stream')[0]
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/stream/iterator.py", line 328, in __getitem__
    e = fe[k]
IndexError: list index out of range

Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transfor

Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-8ce624b49e4b>", line 32, in transform_func
    stream = file2stream(input_file) # 1.
  File "../../src/midi_data.py", line 78, in file2stream
    if isinstance(fp, music21.midi.MidiFile): return music21.midi.translate.midiFileToStream(fp)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 2115, in midiFileToStream
    **keywords)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 1905, in midiTracksToStreams
    p = s.getElementsByClass('Stream')[0]
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/stream/iterator.py", line 328, in __getitem__
    e = fe[k]
IndexError: list index out of range

Sequence too short: 17 data/midi/v12/midi_sources/hooktheory/pianoroll/t/the-beatles/a-hard-days-nigh


Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-8ce624b49e4b>", line 32, in transform_func
    stream = file2stream(input_file) # 1.
  File "../../src/midi_data.py", line 78, in file2stream
    if isinstance(fp, music21.midi.MidiFile): return music21.midi.translate.midiFileToStream(fp)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 2115, in midiFileToStream
    **keywords)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 1905, in midiTracksToStreams
    p = s.getElementsByClass('Stream')[0]
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/stream/iterator.py", line 328, in __getitem__
    e = fe[k]
IndexError: list index out of range

Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transfor


Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform_func
    return transform_func(metadata)
  File "<ipython-input-14-8ce624b49e4b>", line 32, in transform_func
    stream = file2stream(input_file) # 1.
  File "../../src/midi_data.py", line 78, in file2stream
    if isinstance(fp, music21.midi.MidiFile): return music21.midi.translate.midiFileToStream(fp)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 2115, in midiFileToStream
    **keywords)
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/midi/translate.py", line 1905, in midiTracksToStreams
    p = s.getElementsByClass('Stream')[0]
  File "/home/ubuntu/anaconda3/envs/midi/lib/python3.7/site-packages/music21/stream/iterator.py", line 328, in __getitem__
    e = fe[k]
IndexError: list index out of range
Traceback (most recent call last):
  File "<ipython-input-15-c95459e3fde9>", line 3, in try_transform

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Super long file: 148.6588113307953 data/midi/v12/midi_sources/from_mxl/classical_archives/2/ee1132.mid


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Super long file: 102.31447052955627 data/midi/v12/midi_sources/from_mxl/classical_archives/2/strpetr.mid
Super long file: 103.20752620697021 data/midi/v12/midi_sources/from_mxl/classical_archives/2/grifson.mid
Removed 516 rests from data/midi/v12/midi_sources/from_mxl/classical_archives/032/niel_s11.mid. Skipping song
Super long file: 291.60509753227234 data/midi/v12/midi_sources/from_mxl/classical_archives/1/3mephis.mid
Super long file: 92.88618898391724 data/midi/v12/midi_sources/from_mxl/classical_archives/032/humsept1.mid
Super long file: 98.68500685691833 data/midi/v12/midi_sources/from_mxl/classical_archives/2/smetsy3.mid
Super long file: 113.51791644096375 data/midi/v12/midi_sources/from_mxl/classical_archives/2/humm_t6.mid
Super long file: 1191.339098930359 data/midi/v12/midi_sources/from_mxl/classical_archives/011/die_kuns.mid
Super long file: 118.53213882446289 data/midi/v12/midi_sources/from_mxl/classical_archives/2/dj2pian1.mid
Super long file: 188.33428263664246 data/midi/

In [25]:
processed = process_all(try_transform_func, all_records)

In [None]:
from src.data_sources import arr2csv

In [None]:
arr2csv(processed, out_csv); len(processed)

In [None]:
df = pd.read_csv(out_csv)

In [None]:
df.head()