The goal of this notebook is to estimate the average number of monophonic MIDI windows per MIDI file (track), which is then used to calculate an appropriate number of epochs.

In [1]:
import sys
sys.path.append('../python')

import os, pdb
import utils, midi_utils
from multiprocessing import Pool as ThreadPool
import numpy as np

Using TensorFlow backend.


In [2]:
def get_data_from_midi(midi, window_size=20, monophonic_threshold=0.95):
    """Collect sliding-window samples from the monophonic instruments of parsed MIDI objects.

    Args:
        midi: iterable of parsed MIDI objects exposing an ``instruments``
            attribute. ``None`` entries (presumably files that failed to
            parse -- confirm against ``midi_utils.parse_midi``) are skipped.
        window_size: forwarded as ``window_size`` to
            ``midi_utils.encode_sliding_window_notes``. Instruments must have
            strictly more than this many notes to be used.
        monophonic_threshold: second positional argument forwarded to
            ``midi_utils.filter_monophonic``.

    Returns:
        A tuple ``(X, y)`` of numpy arrays, where ``X`` holds element 0 and
        ``y`` holds element 1 of every window returned by the encoder
        (presumably the input window and its target -- verify in midi_utils).
        Both arrays are empty when no instrument qualifies.
    """
    X, y = [], []
    for m in midi:
        if m is None:
            continue
        melody_instruments = midi_utils.filter_monophonic(m.instruments,
                                                          monophonic_threshold)
        for instrument in melody_instruments:
            # Tie the minimum-notes guard to the window size so the two
            # values cannot drift apart.
            if len(instrument.notes) > window_size:
                windows = midi_utils.encode_sliding_window_notes(
                    instrument, window_size=window_size)
                for w in windows:
                    X.append(w[0])
                    y.append(w[1])
    return (np.asarray(X), np.asarray(y))

In [3]:
midi_dir = '../../data/query_symlinks'
max_files = 5000
# os.listdir returns entries in arbitrary order; sort first so the sampled
# subset of files is the same on every run.
files = [os.path.join(midi_dir, path)
         for path in sorted(os.listdir(midi_dir))][:max_files]

# NOTE: `ThreadPool` is an alias for multiprocessing.Pool, so these are
# worker processes rather than threads.
num_threads = 8
pool = ThreadPool(num_threads)

# Process the files in batches of `batch_size` so that only one batch of
# parsed MIDI data is held in memory at a time.
batch_size = 100
count = 0  # running total of windows across all processed files
try:
    for i in range(0, len(files), batch_size):
        # Clamp the end index so a final partial batch is labelled and
        # averaged over the number of files actually processed.
        batch_end = min(i + batch_size, len(files))
        print("batch: {}-{}".format(i, batch_end))
        parsed = pool.map(midi_utils.parse_midi, files[i:batch_end])
        data = get_data_from_midi(parsed)
        count = count + len(data[1])
        # Floor division keeps the integer average on both Python 2 and 3.
        print('{} windows per file after {} files'.format(count // batch_end, batch_end))
        # Release the batch before parsing the next one.
        del parsed
        del data
finally:
    # Always shut the worker processes down, even if a batch fails.
    pool.close()
    pool.join()

print(count)

batch: 0-100




796 windows per file after 100 files
batch: 100-200




817 windows per file after 200 files
batch: 200-300
818 windows per file after 300 files
batch: 300-400
802 windows per file after 400 files
batch: 400-500




824 windows per file after 500 files
batch: 500-600
837 windows per file after 600 files
batch: 600-700
844 windows per file after 700 files
batch: 700-800
830 windows per file after 800 files
batch: 800-900
827 windows per file after 900 files
batch: 900-1000
837 windows per file after 1000 files
batch: 1000-1100
840 windows per file after 1100 files
batch: 1100-1200
845 windows per file after 1200 files
batch: 1200-1300
837 windows per file after 1300 files
batch: 1300-1400
836 windows per file after 1400 files
batch: 1400-1500
836 windows per file after 1500 files
batch: 1500-1600
833 windows per file after 1600 files
batch: 1600-1700
828 windows per file after 1700 files
batch: 1700-1800
825 windows per file after 1800 files
batch: 1800-1900
825 windows per file after 1900 files
batch: 1900-2000
827 windows per file after 2000 files
batch: 2000-2100
824 windows per file after 2100 files
batch: 2100-2200
829 windows per file after 2200 files
batch: 2200-2300
831 windows per file aft

In [None]:
# Display the total number of windows accumulated by the batch loop.
count