In [1]:
import os
import re
import numpy as np

from mido import MidiFile
from multiprocessing import pool

In [2]:
# Directory the converted per-track arrays are written to.
save_dir = "./data"
# Directory that is scanned for input ".mid" files.
midi_dir = "./midi"

In [3]:
# Create the output directory (and any missing parents). exist_ok=True makes
# this safe to re-run and avoids the check-then-create race of testing
# os.path.isdir() first.
os.makedirs(save_dir, exist_ok=True)

In [4]:
# Full path of every entry directly inside midi_dir whose name ends in ".mid".
files = [
    os.path.join(midi_dir, entry)
    for entry in filter(lambda name: name.endswith(".mid"), os.listdir(midi_dir))
]

In [5]:
# Number of MIDI ticks treated as one 16th note when quantising note lengths.
# NOTE(review): 240 presumably assumes 960 ticks per quarter note -- confirm
# against mid.ticks_per_beat for the files being converted.
TICKS_PER_16TH = 240


def _track_to_notes(track, ticks_per_step=TICKS_PER_16TH):
    """Reduce one MIDI track to a monophonic sequence of (pitch, length) rows.

    A note starts at the first "note_on" seen while nothing is sounding. If
    further "note_on" messages arrive before the note ends, only the lowest
    pitch is kept. The note ends at the next note-off event, and its length
    is that message's ``time`` value integer-divided by ``ticks_per_step``.
    The length therefore reflects the delta stored on the note-off message
    itself, not an accumulated duration.

    A "note_on" with velocity 0 is treated as a note-off, following the
    common MIDI convention. A note that is still sounding when the track ends
    is dropped, so both columns always have the same number of entries.

    Parameters
    ----------
    track : iterable of mido messages
        Messages of a single track; only ``type``, ``note``, ``velocity`` and
        ``time`` are read.
    ticks_per_step : int, optional
        Tick count corresponding to one length unit (default: 240).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_notes, 2)`` whose columns are pitch and length.
    """
    pitches, lengths = [], []
    current_pitch = None  # lowest pitch of the note currently sounding, if any

    for msg in track:
        starts_note = msg.type == "note_on" and msg.velocity > 0
        ends_note = msg.type == "note_off" or (
            msg.type == "note_on" and msg.velocity == 0
        )

        if starts_note:
            # Keep only the lowest pitch among overlapping note_on messages.
            if current_pitch is None or msg.note < current_pitch:
                current_pitch = msg.note
        elif ends_note and current_pitch is not None:
            # Commit pitch and length together so the columns stay aligned.
            pitches.append(current_pitch)
            lengths.append(int(msg.time) // ticks_per_step)
            current_pitch = None

    # np.stack needs a real sequence (list/tuple), not a dict view.
    return np.stack([pitches, lengths]).T


# Convert every track of every MIDI file into its own .npy array named
# "<midi basename> - <track index>" inside save_dir.
for file in files:
    mid = MidiFile(file)
    basename = os.path.splitext(os.path.basename(file))[0]

    for i, track in enumerate(mid.tracks):
        song = _track_to_notes(track)
        filename = basename + " - {}".format(i)

        np.save(os.path.join(save_dir, filename), arr=song)