In [8]:
import os
import re
import numpy as np

from mido import MidiFile
from multiprocessing import pool

In [9]:
# Notebook configuration: where the source MIDI files are read from and
# where the extracted note arrays are written to (both relative to the
# notebook's working directory).
midi_dir, save_dir = "./midi", "./data"

In [10]:
# Make sure the output directory exists before anything is saved into it.
# exist_ok=True makes this a single, race-free call (no separate
# check-then-create step) and keeps the cell safe to re-run.
os.makedirs(save_dir, exist_ok=True)

In [11]:
# Build the path of every entry directly inside midi_dir whose name ends
# with ".mid" (no recursion into sub-directories; order is whatever
# os.listdir returns).
files = [
    os.path.join(midi_dir, entry)
    for entry in os.listdir(midi_dir)
    if entry.endswith(".mid")
]

In [21]:
# Bare expression: shows the collected MIDI paths as this cell's output.
files

['./midi/Fallujah - Cerebral Hybridization - Guitar L.mid',
 './midi/Fallujah - The Flame Surreal - Guitar L.mid',
 './midi/Fallujah - Assemblage Of Wolves - Guitar R.mid']

In [22]:
def _extract_notes(track, ticks_per_step):
    """Reduce one MIDI track to a monophonic (pitch, length) sequence.

    Parameters
    ----------
    track : iterable of mido messages
        Messages of a single track; each must expose ``type`` and ``time``
        (delta ticks since the previous message), and note messages must
        also expose ``note`` and ``velocity``.
    ticks_per_step : int
        Number of MIDI ticks in one 16th note.

    Returns
    -------
    (list[int], list[int])
        Parallel lists of equal size: the pitch of each note and its
        length expressed as a whole number of 16th notes.
    """
    pitches, lengths = [], []
    sounding = False  # True from a note's first note_on until its first note_off
    elapsed = 0       # ticks accumulated since that first note_on

    for msg in track:
        if sounding:
            # msg.time is a delta, so every message seen while the note is
            # held contributes to its duration, not only the final note_off.
            elapsed += msg.time

        is_note_on = msg.type == "note_on" and msg.velocity > 0
        # A note_on with velocity 0 is the conventional way of writing a note_off.
        is_note_off = msg.type == "note_off" or (
            msg.type == "note_on" and msg.velocity == 0
        )

        if is_note_on:
            if not sounding:
                pitches.append(msg.note)
                sounding = True
                elapsed = 0
            elif msg.note < pitches[-1]:
                # Several note_on in a row are most likely a chord:
                # keep only the lowest pitch as the root note.
                pitches[-1] = msg.note
        elif is_note_off and sounding:
            lengths.append(elapsed // ticks_per_step)
            sounding = False

    # A note still sounding at the end of the track has no length; drop it
    # so that both lists stay aligned.
    del pitches[len(lengths):]
    return pitches, lengths


# NOTE(review): only the first file is processed (files[:1]) - presumably a
# leftover from experimenting; switch to `files` to convert everything.
for file in files[:1]:
    mid = MidiFile(file)

    # A 16th note is a quarter of a beat; derive its tick count from the
    # file header instead of assuming a 960 ticks-per-beat resolution.
    ticks_per_16th = max(1, mid.ticks_per_beat // 4)
    basename = os.path.splitext(os.path.basename(file))[0]

    for i, track in enumerate(mid.tracks):
        pitches, lengths = _extract_notes(track, ticks_per_16th)

        # Shape (n_notes, 2): column 0 is the pitch, column 1 the length.
        # An explicit list is passed because NumPy rejects dict views here.
        song = np.array([pitches, lengths], dtype=int).T

        # One .npy file per track: "<midi basename> - <track index>.npy".
        filename = basename + " - {}".format(i)
        np.save(os.path.join(save_dir, filename), arr=song)