In [37]:
import mido
from mido import MidiFile
from itertools import chain
import pandas as pd

In [2]:
file = MidiFile('data/raw/albeniz/alb_esp1.mid')
file

MidiFile(type=1, ticks_per_beat=480, tracks=[
  MidiTrack([
    MetaMessage('track_name', name='Espana Op. 165', time=0),
    MetaMessage('track_name', name='Prelude', time=0),
    MetaMessage('copyright', text='Copyright © 2001 by Bernd Krueger', time=0),
    MetaMessage('text', text='Isaac Albeniz', time=0),
    MetaMessage('text', text='Andantino', time=0),
    MetaMessage('text', text='Fertiggestellt  27.01.2001\n', time=0),
    MetaMessage('text', text='Update: 18.03.2001\n', time=0),
    MetaMessage('text', text='Normierung: 23.12.2002\n', time=0),
    MetaMessage('text', text='Update am 28.10.2010\n', time=0),
    MetaMessage('text', text='Dauer: 1:36 Minuten\n', time=0),
    MetaMessage('smpte_offset', frame_rate=25, hours=32, minutes=0, seconds=3, frames=0, sub_frames=0, time=0),
    MetaMessage('time_signature', numerator=3, denominator=4, clocks_per_click=24, notated_32nd_notes_per_beat=8, time=0),
    MetaMessage('key_signature', key='F', time=0),
    MetaMessage('set_tempo

In [3]:
piano_tracks = [track for track in file.tracks if track.name.lower() in ['piano left', 'piano right']]
[track.name for track in piano_tracks]

['Piano right', 'Piano left']

In [57]:
class TrackMsgConverter:
    def __init__(self, track):
        self.messages = [msg.dict() for msg in track if msg.type in ['note_on', 'note_off']]
    
    def convert(self):
        self._convert_to_absolute_time()
        self._merge_note_ons_and_offs()
        return self.messages

    def _convert_to_absolute_time(self):
        time = 0
        for msg in self.messages:
            time += msg['time']
            msg['time'] = time

    def _merge_note_ons_and_offs(self):
        note_ons = [msg for msg in self.messages if msg['type'] == 'note_on' and msg['velocity'] != 0]
        note_offs = [msg for msg in self.messages if msg['type'] == 'note_off' or msg['velocity'] == 0]
        self.messages = [self.__merge_pair(note_on, note_off) for note_on, note_off in self.__find_pairs(note_ons, note_offs)]
            
    @staticmethod
    def __merge_pair(note_on, note_off):
        return {'note': note_on['note'], 'velocity': note_on['velocity'], 
                'start': note_on['time'], 'length': note_off['time'] - note_on['time']}

    @staticmethod
    def __find_pairs(note_ons, note_offs):
        for note_on in note_ons:
            note_off_index = next(i for i, note_off in enumerate(note_offs) if note_off['note'] == note_on['note'])
            note_off = note_offs.pop(note_off_index)
            yield note_on, note_off
        
            
def convert_and_merge_tracks(tracks):
    tracks = [TrackMsgConverter(track).convert() for track in tracks]
    messages = list(chain(*tracks))
    return sorted(messages, key=lambda m: m['start'])


messages = pd.DataFrame(convert_and_merge_tracks(piano_tracks))
messages

Unnamed: 0,note,velocity,start,length
0,81,60,240,240
1,57,48,240,212
2,64,52,452,1560
3,88,66,480,1560
4,62,42,2012,60
...,...,...,...,...
629,93,40,81600,240
630,97,38,81840,240
631,100,36,82080,1440
632,93,36,82080,1440


In [58]:
messages.length = messages.length // 6
messages.length

0       40
1       35
2      260
3      260
4       10
      ... 
629     40
630     40
631    240
632    240
633    240
Name: length, Length: 634, dtype: int64

In [59]:
messages.length.mean()

43.534700315457414