In [2]:
import pretty_midi
import numpy as np
import pandas as pd


In [7]:

# Parse the song once; later cells reuse `midi_data` and `instruments`.
midi_data = pretty_midi.PrettyMIDI('songs/darude_sandstorm.mid')
# Program numbers shown in the instrument list are General MIDI codes:
# http://www.ccarh.org/courses/253/handout/gminstruments/
instruments = midi_data.instruments

print("duration:", midi_data.get_end_time())
# Column header only -- this cell does not print any per-note rows after it.
print(f'{"note":>10} {"start":>10} {"end":>10}')
print(instruments)

# First instrument of the file (program=122 in the printed list).
# NOTE(review): the name says "piano" but program 122 is not a piano -- confirm intent.
piano1 = instruments[0]

duration: 337.8
      note      start        end
[Instrument(program=122, is_drum=False, name=""), Instrument(program=127, is_drum=False, name=""), Instrument(program=50, is_drum=False, name=""), Instrument(program=87, is_drum=False, name=""), Instrument(program=90, is_drum=False, name=""), Instrument(program=80, is_drum=False, name=""), Instrument(program=109, is_drum=False, name=""), Instrument(program=26, is_drum=False, name=""), Instrument(program=0, is_drum=True, name=""), Instrument(program=87, is_drum=False, name=""), Instrument(program=123, is_drum=False, name="")]


In [25]:
def get_df(instrument):
    """Build a per-note DataFrame for one instrument.

    Parameters
    ----------
    instrument
        Object exposing a ``notes`` iterable whose items have ``start``,
        ``end``, ``pitch`` and ``velocity`` attributes (here: the entries of
        ``midi_data.instruments``).

    Returns
    -------
    pandas.DataFrame
        One row per note with columns ``start``, ``end``, ``note`` (the
        note's ``pitch``), ``velocity``, ``freq`` (frequency derived from
        ``note``) and ``duration`` (``end - start``). An instrument without
        notes yields an empty frame with the same columns and numeric dtypes.
    """
    attrs = ['start', 'end', 'pitch', 'velocity']
    columns = ['start', 'end', 'note', 'velocity']
    rows = [[getattr(note, a) for a in attrs] for note in instrument.notes]
    instr_df = pd.DataFrame(rows, columns=columns)
    if instr_df.empty:
        # With zero rows pandas falls back to object dtype; pin numeric dtypes
        # so the derived columns and any later aggregation stay numeric.
        instr_df = instr_df.astype(
            {'start': 'float64', 'end': 'float64', 'note': 'int64', 'velocity': 'int64'}
        )
    # Note number -> frequency: note 69 maps to 440 and every 12 steps doubles
    # it. Computed on the whole column at once rather than per element.
    instr_df['freq'] = 2 ** ((instr_df['note'] - 69) / 12) * 440
    instr_df['duration'] = instr_df['end'] - instr_df['start']
    return instr_df


In [26]:
# One note table per instrument, in the same order as `instruments`.
instrument_dfs = list(map(get_df, instruments))
# How many distinct note values each instrument uses.
[notes_df['note'].nunique(dropna=False) for notes_df in instrument_dfs]

[3, 2, 1, 6, 9, 8, 4, 8, 13, 6, 3]

In [27]:
# Third-from-last instrument; in the 11-entry instrument list printed earlier
# this is the is_drum=True track (program=0).
instrument_dfs[-3]

Unnamed: 0,start,end,note,velocity,freq,duration
0,4.800,5.400,35,127,61.735413,0.600
1,4.800,5.400,49,100,138.591315,0.600
2,4.800,5.400,36,100,65.406391,0.600
3,4.800,5.400,43,100,97.998859,0.600
4,23.400,24.000,35,100,61.735413,0.600
...,...,...,...,...,...,...
1869,330.900,330.975,59,100,246.941651,0.075
1870,330.975,331.050,40,90,82.406889,0.075
1871,331.050,331.125,40,90,82.406889,0.075
1872,331.125,331.200,40,90,82.406889,0.075


In [24]:
# Sanity check of the hand-written note -> Hz formula: librosa's value for
# note 59 should agree with the freq shown for note 59 in the table above
# (246.941651).
import librosa
librosa.midi_to_hz(59)

246.94165062806206

In [8]:
# NOTE(review): `piano_df` is not created in any cell visible here -- TODO
# confirm which cell/file it comes from before relying on this cell.
def to_hz(notenum):
    """Convert a note number to a frequency (69 -> 440, doubling every 12 steps)."""
    return 2 ** ((notenum - 69)/12) * 440


# Add the derived columns to piano_df in place, then display it.
piano_df['freq'] = piano_df['note'].map(to_hz)
piano_df['duration'] = piano_df['end'] - piano_df['start']
piano_df

Unnamed: 0,start,end,note,velocity,freq,duration
0,0.00,0.300,62,108,293.664768,0.300
1,0.30,0.450,62,108,293.664768,0.150
2,0.45,0.750,62,108,293.664768,0.300
3,0.75,0.900,62,108,293.664768,0.150
4,0.90,1.200,62,108,293.664768,0.300
...,...,...,...,...,...,...
555,75.30,75.550,76,108,659.255114,0.250
556,75.60,76.000,65,108,349.228231,0.400
557,75.60,76.000,69,108,440.000000,0.400
558,75.60,76.000,74,108,587.329536,0.400


In [7]:
# Longest duration seen at each frequency, i.e. how long each note has to be
# recorded for.
piano_df.groupby('freq')['duration'].max()

freq
174.614116     0.300
195.997718     0.400
220.000000     0.525
233.081881     0.300
246.941651     0.150
261.625565     0.300
277.182631     0.450
293.664768     1.125
329.627557     0.450
349.228231     0.525
391.995436     0.300
440.000000     0.400
466.163762     0.300
523.251131     0.300
554.365262     0.300
587.329536     0.400
659.255114     0.300
698.456463     0.400
783.990872     0.300
880.000000     0.400
932.327523     0.400
1046.502261    0.250
Name: duration, dtype: float64

In [None]:
# TODO: how should MIDI velocity be mapped to volume?