<a href="https://colab.research.google.com/github/curtiscu/LYIT/blob/master/MIDO_MidiFile_Wrapper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MIDI tools, and file loading using MIDO library

Re-write of MIDO MIDI file work using python objects

# Setup notebook env

In [1]:
# mount google drive

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# test ..
! ls -al '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/'

Mounted at /content/drive
total 35
-rw------- 1 root root 2589 Apr 27 12:01 10_soul-groove10_102_beat_4-4.mid
-rw------- 1 root root 4793 Apr 27 12:01 1_funk-groove1_138_beat_4-4.mid
-rw------- 1 root root 3243 Apr 27 12:01 2_funk-groove2_105_beat_4-4.mid
-rw------- 1 root root 4466 Apr 27 12:01 3_soul-groove3_86_beat_4-4.mid
-rw------- 1 root root 2551 Apr 27 12:01 4_soul-groove4_80_beat_4-4.mid
-rw------- 1 root root 3798 Apr 27 12:01 5_funk-groove5_84_beat_4-4.mid
-rw------- 1 root root 3760 Apr 27 12:01 6_hiphop-groove6_87_beat_4-4.mid
-rw------- 1 root root 1894 Apr 27 12:01 7_pop-groove7_138_beat_4-4.mid
-rw------- 1 root root 2437 Apr 27 12:01 8_rock-groove8_65_beat_4-4.mid
-rw------- 1 root root 3448 Apr 27 12:01 9_soul-groove9_105_beat_4-4.mid


In [2]:
# install required libs
!pip install mido




In [0]:

# print all cell output
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# imports
import pandas as pd
import mido 
from mido import MidiFile


# Some MIDI tools

## MIDI note mapping to percussion instruments

In [4]:

class MidiTools:
  '''
  Convert to/ from MIDI notes to percussion instrumentz
  As per http://www.midi.org/techspecs/gm1sound.php
  '''

  note2Instrument = { 35: "Acoustic Bass Drum",
                36: "Bass Drum 1",
                37: "Side Stick", 
                38: "Acoustic Snare",
                39: "Hand Clap",
                40: "Electric Snare",
                41: "Low Floor Tom",
                42: "Closed Hi Hat",
                43: "High Floor Tom",
                44: "Pedal Hi-Hat",
                45: "Low Tom",
                46: "Open Hi-Hat",
                47: "Low-Mid Tom",
                48: "Hi-Mid Tom",
                49: "Crash Cymbal 1",
                50: "High Tom",
                51: "Ride Cymbal 1",
                52: "Chinese Cymbal",
                53: "Ride Bell",
                54: "Tambourine",
                55: "Splash Cymbal",
                56: "Cowbell",
                57: "Crash Cymbal 2",
                58: "Vibraslap",
                59: "Ride Cymbal 2",
                60: "Hi Bongo",
                61: "Low Bongo",
                62: "Mute Hi Conga",
                63: "Open Hi Conga",
                64: "Low Conga",
                65: "High Timbale",
                66: "Low Timbale",
                67: "High Agogo",
                68: "Low Agogo",
                69: "Cabasa",
                70: "Maracas",
                71: "Short Whistle",
                72: "Long Whistle",
                73: "Short Guiro",
                74: "Long Guiro",
                75: "Claves",
                76: "Hi Wood Block",
                77: "Low Wood Block",
                78: "Mute Cuica",
                79: "Open Cuica",
                80: "Mute Triangle",
                81: "Open Triangle" }
  
  def mapInstrument(midi_note):
    '''
    Takes MIDI note number, returns None if not found, otherwise 
    returns a string name of the percussion instrument
    '''
    answer = None
    if midi_note in MidiTools.note2Instrument:
      answer = MidiTools.note2Instrument[midi_note]

    return answer

  def getInstruments(instrument_list):
    '''
    Takes a list of MIDI numeric notes, returns a list
    of string names of instruments played on this track
    '''
    # NOTE: concise notation copied from https://stackoverflow.com/a/38702484
    return [*map(MidiTools.mapInstrument, instrument_list)]
    

# take a look at what's we've done so far..
dir(MidiTools)
print('')
display(MidiTools.note2Instrument)


['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'getInstruments',
 'mapInstrument',
 'note2Instrument']




{35: 'Acoustic Bass Drum',
 36: 'Bass Drum 1',
 37: 'Side Stick',
 38: 'Acoustic Snare',
 39: 'Hand Clap',
 40: 'Electric Snare',
 41: 'Low Floor Tom',
 42: 'Closed Hi Hat',
 43: 'High Floor Tom',
 44: 'Pedal Hi-Hat',
 45: 'Low Tom',
 46: 'Open Hi-Hat',
 47: 'Low-Mid Tom',
 48: 'Hi-Mid Tom',
 49: 'Crash Cymbal 1',
 50: 'High Tom',
 51: 'Ride Cymbal 1',
 52: 'Chinese Cymbal',
 53: 'Ride Bell',
 54: 'Tambourine',
 55: 'Splash Cymbal',
 56: 'Cowbell',
 57: 'Crash Cymbal 2',
 58: 'Vibraslap',
 59: 'Ride Cymbal 2',
 60: 'Hi Bongo',
 61: 'Low Bongo',
 62: 'Mute Hi Conga',
 63: 'Open Hi Conga',
 64: 'Low Conga',
 65: 'High Timbale',
 66: 'Low Timbale',
 67: 'High Agogo',
 68: 'Low Agogo',
 69: 'Cabasa',
 70: 'Maracas',
 71: 'Short Whistle',
 72: 'Long Whistle',
 73: 'Short Guiro',
 74: 'Long Guiro',
 75: 'Claves',
 76: 'Hi Wood Block',
 77: 'Low Wood Block',
 78: 'Mute Cuica',
 79: 'Open Cuica',
 80: 'Mute Triangle',
 81: 'Open Triangle'}

In [5]:
# testing method
note = 52.0
print('Testing, check perc table for {}: {}'. format(note, MidiTools.mapInstrument(note)))

Testing, check perc table for 52.0: Chinese Cymbal


# Python class to wrapper mido.MidiFile objects

Need to clean up code, make it re-usable for parsing multiple MIDI file performance files.



- time signature gathered from MIDI message within file, meta 'time_signature'
- typically 96 - 480 ticks per beat, from 'mido.MidiFile.ticks_per_beat' attribute
- tempo in microseconds per beat, gathered from 'set_tempo' MIDI message in file, converted using mido.tempo2bpm/ bpm2tempo, e.g. 434783 is 138 bpm
- conversion from ticks to seconds -> e.g. mido.tick2second(34, 480, 434783)
microseconds per tick = microseconds per quarter note / ticks per quarter note




## Class code: MIDI_File_Wrapper

NOTE: this next cell is the bulk of the work for this notebook, the rest of the cells are mostly testing for it.

In [0]:
class MIDI_File_Wrapper:
  '''
  Utility wrapper for loading, parsding a mido.MidiFile object
  '''

  # column headers for internal data frame 
  # containing MIDI messages loaded from file
  vel_col = 'velocity'
  note_col = 'note'
  type_col = 'msg_type'
  time_col = 'delta_ticks'
  cum_ticks_col = 'total_ticks'
  raw_col = 'raw_data'
  cum_ms_col = 'total_seconds'

  # used for setting order of columns in data model df
  __column_in_order = [type_col, time_col, cum_ticks_col, cum_ms_col, note_col, vel_col, raw_col]

  def __init__(self, file_name):
    self.my_file_name = file_name   # string filename
    self.my_file_midi = None        # mido.MidiFile instance   
    self.my_tempo = None            # stored as mido.Message instance
    self.my_time_sig = None         # stored as mido.Message instance
    self.df_midi_data = None        # DataFrame holding MIDI messages
    self.instruments = None         # list of instruments played in file

    # load file and gather data...
    self.parse_file()


  # For call to str(). Prints readable form 
  def __str__(self): 
    return str('file: {}'.format(self.my_file_midi))
    
  
  def parse_file(self):
    '''
    File must be: MIDI type 0 only;  one and only one tempo and time_sig meta messages in file. 
    '''

    print('__ FILE: {}'.format(self.my_file_name))

    # load file
    midi_file = MidiFile(self.my_file_name)
    self.my_file_midi = midi_file 

    # make sure it's MIDI type 0 (single track) ...
    if midi_file.type != 0:
      raise ValueError('ERROR! Can only process type 0 files, this file is type: {}'.format(midi_file.type))

    print('tracks: {}'.format(midi_file.tracks))

    # another check for single track ...
    if len(midi_file.tracks) != 1:
      raise ValueError('ERROR! Need a single MIDI track, this file has: {}, {}'.format(midi_file.tracks, midi_file))

    # parse messages for time_sig and tempo info ..
    for msg in midi_file:

      if msg.type == 'time_signature':
        print('time sig: {}'.format(msg))

        # make sure no time sig changes
        if self.my_time_sig != None:
          raise ValueError('ERROR! more than one time sig: {}, {}'.format(self.my_time_sig, msg))
      
        self.my_time_sig = msg

      elif msg.type == 'set_tempo':

        print('tempo: {}'.format(msg))

        # make sure no tempo changes
        if self.my_tempo != None:
          raise ValueError('ERROR! more than one tempo: {}, {}'.format(self.my_tempo, msg))
        
        self.my_tempo = msg

    # now check we actually have tempo and time_sig set, or complain...
    if self.my_time_sig is None:
      raise ValueError('ERROR! no time signature found: {}'.format(midi_file))
    if self.my_tempo is None:
      raise ValueError('ERROR! no tempo found: {}'.format(midi_file))

    # load MIDI messages from file into DF
    self.__load_df()

    # quick debug to show instruments in file
    print('my instruments: {}'.format(self.instruments))



  def tempo_us(self):
    ''' Tempo in microseconds'''
    return self.my_tempo.tempo

  def tempo_bpm(self):
    ''' Tempo in bpm'''
    return mido.tempo2bpm(self.tempo_us())

  def ticks(self):
    ''' Returns number of MIDI ticks configured in this file'''
    return self.my_file_midi.ticks_per_beat

  def length(self):
    ''' returns running time in seconds'''
    return self.my_file_midi.length

  def msg_counts(self):
    ''' handy for debug '''
    return self.df_midi_data['msg_type'].value_counts()

  def ts_num(self):
    ''' Time signature numerator (top number)'''
    return self.my_time_sig.numerator

  def ts_denom(self):
    ''' Time signature denominator (bottom number) '''
    return self.my_time_sig.denominator

  def calculate_seconds(self, ticks_since_start):
    ''' 
    Takes elapsed ticks since start of files, returns 
        position in file in absolute seconds'''

    # uses ticks and tempo saved from file loading time..
    return mido.tick2second(ticks_since_start, self.ticks(), self.tempo_us())



  def __row_to_seconds(self, row):
    return self.calculate_seconds(row[self.cum_ticks_col])


  def __load_df(self):
    df_setup = []

    # build df structure from the MIDI file...
    for msg in self.my_file_midi.tracks[0]:
      df_setup.append(
          {
              self.type_col: msg.dict()['type'],
              self.time_col: msg.dict()['time'],
              self.note_col: None if 'note' not in msg.dict() else msg.dict()['note'],
              self.vel_col: None if 'velocity' not in msg.dict() else msg.dict()['velocity'],
              self.raw_col:  str(msg.dict()) # saves whole message in case needed later
          } 
      )

    df_tmp = pd.DataFrame(df_setup)

    # tweak data types, change from 'object' columns to 'string'  ...
    df_tmp[self.type_col] = df_tmp[self.type_col].astype('string')
    df_tmp[self.raw_col] = df_tmp[self.raw_col].astype('string')
    
    # add cumulative tick count column, used to store a running total
    # giving time a message appears in the performance/ MIDI file.
    df_tmp[self.cum_ticks_col] = df_tmp[self.time_col].cumsum()

    # add cumulative milliseconds from start of file
    # NOTE: this timing needs to be recalculated if the tempo
    #         is ever changed!!!
    df_tmp[self.cum_ms_col] = df_tmp.apply(self.__row_to_seconds, axis=1)

    # grab list of instruments used in file
    drum_stuff = df_tmp.note.unique()
    drum_stuff.sort()
    self.instruments = drum_stuff[pd.notnull(drum_stuff)]  # filters NaN 

    # set column order
    df_tmp = df_tmp[MIDI_File_Wrapper.__column_in_order]
  
    # store final df
    self.df_midi_data = df_tmp

    


## Test 'MIDI_File_Wrapper' with single file




In [7]:
# test loading single file..
file_1 = '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/1_funk-groove1_138_beat_4-4.mid'
mfw = MIDI_File_Wrapper(file_1)

__ FILE: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/1_funk-groove1_138_beat_4-4.mid
tracks: [<midi track 'MIDI' 1300 messages>]
time sig: <meta message time_signature numerator=4 denominator=4 clocks_per_click=24 notated_32nd_notes_per_beat=8 time=0>
tempo: <meta message set_tempo tempo=434783 time=0>
my instruments: [36. 37. 38. 40. 43. 44. 51. 52. 53. 55.]


In [8]:
# use MidiTools to look at pretty list of instruments
print( MidiTools.getInstruments(mfw.instruments))


['Bass Drum 1', 'Side Stick', 'Acoustic Snare', 'Electric Snare', 'High Floor Tom', 'Pedal Hi-Hat', 'Ride Cymbal 1', 'Chinese Cymbal', 'Ride Bell', 'Splash Cymbal']


In [9]:
# take a look at what we got..
mfw.df_midi_data.info()
mfw.df_midi_data.tail(10)
mfw.df_midi_data
print('file length in seconds: {}'.format(mfw.length()))
print('tempo in ms: {}'.format(mfw.tempo_us()))
print('tempo in bpm: {}'.format(mfw.tempo_bpm()))


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300 entries, 0 to 1299
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   msg_type       1300 non-null   string 
 1   delta_ticks    1300 non-null   int64  
 2   total_ticks    1300 non-null   int64  
 3   total_seconds  1300 non-null   float64
 4   note           820 non-null    float64
 5   velocity       820 non-null    float64
 6   raw_data       1300 non-null   string 
dtypes: float64(3), int64(2), string(2)
memory usage: 71.2 KB


Unnamed: 0,msg_type,delta_ticks,total_ticks,total_seconds,note,velocity,raw_data
1290,note_on,106,30448,27.579735,38.0,116.0,"{'type': 'note_on', 'time': 106, 'note': 38, '..."
1291,note_on,4,30452,27.583358,51.0,74.0,"{'type': 'note_on', 'time': 4, 'note': 51, 've..."
1292,note_off,107,30559,27.680279,38.0,64.0,"{'type': 'note_off', 'time': 107, 'note': 38, ..."
1293,note_off,4,30563,27.683902,51.0,64.0,"{'type': 'note_off', 'time': 4, 'note': 51, 'v..."
1294,control_change,51,30614,27.730097,,,"{'type': 'control_change', 'time': 51, 'contro..."
1295,control_change,44,30658,27.769953,,,"{'type': 'control_change', 'time': 44, 'contro..."
1296,note_on,0,30658,27.769953,44.0,25.0,"{'type': 'note_on', 'time': 0, 'note': 44, 've..."
1297,control_change,36,30694,27.802561,,,"{'type': 'control_change', 'time': 36, 'contro..."
1298,note_off,77,30771,27.872308,44.0,64.0,"{'type': 'note_off', 'time': 77, 'note': 44, '..."
1299,end_of_track,0,30771,27.872308,,,"{'type': 'end_of_track', 'time': 0}"


Unnamed: 0,msg_type,delta_ticks,total_ticks,total_seconds,note,velocity,raw_data
0,track_name,0,0,0.000000,,,"{'type': 'track_name', 'name': 'MIDI', 'time': 0}"
1,instrument_name,0,0,0.000000,,,"{'type': 'instrument_name', 'name': 'Brooklyn'..."
2,time_signature,0,0,0.000000,,,"{'type': 'time_signature', 'numerator': 4, 'de..."
3,key_signature,0,0,0.000000,,,"{'type': 'key_signature', 'key': 'C', 'time': 0}"
4,smpte_offset,0,0,0.000000,,,"{'type': 'smpte_offset', 'frame_rate': 24, 'ho..."
...,...,...,...,...,...,...,...
1295,control_change,44,30658,27.769953,,,"{'type': 'control_change', 'time': 44, 'contro..."
1296,note_on,0,30658,27.769953,44.0,25.0,"{'type': 'note_on', 'time': 0, 'note': 44, 've..."
1297,control_change,36,30694,27.802561,,,"{'type': 'control_change', 'time': 36, 'contro..."
1298,note_off,77,30771,27.872308,44.0,64.0,"{'type': 'note_off', 'time': 77, 'note': 44, '..."


file length in seconds: 27.872307693749978
tempo in ms: 434783
tempo in bpm: 137.99987580011177


## Test 'MIDI_File_Wrapper' by loading lots of files



## Build file list

In [10]:
# testing parsing sub directory..

# reminder: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/

import os

for dirpath, dirs, files in os.walk('/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/'):
  for f in files:
    print('in subdir: {}, first file: {}'.format(dirpath, f)) 
    break

in subdir: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/session2, first file: 4_jazz_120_beat_3-4.mid
in subdir: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/session3, first file: 6_dance-disco_120_beat_4-4.mid
in subdir: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session, first file: 1_funk-groove1_138_beat_4-4.mid
in subdir: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/session1, first file: 123_funk_95_fill_4-4.mid


In [11]:
import glob

root_dir = '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/'
#root_dir = '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/session1/'

# recursively find all MIDI files..
my_files = []
for filename in glob.iglob(root_dir + '**/*.mid', recursive=True):
  my_files.append(filename)

# show what I found
my_files
print()
print('total # files: {}'.format(len(my_files)))


['/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/1_funk-groove1_138_beat_4-4.mid',
 '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/2_funk-groove2_105_beat_4-4.mid',
 '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/10_soul-groove10_102_beat_4-4.mid',
 '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/4_soul-groove4_80_beat_4-4.mid',
 '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/6_hiphop-groove6_87_beat_4-4.mid',
 '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/8_rock-groove8_65_beat_4-4.mid',
 '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/3_soul-groove3_86_beat_4-4.mid',
 '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/9_soul-groove9_105_beat_4-4.mid',
 '/content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/5_funk-groove5_84_beat_4-4


total # files: 10


## Bulk object creation


On creating a MIDI_File_Wrapper instance, a bunch of testing is performed, so the following is a good test to check a whole bunch of files at once to confirm bulk loading works, and to verify the files themselves meet criteria checked during loading..

In [12]:
my_midi_files = []

for f in my_files:
  next_midi = MIDI_File_Wrapper(f)
  #print(next_midi.df_midi_data.info())
  my_midi_files.append(next_midi)
  
print('')
print('TOTAL NUM OBJECTS CREATED: {}'.format(len(my_midi_files)))

__ FILE: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/1_funk-groove1_138_beat_4-4.mid
tracks: [<midi track 'MIDI' 1300 messages>]
time sig: <meta message time_signature numerator=4 denominator=4 clocks_per_click=24 notated_32nd_notes_per_beat=8 time=0>
tempo: <meta message set_tempo tempo=434783 time=0>
my instruments: [36. 37. 38. 40. 43. 44. 51. 52. 53. 55.]
__ FILE: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/2_funk-groove2_105_beat_4-4.mid
tracks: [<midi track 'MIDI' 829 messages>]
time sig: <meta message time_signature numerator=4 denominator=4 clocks_per_click=24 notated_32nd_notes_per_beat=8 time=0>
tempo: <meta message set_tempo tempo=571429 time=0>
my instruments: [22. 26. 36. 37. 38. 40. 42. 44. 52.]
__ FILE: /content/drive/My Drive/groove-v1.0.0-midionly/groove/drummer1/eval_session/10_soul-groove10_102_beat_4-4.mid
tracks: [<midi track 'MIDI' 639 messages>]
time sig: <meta message time_signature numerator=4 deno