* First, read data in default format using MIDO (more info on "MessinWithMIDO")

* This program pulls feature information from the default data format

In [None]:
# Mount Google Drive at /content/drive so the file paths used in the
# cells below (all under /content/drive/MyDrive/...) can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
pip install mido



In [None]:
import mido
from mido import MidiFile

In [None]:
# This example grabs a single midi file (song)
# Change file path to grab more files

path = '/content/drive/MyDrive/Colab Notebooks/Pattern rec/Music project/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi'
mid = mido.MidiFile(path)

# mididict: dicts of every note_on / note_off / time_signature message,
#           with 'time' rewritten from a delta to time since the song start.
# output:   one [type, note, time, velocity] list per note_on / note_off.
mididict = []
output = []

# Iterating over a MidiFile yields every message with `time` set to the
# delta since the PREVIOUS message of any type. The running total therefore
# has to be advanced for every message, including the ones we do not keep
# (control changes, other meta messages, ...); otherwise their deltas are
# silently dropped and all later timestamps come out too early.
elapsed = 0
for msg in mid:
    elapsed += msg.time

    if msg.type not in ('note_on', 'note_off', 'time_signature'):
        continue

    event = msg.dict()
    # change time values from delta to time since the start of the song
    event['time'] = elapsed

    # May remove this section -- does it lose information about rests?
    # make every note_on with 0 velocity note_off
    if event['type'] == 'note_on' and event['velocity'] == 0:
        event['type'] = 'note_off'

    mididict.append(event)

    # time_signature messages stay in mididict only; output holds notes.
    if event['type'] in ('note_on', 'note_off'):
        output.append([event['type'], event['note'], event['time'], event['velocity']])


In [None]:
# Peek at the first ten parsed events, one [type, note, time, velocity] row per line
preview = output[:10]
for event in preview:
    print(event)

['note_on', 71, 0.13333333333333333, 60]
['note_off', 71, 0.134375, 0]
['note_on', 55, 0.14375, 44]
['note_on', 71, 0.15312499999999998, 54]
['note_on', 59, 0.16874999999999998, 55]
['note_off', 55, 0.1958333333333333, 0]
['note_off', 59, 0.28125, 0]
['note_on', 62, 0.2833333333333333, 52]
['note_off', 62, 0.3520833333333333, 0]
['note_on', 72, 0.38541666666666663, 76]


### How many notes are played at a time?

* Do we want a range of how many notes (find the max for the whole song?)

* Or how many notes per unit of time (measure?)

In [None]:
# Find the largest number of notes held at once over the whole song.
# Walk the events in order keeping a running tally: +1 for each note_on,
# -1 for anything else, and remember the highest tally reached.

maxNotes = 0
count = 0
for event in output:
  count += 1 if event[0] == 'note_on' else -1
  maxNotes = max(maxNotes, count)

print(f"Maximum number of notes played at the same time in the whole song is {maxNotes}")

Maximum number of notes played at the same time in the whole song is 6


6 seems small so I'm going to find out what the song is and listen to it to see if that makes sense

In [None]:
import pandas as pd

In [None]:
# Load the MAESTRO v3.0.0 metadata JSON from Drive into a DataFrame.
df3 = pd.read_json("/content/drive/MyDrive/Colab Notebooks/Pattern rec/Music project/maestro-v3.0.0/maestro-v3.0.0.json")

In [None]:
# Print the composer and title recorded in the metadata for the MIDI file
# parsed earlier. A boolean mask picks the matching row(s) directly rather
# than stepping through positions with df3[col][i], which only works while
# the index labels happen to be 0..n-1.
targetFile = '2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi'
isMatch = df3['midi_filename'] == targetFile
for composer, title in zip(df3.loc[isMatch, 'canonical_composer'],
                           df3.loc[isMatch, 'canonical_title']):
  print(composer)
  print(title)

Johann Sebastian Bach
French Suite No. 5 in G Major


I listened to the song and looked at (some of) the sheet music, and 6 notes looks reasonable

In [None]:
# Next, try # of notes per time interval


### Sequences of notes/distance between notes played in a row (interval)
* only look at note_on, calculate distance between note
* only look for certain time units?

In [None]:
# This will find the average "distance" between one note and the next,
# measured as the absolute difference of consecutive note_on note numbers
# (MIDI note numbers, so one step is one semitone).
import numpy as np

# Only note_on events start a note; note_off events are ignored here.
onPitches = [event[1] for event in output if event[0] == 'note_on']
numNoteOn = len(onPitches)

# N notes have N-1 gaps between them, so pair each note with its successor
# and divide by the number of pairs (not by the number of notes).
distanceSum = sum(abs(nxt - cur) for cur, nxt in zip(onPitches, onPitches[1:]))
numIntervals = numNoteOn - 1

# With fewer than two notes there is no interval to average.
avgDist = distanceSum/numIntervals if numIntervals > 0 else 0.0
print(f"The average interval between notes played in a sequence is {avgDist}")

The average interval between notes played in a sequence is 13.710919685837345


I used the print lines to double check the number that I got, and based on looking at a couple of chunks of data, it looks like the loop works as intended

Another thing to check may be the interval between notes that are played at the same time

### How quickly notes are played
* difference between time events

* note_on to note_on for any note

In [None]:
# Find the average difference between note_on events
# (any note to the next note, regardless of pitch).
# Times in `output` are running totals of the per-message times reported
# by mido, which are in seconds when iterating over a MidiFile.

onTimes = [event[2] for event in output if event[0] == 'note_on']
numNoteOn = len(onTimes)

# N note_on events have N-1 gaps between them, so pair each onset with the
# next one and divide by the number of pairs (not by the number of notes).
timeDiffSum = sum(abs(later - earlier) for earlier, later in zip(onTimes, onTimes[1:]))
numGaps = numNoteOn - 1

# With fewer than two note_on events there is no gap to average.
avgTime = timeDiffSum/numGaps if numGaps > 0 else 0.0

print(f"The average time between notes played in a sequence is {avgTime} time units (?)")

The average time between notes played in a sequence is 0.06786800101342855 time units (?)


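Need to double check time units to see if this makes sense (when iterating over a `MidiFile`, mido reports each message's `time` in seconds, so these values should be in seconds)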

### How long are notes on average
* note_on to note_off for the same note

In [None]:
# Might be better suited for the other format

### Velocity
* Range?

* Average velocity?

In [None]:
# Find the range of velocity (currently only the upper end is reported)

# Velocity is the fourth field of every event row, note_on and note_off alike.
velocities = [event[3] for event in output]

maxVelocity = max(velocities)
print(f"The max velocity is {maxVelocity}")

The max velocity is 96


In [None]:
# Find the average velocity

# The note_on count is recomputed here, as in the earlier cells;
# it could be computed once and reused instead.
noteOnVelocities = [event[3] for event in output if event[0] == 'note_on']
numNoteOn = len(noteOnVelocities)
velocitySum = sum(noteOnVelocities)

avgVelocity = velocitySum/numNoteOn
print(f"The average velocity for the whole song is {avgVelocity}")

The average velocity for the whole song is 69.02584241195845
