In [1]:
import music21 as ms
import numpy as np
import glob
import pickle
from pprint import pprint

In [2]:
scores = []
for i, file in enumerate(glob.glob("data/*.mid")):
    score = ms.converter.parse(file)
    scores.append(score)
    print(i, file, len(score.parts))

0 data/haydn_43_1.mid 2
1 data/ty_august.mid 2
2 data/muss_2.mid 3
3 data/waldstein_1.mid 2
4 data/schumm-1.mid 2
5 data/fruehlingsrauschen_format0.mid 1
6 data/chpn_op23.mid 2
7 data/chpn-p19.mid 2
8 data/chpn_op7_2.mid 2
9 data/beethoven_opus90_2.mid 2
10 data/gra_esp_3_format0.mid 1
11 data/chpn-p18.mid 2
12 data/chpn-p24.mid 2
13 data/mendel_op19_1.mid 3
14 data/alb_esp6_format0.mid 1
15 data/grieg_kobold.mid 2
16 data/muss_3.mid 2
17 data/haydn_43_2.mid 2
18 data/muss_1.mid 2
19 data/burg_sylphen.mid 2
20 data/clementi_opus36_5_2_format0.mid 1
21 data/rac_op23_2_format0.mid 1
22 data/waldstein_2.mid 2
23 data/schumm-2.mid 2
24 data/burg_quelle.mid 2
25 data/mendel_op19_3.mid 2
26 data/schub_d960_4.mid 2
27 data/chpn_op7_1.mid 2
28 data/schum_abegg.mid 2
29 data/beethoven_opus90_1.mid 2
30 data/clementi_opus36_1_2_format0.mid 1
31 data/mendel_op19_2.mid 2
32 data/schumm-3.mid 2
33 data/waldstein_3.mid 3
34 data/haydn_8_4.mid 2
35 data/rac_op32_13_format0.mid 1
36 data/haydn_43_3.mi

296 data/chpn_op27_2.mid 2
297 data/mz_311_1.mid 2
298 data/beethoven_hammerklavier_1.mid 2
299 data/liz_et5.mid 2
300 data/scn16_1.mid 2
301 data/chpn-p1.mid 2
302 data/chpn-p17.mid 2
303 data/bor_ps6.mid 2
304 data/bor_ps4.mid 2
305 data/alb_se7_format0.mid 1
306 data/liz_rhap02.mid 2
307 data/beethoven_opus22_4.mid 2
308 data/debussy_cc_2.mid 2
309 data/chpn-p15.mid 2
310 data/chpn-p3.mid 2
311 data/haydn_35_2.mid 2
312 data/scn16_3.mid 2
313 data/chpn_op27_1.mid 2
314 data/mz_311_2.mid 3
315 data/beethoven_hammerklavier_3.mid 2
316 data/liz_et6.mid 4
317 data/mz_311_3.mid 2
318 data/burg_trennung.mid 2
319 data/beethoven_hammerklavier_2.mid 2
320 data/scn16_2.mid 2
321 data/haydn_35_3.mid 2
322 data/chpn-p2.mid 2
323 data/grieg_elfentanz.mid 2
324 data/ravel_miroirs_1_format0.mid 1
325 data/chpn-p14.mid 2
326 data/debussy_cc_3.mid 2
327 data/gra_esp_4_format0.mid 1
328 data/alb_esp1_format0.mid 1
329 data/br_im2.mid 2
330 data/clementi_opus36_4_2_format0.mid 1
331 data/bor_ps5.mid 

In [30]:
# Understanding the data

num_tracks = [0, 0, 0, 0, 0, 0]
for score in scores:
    n = len(score.parts)
    if n > 5:
        n = 5
    num_tracks[n] += 1
print(num_tracks)

[0, 58, 257, 13, 4, 0]


In [101]:
# Understanding the data

deltas = {}
for score in scores:
    for part in score.parts:
        off = 0
        for note in part.flat.notes:
            delta = round(note.offset - off, 2)
            if delta in deltas:
                deltas[delta] += 1
            else:
                deltas[delta] = 1
            off = note.offset

delta_freqs = [(d, deltas[d]) for d in sorted(deltas, key=deltas.get, reverse=True)]
pprint(delta_freqs[:10])

[(0.25, 205725),
 (0.5, 130940),
 (1.0, 50368),
 (0.0, 39742),
 (0.33, 27449),
 (0.17, 22421),
 (0.08, 21941),
 (Fraction(33, 100), 13807),
 (0.75, 11334),
 (2.0, 10588)]


In [50]:
# Trying out options

note_entries = {}
for score in scores:
    for part in score.parts:
        for note in part.flat.notes:
            if isinstance(note, ms.note.Note):
                note = ms.chord.Chord([note.pitch])
            a = note.chordTablesAddress
            entry = (a.cardinality, a.forteClass, a.inversion, a.pcOriginal)
            if entry in note_entries:
                note_entries[entry] += 1
            else:
                note_entries[entry] = 1

note_freqs = [(n, note_entries[n]) for n in sorted(note_entries, key=note_entries.get, reverse=True)]
pprint(note_freqs[:10])

[((1, 1, 0, 7), 44928),
 ((1, 1, 0, 0), 44807),
 ((1, 1, 0, 2), 43359),
 ((1, 1, 0, 5), 39717),
 ((1, 1, 0, 9), 39307),
 ((1, 1, 0, 4), 36805),
 ((1, 1, 0, 10), 35013),
 ((1, 1, 0, 3), 33116),
 ((1, 1, 0, 8), 32585),
 ((1, 1, 0, 11), 31071)]


In [75]:
# Trying out options

ex = ms.chord.Chord('A4 A3 D#5')
addr = ex.chordTablesAddress
ql = ex.quarterLength
off = ex.offset
ot = ex.bass().octave

fn = ms.chord.tables.addressToForteName(addr)
ex2 = ms.chord.fromForteClass(fn)
ex2.quarterLength = ql
ex2.offset = off
ex2.transpose(addr.pcOriginal, inPlace=True)
ex2.closedPosition(forceOctave=ot, inPlace=True)
print(ex.pitches, ex.quarterLength, ex.offset)
print(ex2.pitches, ex2.quarterLength, ex2.offset)
print(addr, fn)

(<music21.pitch.Pitch A4>, <music21.pitch.Pitch A3>, <music21.pitch.Pitch D#5>) 1.0 0.0
(<music21.pitch.Pitch E-3>, <music21.pitch.Pitch A3>) 1.0 0.0
ChordTableAddress(cardinality=2, forteClass=6, inversion=0, pcOriginal=3) 2-6


In [111]:
# Understanding the data

pitch_entries = {}
for score in scores:
    for part in score.parts:
        for note in part.flat.notes:
            pitches = []
            if isinstance(note, ms.note.Note):
                pitches.append(note.pitch.midi)
            else:
                for pitch in note.pitches:
                    pitches.append(pitch.midi)
            for pitch in pitches:
                if pitch in pitch_entries:
                    pitch_entries[pitch] += 1
                else:
                    pitch_entries[pitch] = 1

pitch_freqs = [(p, pitch_entries[p]) for p in sorted(pitch_entries, key=pitch_entries.get, reverse=True)]
print(len(pitch_freqs))
pprint(pitch_freqs[:10])

87
[(62, 26329),
 (67, 25768),
 (60, 25559),
 (72, 23828),
 (74, 23422),
 (65, 23098),
 (64, 22272),
 (69, 22187),
 (55, 21373),
 (58, 20303)]


In [113]:
# Understanding the data

print(min(pitch_freqs))
print(max(pitch_freqs))

(21, 3)
(107, 1)


In [3]:
min_pitch = 21
midi_pitches = 88
steps_per_bar = 12
max_eventless = steps_per_bar
quantize_step = 1/steps_per_bar
initial_step = quantize_step/2
one_hot_size = midi_pitches*2 + max_eventless

def pitchOn(midi):
    return midi - min_pitch

def pitchOff(midi):
    return midi_pitches + midi - min_pitch

def standby(steps):
    return 2*midi_pitches + steps - 1

In [235]:
# Reconstructing midi

off = 0
notes = []
notesOn = {}
err = 0

for one_hot in sequence:
    cells = np.where(one_hot > 0)[0]
    for cell in reversed(cells): # notes off first
        if cell < midi_pitches: # note on
            midi = cell + min_pitch
            notesOn[midi] = off
        elif midi_pitches <= cell < 2*midi_pitches: # note off
            midi = cell - midi_pitches + min_pitch
            if midi in notesOn:
                offset = round(notesOn[midi], 2)
                dur = round(off - offset, 2)
                del notesOn[midi]
                note = ms.note.Note(midi, quarterLength=dur)
                note.offset = offset
                note.storedInstrument = ms.instrument.Piano()
                notes.append(note)
            else:
                err += 1
        else: # no-op
            skip = cell - 2*midi_pitches
            off += skip * quantize_step
    off += quantize_step

for midi in notesOn:
    offset = round(notesOn[midi], 2)
    dur = round(off - offset, 2)
    note = ms.note.Note(midi, quarterLength=dur)
    note.offset = offset
    note.storedInstrument = ms.instrument.Piano()
    notes.append(note)

stream = ms.stream.Stream(notes)
stream.write('midi', fp='test_output.mid')
print(err, len(notes))

1 1532


In [4]:
def getSequence(part):
    dur = part.duration.quarterLength
    notes = part.flat.notes
    n = len(notes)
    i = 0

    # validity check
    non = 0
    noff = 0
    nop = 0
    evl = {}
    evf = 0

    sequence = []
    eventless = 0
    notesOn = {}
    off = initial_step
    while off < dur:
        one_hot = np.zeros(one_hot_size)
        event = False

        for midi in notesOn:
            notesOn[midi] -= quantize_step
        notesOff = [midi for midi, remaining in notesOn.items() if remaining <= 0]
        if notesOff:
            event = True
            for midi in notesOff:
                del notesOn[midi]
                one_hot[pitchOff(midi)] = 1
                noff += 1

        while i < n and notes[i].offset < off:
            note = notes[i]
            event = True
            if isinstance(note, ms.note.Note):
                if note.pitch.midi in notesOn:
                    one_hot[pitchOff(note.pitch.midi)] = 1
                    noff += 1
                notesOn[note.pitch.midi] = note.quarterLength
                one_hot[pitchOn(note.pitch.midi)] = 1
                non += 1
            else:
                for pitch in note.pitches:
                    if pitch.midi in notesOn:
                        one_hot[pitchOff(pitch.midi)] = 1
                        noff += 1
                    notesOn[pitch.midi] = note.quarterLength
                    one_hot[pitchOn(pitch.midi)] = 1
                    non += 1
            i += 1

        if event:
            if eventless > 0:
                standby_one_hot = np.zeros(one_hot_size)
                standby_one_hot[standby(eventless)] = 1
                sequence.append(standby_one_hot)
                if eventless in evl:
                    evl[eventless] += 1
                else:
                    evl[eventless] = 1
                eventless = 0
            evf += 1
            sequence.append(one_hot)
        else:
            nop += 1
            eventless += 1
            if eventless >= max_eventless:
                standby_one_hot = np.zeros(one_hot_size)
                standby_one_hot[standby(max_eventless)] = 1
                sequence.append(standby_one_hot)
                if max_eventless in evl:
                    evl[max_eventless] += 1
                else:
                    evl[max_eventless] = 1
                eventless = 0

        off += quantize_step

    if eventless > 0:
        standby_one_hot = np.zeros(one_hot_size)
        standby_one_hot[standby(eventless)] = 1
        sequence.append(standby_one_hot)
        if eventless in evl:
            evl[eventless] += 1
        else:
            evl[eventless] = 1

    # ending
    one_hot = np.zeros(one_hot_size)
    for midi in notesOn:
        one_hot[pitchOff(midi)] = 1
        noff += 1
    evf += 1
    sequence.append(one_hot)

    assert(len(sequence) == evf + sum(evl.values())) # eventful + eventless = sequence
    assert(non == noff) # total notes on = total notes off
    assert(dur*steps_per_bar + 1 == evf + nop) # eventful dur + eventless dur = total dur + ending
    assert(all(np.count_nonzero(s) > 0 for s in sequence[:-1])) # sequence non-zero except ending
    assert(sum(k*v for k, v in evl.items()) == nop) # total eventless = total no-ops
    
    return sequence

In [5]:
seq_length = 4*steps_per_bar
pad = np.zeros((seq_length, one_hot_size))

nn_input = []
nn_output = []

for s_idx, score in enumerate(scores):
    part = ms.instrument.partitionByInstrument(score)[0]
    try:
        seq = np.array(getSequence(part))
        padded = np.concatenate([pad, seq, pad])
        for i in range(len(padded) - seq_length):
            nn_input.append(padded[i:i+seq_length])
            nn_output.append(padded[i+seq_length])
    except:
        print(s_idx)

input_arr = np.array(nn_input)
output_arr = np.array(nn_output)
print(input_arr.shape, output_arr.shape)

(1100852, 48, 188) (1100852, 188)
