This notebook implements the data processing pipeline for MIDI files: **reading the files from a directory, transforming them into one-hot vectors, and cutting the note sequences into fixed-length samples for training.**

In [1]:
from music21 import *
import numpy as np
from fractions import Fraction
import os

## Read from directory

In [2]:
# Velocities outside this range are clipped to the nearest bound.
_MIN_VELOCITY = 20
_MAX_VELOCITY = 95
# Durations (in quarter lengths) above this value are clipped to it.
_MAX_DURATION = 2


def _clip_velocity(velocity):
    """Clamp a velocity into [_MIN_VELOCITY, _MAX_VELOCITY]."""
    return max(_MIN_VELOCITY, min(_MAX_VELOCITY, velocity))


def _quantize_duration(duration):
    """Map a quarter-length duration onto the reduced duration vocabulary."""
    if duration > _MAX_DURATION:
        return _MAX_DURATION
    if isinstance(duration, Fraction):
        # Fractional values of 5/6 and above are rounded to a whole number of
        # quarter notes; 5/12 is merged into 0.5. Other fractions are kept.
        if duration >= Fraction(5, 6):
            return round(duration)
        if duration == Fraction(5, 12):
            return 0.5
    return duration


def _event_tuple(element):
    """Return (pitch, duration, velocity) for a note or chord, else None.

    `pitch` is a single MIDI number for a note and a list of MIDI numbers
    for a chord.
    """
    if isinstance(element, note.Note):
        pitch = element.pitch.midi
    elif isinstance(element, chord.Chord):
        pitch = [n.pitch.midi for n in element.notes]
    else:
        return None
    return (
        pitch,
        _quantize_duration(element.duration.quarterLength),
        _clip_velocity(element.volume.velocity),
    )


def read_midi(filepath):
    """Parse a MIDI file into two aligned event lists, `right` and `left`.

    Only parts whose string representation contains 'Piano' are read.
    Each note/chord becomes a (pitch, duration, velocity) tuple with the
    velocity clipped to [20, 95] and the duration quantized (see
    `_quantize_duration`).

    An event whose offset differs from the previously stored offset is
    appended to `right`, together with an all-zero placeholder in `left`.
    An event at the same offset replaces the last entry of `left`, so at
    most one such event per offset is kept (the last one seen).

    Parameters
    ----------
    filepath : path of the MIDI file, passed to `converter.parse`.

    Returns
    -------
    (right, left) : two lists of equal length. Both are empty when the
        file cannot be partitioned by instrument or has no piano part.
    """
    right = []
    left = []
    offset = -1

    midi_data = converter.parse(filepath)
    s2 = instrument.partitionByInstrument(midi_data)

    # Some music21 versions return None when no instrument is found;
    # there is then no piano part to read.
    if s2 is None:
        return right, left

    for part in s2.parts:
        # select elements of only piano
        if 'Piano' not in str(part):
            continue

        for element in part.recurse():
            event = _event_tuple(element)
            if event is None:
                # neither a note nor a chord
                continue

            if element.offset != offset:
                offset = element.offset
                right.append(event)
                # NOTE: the placeholder has four zeros while real events have
                # three fields; kept as-is so the returned data is unchanged.
                left.append((0, 0, 0, 0))
            else:
                left[-1] = event

    return right, left

In [3]:
## Read from directory

# Root folder holding one sub-folder of MIDI files per composer. Set the
# MIDI_DATA_DIR environment variable to use a different location.
path = os.environ.get('MIDI_DATA_DIR', 'E:/2022Fall/ECE685D/Project/Data/')

# Only sub-directories count as composers (stray files would otherwise crash
# os.listdir below). os.listdir returns entries in arbitrary order, so sort
# to keep the composer -> label index assignment stable between runs.
composer_folders = sorted(
    folder for folder in os.listdir(path)
    if os.path.isdir(os.path.join(path, folder))
)

right_all = []   # per file: events returned as `right` by read_midi
left_all = []    # per file: events returned as `left` by read_midi
composer = []    # per file: one-hot composer label

for idx, folder in enumerate(composer_folders):
    directory = os.path.join(path, folder)
    for file in sorted(os.listdir(directory)):
        if not file.endswith(".mid"):
            continue
        midi_path = os.path.join(directory, file)
        print(midi_path)
        right, left = read_midi(midi_path)
        right_all.append(right)
        left_all.append(left)
        # Label width follows the number of composer folders instead of a
        # hard-coded 3, so adding a composer no longer raises IndexError.
        one_hot = [0] * len(composer_folders)
        one_hot[idx] = 1
        composer.append(one_hot)

E:/2022Fall/ECE685D/Project/Data/beeth\appass_1.mid
E:/2022Fall/ECE685D/Project/Data/beeth\appass_2.mid
E:/2022Fall/ECE685D/Project/Data/beeth\appass_3.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_hammerklavier_1.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_hammerklavier_2.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_hammerklavier_3.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_hammerklavier_4.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_les_adieux_1.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_les_adieux_2.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_les_adieux_3.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_opus10_1.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_opus10_2.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_opus10_3.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_opus22_1.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_opus22_2.mid
E:/2022Fall/ECE685D/Project/Data/beeth\beethoven_opus22_3.mi

## Find all notes and generate an index dictionary

In [4]:
# Every non-zero pitch, duration and velocity seen in the data set
# (duplicates included); the vocabularies are derived from these lists.
p_li, t_li, v_li = [], [], []

for i, right in enumerate(right_all):
    left = left_all[i]
    for j, r_ele in enumerate(right):
        l_ele = left[j]
        pair = (r_ele, l_ele)

        # Pitch field: a list of MIDI numbers for a chord, a single number
        # for a note, or 0 for the empty placeholder.
        for ele in pair:
            pitch = ele[0]
            if type(pitch) is list:
                p_li.extend(pitch)
            elif pitch != 0:
                p_li.append(pitch)

        # Duration and velocity fields: 0 marks the empty placeholder.
        t_li.extend(ele[1] for ele in pair if ele[1] != 0)
        v_li.extend(ele[2] for ele in pair if ele[2] != 0)

In [6]:
### Duration vector dictionary

# Distinct durations observed in the data, sorted for inspection.
time_set = sorted(set(t_li))

# Fixed duration -> index vocabulary. It is written out explicitly (rather
# than derived from the data) so the index of each duration stays the same
# from run to run. The key 2 is the integer produced by duration clipping.
tdict = {
    0.25: 0,
    0.5: 1,
    0.75: 2,
    1.0: 3,
    1.25: 4,
    1.5: 5,
    1.75: 6,
    2: 7,
    Fraction(1, 6): 8,
    Fraction(2, 3): 9,
    Fraction(1, 3): 10,
    Fraction(1, 12): 11,
}

# Report durations the fixed vocabulary cannot encode; they would otherwise
# only surface later as a bare KeyError during one-hot encoding.
unmapped_durations = [t for t in time_set if t not in tdict]
if unmapped_durations:
    print('Warning: durations missing from tdict:', unmapped_durations)

In [7]:
### Pitch vector dictionary

# Distinct MIDI pitches observed in the data, sorted for inspection.
pitch_set = sorted(set(p_li))

# Fixed pitch -> index vocabulary: MIDI pitches 24..101 map to indices 0..77.
# It is fixed (rather than derived from the data) so the index of each pitch
# stays the same from run to run.
pdict = {pitch: index for index, pitch in enumerate(range(24, 102))}

# Report pitches the fixed vocabulary cannot encode; they would otherwise
# only surface later as a bare KeyError during one-hot encoding.
unmapped_pitches = [p for p in pitch_set if p not in pdict]
if unmapped_pitches:
    print('Warning: pitches missing from pdict:', unmapped_pitches)

In [8]:
### Velocity vector dictionary

# Distinct velocities observed in the data, in ascending order.
vel_set = sorted(set(v_li))

# Fixed velocity -> index vocabulary: velocities 20..95 map to indices 0..75,
# i.e. the range read_midi clips velocities to.
vdict = {velocity: velocity - 20 for velocity in range(20, 96)}

## Transform into one-hot vectors

In [9]:
def _one_hot(index_by_value, values):
    """Return a zero vector of len(index_by_value) with a 1 set for each value."""
    vec = np.zeros(len(index_by_value))
    for value in values:
        vec[index_by_value[value]] = 1
    return vec


def _encode_event(event):
    """Encode one event as concatenated pitch | duration | velocity segments.

    event[0] is a list of MIDI pitches (chord), a single pitch (note) or 0;
    event[1] and event[2] are the duration and velocity, with 0 meaning
    "absent". An all-zero placeholder therefore encodes to an all-zero vector.
    Raises KeyError if a value is not in pdict / tdict / vdict.
    """
    pitch, duration, velocity = event[0], event[1], event[2]
    if type(pitch) is list:
        pitches = pitch
    elif pitch != 0:
        pitches = [pitch]
    else:
        pitches = []
    return np.concatenate([
        _one_hot(pdict, pitches),
        _one_hot(tdict, [duration] if duration != 0 else []),
        _one_hot(vdict, [velocity] if velocity != 0 else []),
    ])


right_onehot = []
left_onehot = []

for i in range(len(right_all)):
    right_arr = [_encode_event(event) for event in right_all[i]]
    left_arr = [_encode_event(event) for event in left_all[i]]

    # The original labels were swapped ('left' printed the right-hand count).
    print('right', len(right_arr))
    print('left', len(left_arr))
    right_onehot.append(right_arr)
    left_onehot.append(left_arr)


left 3800
right 3800
left 874
right 874
left 3946
right 3946
left 3708
right 3708
left 758
right 758
left 2256
right 2256
left 4527
right 4527
left 1754
right 1754
left 427
right 427
left 2499
right 2499
left 1642
right 1642
left 1003
right 1003
left 1419
right 1419
left 3147
right 3147
left 918
right 918
left 1081
right 1081
left 1429
right 1429
left 1335
right 1335
left 2151
right 2151
left 734
right 734
left 824
right 824
left 374
right 374
left 3655
right 3655
left 2924
right 2924
left 542
right 542
left 1529
right 1529
left 4818
right 4818
left 213
right 213
left 3951
right 3951
left 155
right 155
left 145
right 145
left 148
right 148
left 442
right 442
left 473
right 473
left 217
right 217
left 725
right 725
left 707
right 707
left 541
right 541
left 235
right 235
left 614
right 614
left 166
right 166
left 61
right 61
left 344
right 344
left 237
right 237
left 350
right 350
left 813
right 813
left 515
right 515
left 192
right 192
left 222
right 222
left 183
right 183
left 49
righ

## Cut the vectors into fixed-length samples with a fixed step

In [10]:
length = 50 # length of a music sample
step = 50 # step when generating each sample
## This means generating music samples of 50 events, moving forward 50 events for each new sample.

onehot_len = len(tdict) + len(vdict) + len(pdict)


def _window_starts(n_events, length, step):
    """Return the start indices of the fixed-length windows over a sequence.

    A window starts every `step` events for as long as a full window fits.
    If the last of those windows does not reach the end of the sequence, one
    extra window aligned with the end is added so the final events are kept.
    Returns [] when the sequence is shorter than `length`.
    """
    if n_events < length:
        return []
    last_start = n_events - length
    starts = list(range(0, last_start + 1, step))
    if starts[-1] != last_start:
        starts.append(last_start)
    return starts


right_samples = []
left_samples = []
labels = []
for i in range(len(right_onehot)):
    right = right_onehot[i]
    left = left_onehot[i]

    for start in _window_starts(len(right), length, step):
        # [start:start + length] always has exactly `length` rows. The old
        # tail slice [-1-length:-1] dropped the final event and produced a
        # short window when a piece had exactly `length` events.
        right_samples.append(right[start:start + length])
        left_samples.append(left[start:start + length])
        labels.append(composer[i])

# Build each array once; np.append inside the loop copied the whole array
# on every iteration.
if right_samples:
    right_data = np.array(right_samples)
    left_data = np.array(left_samples)
else:
    right_data = np.empty((0, length, onehot_len))
    left_data = np.empty((0, length, onehot_len))

In [12]:
### Save the data into npy files

# Target file name paired with the array written to it.
for npy_name, array in (
    ('right_data_lowt.npy', right_data),
    ('left_data_lowt.npy', left_data),
    ('labels_lowt.npy', np.array(labels)),
):
    np.save(npy_name, array)

In [116]:
### Convert the one hot vectors to midi files 

def convert2notes(data, offset, right=True):
    """Decode one one-hot event vector into a music21 note or chord.

    `data` is laid out as pitch | duration | velocity segments whose sizes
    are len(pdict), len(tdict) and the remainder. The index -> value lookups
    use the module-level id2pdict / id2tdict / id2vdict tables.

    Returns False when the pitch segment is all zeros, a chord.Chord when
    more than one pitch is set, otherwise a note.Note. The result has its
    offset, duration and velocity assigned. `right` is accepted but unused.
    """
    n_pitch = len(pdict)
    n_time = len(tdict)
    pitch_bits = data[:n_pitch]
    time_bits = data[n_pitch:n_pitch + n_time]
    vel_bits = data[n_pitch + n_time:]

    # No pitch set: nothing to decode.
    if np.all(pitch_bits == 0):
        return False

    pitch_ids = np.where(pitch_bits == 1)[0]
    time_id = np.where(time_bits == 1)[0][0]
    vel_id = np.where(vel_bits == 1)[0][0]

    if len(pitch_ids) > 1:
        # Several pitches: build the member notes first, then wrap them.
        members = []
        for pitch_id in pitch_ids:
            member = note.Note(int(id2pdict[pitch_id]))
            member.storedInstrument = instrument.Piano()
            members.append(member)
        result = chord.Chord(members)
        result.offset = offset
    else:
        result = note.Note(id2pdict[pitch_ids[0]])
        result.offset = offset
        result.storedInstrument = instrument.Piano()

    result.duration.quarterLength = id2tdict[time_id]
    result.volume.velocity = id2vdict[vel_id]
    return result

In [150]:
offset = 0

# Invert the value -> index vocabularies so indices decode back to values.
id2tdict = dict(zip(tdict.values(), tdict.keys()))
id2pdict = dict(zip(pdict.values(), pdict.keys()))
id2vdict = dict(zip(vdict.values(), vdict.keys()))

# Decode the first sample of each data array.
rdata = right_data[0]
ldata = left_data[0]
right_notes = []
left_notes = []

for r_vec, l_vec in zip(rdata, ldata):
    # Both events of a step are placed at the same offset.
    rnote = convert2notes(r_vec, offset)
    lnote = convert2notes(l_vec, offset)
    if rnote:
        right_notes.append(rnote)
        # Only the right-hand event's duration advances the running offset.
        offset += rnote.duration.quarterLength
    if lnote:
        left_notes.append(lnote)

rightpart = stream.Part(right_notes, id='Piano Right')
leftpart = stream.Part(left_notes, id='Piano Left')
midi_stream = stream.Stream([rightpart, leftpart])
midi_stream.write('midi', fp='music.mid')

'music.mid'