# Initial Keras model

A simple Keras model to make sure that I know how everything fits together.

I combined the `separateVoices` code with the preprocessing code (renamed to match the `separateVoices` naming convention) and ran the result through a Keras model — though not yet a Keras model we actually care about.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(123)

from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM

import music21 as m21

Using Theano backend.


In [2]:
# Parse a single chorale MIDI file into a music21 object; this one song is
# what the voice-separation and toy-model cells below operate on.
song = m21.converter.parse('data/bach-chorales/bach-1.mid')

In [3]:
"""separateVoices may put the same note in two different voices...
(This is probably not a problem: these Bach chorales don't appear to have rests)

when getElementsByOffset is called in a situation like:
1   2   3   4
XXX XXX
YYY YYY
    ZZZ
WWW WWW

Note W will be added to both the Tenor and the Bass part...
"""

def separateVoices(song):
    """Split a parsed song into a Score of four Parts (Soprano, Alto, Tenor, Bass).

    Chords are first expanded into individual Notes on a deep copy of `song`.
    Then, for every note, the notes sounding at that note's offset are ranked
    from highest to lowest pitch and the note is inserted into the Part whose
    position matches its rank (highest -> Soprano, ..., fourth -> Bass).

    Args:
        song: a music21 stream; it must contain at least one TimeSignature and
            one MetronomeMark, because the first of each is looked up with [0].

    Returns:
        A music21 Score holding the four Parts, in Soprano/Alto/Tenor/Bass order.
    """
    # Reuse the first time signature and tempo mark found anywhere in the song.
    timeSig = song.recurse().getElementsByClass('TimeSignature')[0]
    tempo = song.recurse().getElementsByClass('MetronomeMark')[0]

    sepVoices = m21.stream.Score()

    # One empty Part per voice, ordered from highest to lowest voice; the
    # ordering matters because parts are later zipped against notes sorted
    # from highest to lowest pitch.
    parts = []
    for voice in ['Soprano','Alto','Tenor','Bass']:
        part = m21.stream.Part()
        part.partName = voice
        part.append(timeSig)
        part.append(tempo)
        #part.append(m21.clef.TrebleClef() #just for readability
        parts.append(part)

    sepVoices.append(parts)

    # takes Chord, returns list of component Notes
    def chord2notes(chord):
        """Build one Note per pitch of `chord`, each given the chord's duration."""
        return [m21.note.Note(pitch, duration=chord.duration) for pitch in chord.pitches]

    # Replace chords with notes
    def removeChords(stream):
        """Return a deep copy of `stream` in which every chord is replaced by its Notes."""
        import copy
        # Work on a copy so the caller's stream is left untouched.
        newStream = copy.deepcopy(stream)
        for noteOrChord in newStream.flat.notes:
            if noteOrChord.isChord:
                # Offset as seen in the flattened view of the copy.
                offset = noteOrChord.offset
                newStream.remove(noteOrChord, recurse=True)   

                # The replacement notes are inserted directly into newStream
                # at the chord's offset.
                for note in chord2notes(noteOrChord):
                    newStream.insert(offset,note)
        return newStream

    #Remove all chords
    chordless = removeChords(song)

    # At each note, identify and separate the voices
    for note in chordless.flat.notes:
#     for note in chordless.flat.getElementsByOffset(0,11, classList=['Note']):
        # Every Note sounding at this note's offset; mustBeginInSpan=False also
        # admits notes that started earlier.
        simulNotes = list(chordless.flat.getElementsByOffset(note.offset, mustBeginInSpan=False, classList=['Note']))
        # Highest pitch first, to line up with the Soprano..Bass part order.
        simulNotes.sort(key=lambda x: x.pitch, reverse=True)
        

        # zip stops at the shorter sequence, so at most four notes are considered.
        for part, newnote in zip(parts, simulNotes):                
            # Insert only the note currently being visited, and only if a note
            # with the same id is not already in that part.
            if newnote.id not in [n.id for n in part.flat.notes] and note==newnote:
                part.insert(newnote.offset,newnote)
                
    return sepVoices

In [4]:
# A0 is 21 and C8 is 108
# The two sentinel values below sit just under 21 (A0), i.e. outside the
# A0..C8 range of real note pitches.
# HOLD_NOTE: emitted for a time step in which an earlier note is still sounding.
HOLD_NOTE = 20
# SILENCE: emitted for a time step in which no note is sounding.
SILENCE = 19


def pitch(note):
    """Convert a note or chord to a MIDI pitch number (int).

    Objects exposing a ``pitches`` sequence (chords, and anything note-like
    that offers one) yield the MIDI number of their *highest* pitch, so a
    chord maps to its top note regardless of the order its pitches are
    listed in. Objects without ``pitches`` fall back to ``note.pitch.midi``.

    Args:
        note: an object with either a ``pitches`` sequence whose items have a
            ``midi`` attribute, or a ``pitch`` attribute with a ``midi``
            attribute.

    Returns:
        The MIDI pitch number.

    Raises:
        IndexError: if ``pitches`` exists but is empty.
        AttributeError: if the object has neither ``pitches`` nor ``pitch``.
    """
    pitches = getattr(note, 'pitches', None)
    if pitches is None:
        # Not a chord-like object: use its single pitch.
        return note.pitch.midi
    if len(pitches) == 0:
        # Same exception type the previous `pitches[0]` lookup produced.
        raise IndexError('cannot take the pitch of an object with no pitches')
    # Taking the first listed pitch is not guaranteed to be the top note,
    # so pick the maximum explicitly.
    return max(p.midi for p in pitches)


# music21.Stream -> [int...]
def makePitchArray(voice):
    """Sample a chord-free voice once per quarter note into a list of ints.

    For each whole-quarter offset from 0 up to (not including) the voice's
    duration in quarter notes, one value is produced:
      * SILENCE when no Note is sounding at that offset,
      * the MIDI pitch of the first sounding Note when that Note's offset is
        later than one quarter before the sampled offset,
      * HOLD_NOTE otherwise (the Note began a full quarter or more earlier).

    NOTE(review): if getElementsByOffset also returns a Note that *ends*
    exactly on the sampled offset, `sounding[0]` may be that earlier note and
    the step is reported as HOLD_NOTE -- confirm against music21's
    end-boundary behaviour.
    """
    # Divisions are in quarter notes... we may want to change this
    total_quarters = int(voice.duration.quarterLength)
    sampled = []
    for beat in range(total_quarters):
        sounding = voice.flat.getElementsByOffset(beat, mustBeginInSpan=False, classList=['Note'])
        if len(sounding) == 0:
            sampled.append(SILENCE)
            continue
        first = sounding[0]
        # A note that began within the last quarter counts as a new attack.
        began_recently = first.offset > beat - 1
        sampled.append(first.pitch.midi if began_recently else HOLD_NOTE)
    return sampled

# Converts pitch number to a tuple with "one hot encoding"
LOWEST_PITCH=SILENCE
HIGHEST_PITCH=108 # C8
def pitchToTuple(pitch):
    """One-hot encode a pitch number as a tuple of 0s with a single 1.

    The tuple has ``HIGHEST_PITCH - LOWEST_PITCH`` slots; slot ``i`` stands for
    pitch ``LOWEST_PITCH + i``. Encodable values therefore run from
    LOWEST_PITCH up to HIGHEST_PITCH - 1 (HIGHEST_PITCH itself does not fit;
    the length is kept as-is because other cells depend on it).

    Args:
        pitch: integer pitch number (SILENCE, HOLD_NOTE or a MIDI pitch).

    Returns:
        A tuple of ints of length ``HIGHEST_PITCH - LOWEST_PITCH``.

    Raises:
        IndexError: if ``pitch`` falls outside the encodable range. Pitches
            below LOWEST_PITCH used to wrap around via negative indexing and
            silently mark the wrong slot; they are now rejected the same way
            too-high pitches already were.
    """
    width = HIGHEST_PITCH - LOWEST_PITCH
    slot = pitch - LOWEST_PITCH
    if not 0 <= slot < width:
        raise IndexError(
            'pitch %r is outside the encodable range [%d, %d)'
            % (pitch, LOWEST_PITCH, HIGHEST_PITCH))
    one_hot = [0] * width
    one_hot[slot] = 1
    return tuple(one_hot)

def tupleToPitch(tuple):
    """Map a score vector back to a pitch number.

    Finds the position of the largest entry (the earliest one on ties) and
    offsets it by LOWEST_PITCH. Works on one-hot tuples as well as on vectors
    of scores such as a model's output row.
    """
    scores = list(tuple)
    strongest = max(scores)
    return LOWEST_PITCH + scores.index(strongest)

def pitchToStream(pitch_array):
    """Rebuild a music21 Stream from a per-quarter-note pitch array.

    Each entry of `pitch_array` stands for one quarter note:
      * a pitch number appends a new Note,
      * HOLD_NOTE lengthens the previously appended element by one quarter,
      * SILENCE appends a Rest, so that later notes keep their position in
        time instead of sliding earlier.

    A HOLD_NOTE with nothing before it has nothing to extend and is treated
    as silence rather than failing on an empty stream.

    Args:
        pitch_array: iterable of ints as produced by makePitchArray.

    Returns:
        A music21 Stream of Notes and Rests.
    """
    stream = m21.stream.Stream()
    for value in pitch_array:
        if value == HOLD_NOTE and len(stream) > 0:
            # Extend whatever was appended last (a Note or a Rest).
            stream[-1].quarterLength += 1
        elif value == SILENCE or value == HOLD_NOTE:
            # Rest defaults to one quarter note, matching the step size.
            stream.append(m21.note.Rest())
        else:
            stream.append(m21.note.Note(value))
    return stream

In [5]:
# Separate the parsed song into its four voice parts. Note that preprocess()
# calls separateVoices() again on its own rather than reusing this result.
voices = separateVoices(song)

In [6]:
def preprocess(song):
    """Turn a song into one list of one-hot pitch tuples per separated voice.

    The song is split with separateVoices; each resulting voice is sampled
    with makePitchArray and every sampled value is encoded with pitchToTuple.

    Returns:
        A list with one entry per voice, each entry being a list of tuples.
    """
    return [
        [pitchToTuple(value) for value in makePitchArray(part)]
        for part in separateVoices(song)
    ]

# One list of one-hot pitch tuples per voice for the single parsed song;
# the first entry is used as training data by the toy model below.
preprocessed = preprocess(song)

## Simple Keras model 
A naively stupid deep learning model to predict the next note.

Most of the keras code comes straight off of the "Getting Started" guide on keras.io.

In [7]:
# Width of a single one-hot pitch vector.
PITCH_LEN = len(pitchToTuple(SILENCE))

# Two dense layers: one-hot pitch in, distribution over pitches out.
model = Sequential([
    Dense(output_dim=PITCH_LEN, input_dim=PITCH_LEN),
    Activation('relu'),
    Dense(output_dim=PITCH_LEN),
    Activation('softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

# Train to match the next note: inputs are every step but the last of the
# first voice, targets are the same sequence shifted forward by one step.
first_voice = preprocessed[0]
data = np.array(first_voice[:-1])
labels = np.array(first_voice[1:])
model.fit(data, labels, nb_epoch=10, batch_size=32)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11396aac8>

In [22]:
# Ask the toy Dense model which pitch follows MIDI pitch 31
# (that is G1 when A0 is MIDI 21, not G#2).
# Expectation noted when this was written: it should predict SILENCE.
tupleToPitch(model.predict(np.array([pitchToTuple(31)]))[0])

20

In [19]:
# Same query for input 20, the value assigned to HOLD_NOTE.
tupleToPitch(model.predict(np.array([pitchToTuple(20)]))[0])

20

# A slightly less naive and foolish keras model

(Not working yet)


In [66]:
from keras.layers.recurrent import LSTM, GRU
from keras.layers.embeddings import Embedding
from keras.preprocessing.sequence import pad_sequences
from keras.layers.wrappers import TimeDistributed
import glob
from tqdm import tqdm

In [67]:
# Width of a single one-hot pitch vector (also the number of output classes).
PITCH_LEN = len(pitchToTuple(SILENCE))
# Size of both the embedding vectors and the LSTM state.
ARBITRARY_DIMENSIONS=12
# Fixed sequence length the Embedding layer is built for.
MIN_SONG_LENGTH=200 #TODO figure out/look up

# Sequence model: integer pitch per time step in, distribution over pitches
# per time step out.
# NOTE(review): the Embedding vocabulary is PITCH_LEN, so integer inputs must
# stay below PITCH_LEN; raw MIDI numbers at or above it would be out of
# range -- confirm how the inputs are encoded.
model = Sequential([
        Embedding(PITCH_LEN, ARBITRARY_DIMENSIONS, input_length=MIN_SONG_LENGTH),
        LSTM(ARBITRARY_DIMENSIONS, return_sequences=True),
        # Dropout(0.5),
        # One output unit per encodable pitch (was a hard-coded 89).
        TimeDistributed(Dense(PITCH_LEN)),
        # Softmax so each time step yields a probability distribution,
        # matching the Dense model defined earlier in this notebook.
        Activation('softmax')
        ])
# The labels built for this model are one-hot tuples, which is what
# 'categorical_crossentropy' expects; the sparse variant wants integer class
# indices instead.
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])



In [None]:

def loadSongs():
    """Load every chorale MIDI file and build soprano -> bass training data.

    All files matching ``data/bach-chorales/*.mid`` are parsed, split into
    voices, and truncated to the length of the shortest song.

    Returns:
        A tuple ``(soprano_pitches, bass_tuples, min_length)``:
          * soprano_pitches: per song, the soprano pitch array cut to at most
            ``min_length`` entries,
          * bass_tuples: per song, the one-hot encoded bass pitch array cut to
            at most ``min_length`` entries,
          * min_length: shortest song duration in whole quarter notes (int).

    Raises:
        ValueError: if no MIDI files are found.
    """
    print('Listing files')
    # Sorted so the order of the returned data does not depend on the
    # filesystem's listing order.
    files = sorted(glob.glob('data/bach-chorales/*.mid'))
    if not files:
        raise ValueError("no MIDI files found under 'data/bach-chorales/'")
    print('Loading files')
    songs = [m21.converter.parse(file) for file in tqdm(files)]
    print('Calculating minimum song length')
    # quarterLength is not guaranteed to be an int, and a non-integer slice
    # bound raises TypeError, so truncate to whole quarter notes here.
    min_length = int(min(song.duration.quarterLength for song in tqdm(songs)))
    print('Separating voices')
    voices_by_song = [separateVoices(song) for song in tqdm(songs)]
    print('Getting soprano part (input data)')
    # Index 0 is the first part created by separateVoices (Soprano).
    soprano_pitches = [makePitchArray(voices[0])[:min_length] for voices in voices_by_song]
    print('Getting bass part (output labels)')
    # Index 3 is the fourth part created by separateVoices (Bass).
    bass_tuples = [
        [pitchToTuple(value) for value in makePitchArray(voices[3])[:min_length]]
        for voices in voices_by_song
    ]

    return (soprano_pitches, bass_tuples, min_length)

# Parses every MIDI file in the data directory; the saved progress output
# below shows this taking many minutes.
loadedData = loadSongs()

  0%|          | 4/5000 [00:00<02:19, 35.70it/s]

Listing files
Loading files


 15%|█▍        | 733/5000 [01:38<36:19,  1.96it/s]

In [None]:
# loadSongs() returns nested Python lists. Convert them to NumPy arrays before
# training, as the earlier Dense model cell does: a plain list handed to fit()
# is interpreted as a list of separate model inputs rather than one batch.
data = np.array(loadedData[0])
labels = np.array(loadedData[1])
min_song_length=loadedData[2]

# NOTE(review): the model above was built with input_length=MIN_SONG_LENGTH,
# while the sequences here are cut to min_song_length -- confirm they agree.
model.fit(data, labels, nb_epoch=10, batch_size=32)

In [47]:
# Earlier idea, kept for reference: wrap prediction in a helper and map it
# over the data.
# def predict_harmony(melody_pitches_so_far):
#     return tupleToPitch([model.predict([melody_pitches_so_far])])


# list(map(predict_harmony, data))

# Decode every predicted vector back to a pitch number.
# NOTE(review): this cell's execution count (47) is lower than that of the
# cells defining the LSTM model (66, 67), so the saved output presumably came
# from whatever `model` and `data` were in the kernel earlier -- confirm by
# re-running top to bottom.
[tupleToPitch(n) for n in model.predict(data)]

[20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
 20,
