## Homework: Deep Jazz

In [1]:
import numpy as np
import tensorflow as tf

from music21 import stream, midi, tempo, note

from grammar import unparse_grammar
from preprocess import get_musical_data, get_corpus_data

from qa import prune_grammar, prune_notes, clean_up_notes
from generator import __sample, __generate_grammar, __predict

In [2]:
N_epochs = 128  # default
data_fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)

In [3]:
max_len = 20
max_tries = 1000
diversity = 0.5

# musical settings
bpm = 130

# get data
chords, abstract_grammars = get_musical_data(data_fn)
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
print('corpus length:', len(corpus))
print('total # of values:', len(values))

corpus length: 193
total # of values: 78


In [4]:
chords[0]

[<music21.instrument.Piano Piano>,
 <music21.tempo.MetronomeMark Quarter=112.0>,
 <music21.key.Key of G major>,
 <music21.meter.TimeSignature 4/4>]

In [5]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM

def get_keras_model(max_len, N_values):
    # build a 2 stacked LSTM
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=(max_len, N_values)))
    model.add(Dropout(0.2))
    model.add(LSTM(128, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(N_values))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

Using TensorFlow backend.


## Task

Replace previous model with equivalent in prettytensor or tf.slim

Try to make you code as compact as possible

In [6]:
import tflearn

class TflearnModel:
    def __init__(self, max_len, N_values):
        net = tflearn.input_data([None, max_len, N_values])
        net = tflearn.lstm(net, 128, dropout=0.8, return_seq=True)
        net = tflearn.lstm(net, 128, dropout=0.8)
        net = tflearn.fully_connected(net, N_values, activation='softmax')
        net = tflearn.regression(net, optimizer='RMSProp', loss='categorical_crossentropy', learning_rate=0.1)

        self.model = tflearn.DNN(net)
        
    def fit(self, X, y, batch_size, epochs):
        return self.model.fit(X_inputs=X, Y_targets=y, n_epoch=epochs, batch_size=batch_size)
    
    def predict(self, x, verbose):
        return self.model.predict(X=x)

In [7]:
def get_tflearn_model(max_len, N_values):
    return TflearnModel(max_len, N_values)

In [8]:
# get_model = get_keras_model
get_model = get_tflearn_model

In [9]:
import numpy as np

''' Build a 2-layer LSTM from a training corpus '''

def build_model(corpus, val_indices, max_len, N_epochs=128):
    # number of different values or words in corpus
    N_values = len(set(corpus))

    # cut the corpus into semi-redundant sequences of max_len values
    step = 3
    sentences = []
    next_values = []
    for i in range(0, len(corpus) - max_len, step):
        sentences.append(corpus[i: i + max_len])
        next_values.append(corpus[i + max_len])
    print('nb sequences:', len(sentences))

    # transform data into binary matrices
    X = np.zeros((len(sentences), max_len, N_values), dtype=np.bool)
    y = np.zeros((len(sentences), N_values), dtype=np.bool)
    for i, sentence in enumerate(sentences):
        for t, val in enumerate(sentence):
            X[i, t, val_indices[val]] = 1
        y[i, val_indices[next_values[i]]] = 1

    model = get_model(max_len, N_values)
    model.fit(X, y, batch_size=128, epochs=N_epochs)

    return model

In [10]:
# build model
model = build_model(corpus=corpus, val_indices=val_indices,
                         max_len=max_len, N_epochs=N_epochs)

# set up audio stream
out_stream = stream.Stream()

# generation loop
curr_offset = 0.0
loopEnd = len(chords)
for loopIndex in range(1, loopEnd):
    # get chords from file
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)

    # generate grammar
    curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                      abstract_grammars=abstract_grammars,
                                      values=values, val_indices=val_indices,
                                      indices_val=indices_val,
                                      max_len=max_len, max_tries=max_tries,
                                      diversity=diversity)

    curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')

    # Pruning #1: smoothing measure
    curr_grammar = prune_grammar(curr_grammar)

    # Get notes from grammar and chords
    curr_notes = unparse_grammar(curr_grammar, curr_chords)

    # Pruning #2: removing repeated and too close together notes
    curr_notes = prune_notes(curr_notes)

    # quality assurance: clean up notes
    curr_notes = clean_up_notes(curr_notes)

    # print # of notes in curr_notes
    print('After pruning: %s notes' % (len([i for i in curr_notes
                                            if isinstance(i, note.Note)])))

    # insert into the output stream
    for m in curr_notes:
        out_stream.insert(curr_offset + m.offset, m)
    for mc in curr_chords:
        out_stream.insert(curr_offset + mc.offset, mc)

    curr_offset += 4.0

out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

# Play the final stream through output (see 'play' lambda function above)
# play = lambda x: midi.realtime.StreamPlayer(x).play()
# play(out_stream)

# save stream
mf = midi.translate.streamToMidiFile(out_stream)
mf.open(out_fn, 'wb')
mf.write()
mf.close()    

nb sequences: 58
---------------------------------
Run id: B8IMUN
Log directory: /tmp/tflearn_logs/
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
---------------------------------
Training samples: 58
Validation samples: 0
--
Training Step: 1  | time: 0.768s
| RMSProp | epoch: 001 | loss: 0.00000 -- iter: 58/58
--
Training Step: 2  | total loss: [1m[32m3.92125[0m[0m | time: 0.048s
| RMSProp | epoch: 002 | loss: 3.92125 -- iter: 58/58
--
Training Step: 3  | total loss: [1m[32m4.27520[0m[0m | time: 0.047s
| RMSProp | epoch: 003 | loss: 4.27520 -- iter: 58/58
--
Training Step: 4  | total loss: [1m[32m4.33228[0m[0m | time: 0.047s
| RMSProp | epoch: 004 | loss: 4.33228 -- iter: 58/58
--
Training Step: 5  | total loss: [1m[32m4.34342[0m[0m | time: 0.047s
| RMSProp | epoch: 005 | loss: 4.34342 -- iter: 58/58
--
Training Step: 6  | total loss: [1m[32m4.34423[0m[0m | time: 0.043s
| RMSProp | epoc

Training Step: 62  | total loss: [1m[32m3.66526[0m[0m | time: 0.043s
| RMSProp | epoch: 062 | loss: 3.66526 -- iter: 58/58
--
Training Step: 63  | total loss: [1m[32m3.64854[0m[0m | time: 0.043s
| RMSProp | epoch: 063 | loss: 3.64854 -- iter: 58/58
--
Training Step: 64  | total loss: [1m[32m3.63226[0m[0m | time: 0.043s
| RMSProp | epoch: 064 | loss: 3.63226 -- iter: 58/58
--
Training Step: 65  | total loss: [1m[32m3.62481[0m[0m | time: 0.042s
| RMSProp | epoch: 065 | loss: 3.62481 -- iter: 58/58
--
Training Step: 66  | total loss: [1m[32m3.64950[0m[0m | time: 0.042s
| RMSProp | epoch: 066 | loss: 3.64950 -- iter: 58/58
--
Training Step: 67  | total loss: [1m[32m3.70492[0m[0m | time: 0.043s
| RMSProp | epoch: 067 | loss: 3.70492 -- iter: 58/58
--
Training Step: 68  | total loss: [1m[32m3.72967[0m[0m | time: 0.043s
| RMSProp | epoch: 068 | loss: 3.72967 -- iter: 58/58
--
Training Step: 69  | total loss: [1m[32m3.71974[0m[0m | time: 0.043s
| RMSProp | epoch

Training Step: 125  | total loss: [1m[32m3.60595[0m[0m | time: 0.038s
| RMSProp | epoch: 125 | loss: 3.60595 -- iter: 58/58
--
Training Step: 126  | total loss: [1m[32m3.60090[0m[0m | time: 0.040s
| RMSProp | epoch: 126 | loss: 3.60090 -- iter: 58/58
--
Training Step: 127  | total loss: [1m[32m3.59317[0m[0m | time: 0.040s
| RMSProp | epoch: 127 | loss: 3.59317 -- iter: 58/58
--
Training Step: 128  | total loss: [1m[32m3.58696[0m[0m | time: 0.039s
| RMSProp | epoch: 128 | loss: 3.58696 -- iter: 58/58
--
After pruning: 14 notes
After pruning: 13 notes
After pruning: 12 notes
After pruning: 16 notes
After pruning: 13 notes
After pruning: 13 notes
After pruning: 13 notes
After pruning: 14 notes
After pruning: 11 notes
After pruning: 15 notes
After pruning: 12 notes
After pruning: 16 notes
After pruning: 14 notes
After pruning: 14 notes
After pruning: 13 notes
After pruning: 11 notes
After pruning: 14 notes
After pruning: 11 notes


You can play generated sample using any midi player

Under linux I prefer timidity

In [12]:
!! timidity midi/deepjazz_on_metheny...128_epochs.midi

['/bin/bash: timidity: command not found']