## Homework: Deep Jazz

In [2]:
import numpy as np
from music21 import stream, midi, tempo, note

from grammar import unparse_grammar
from preprocess import get_musical_data, get_corpus_data

from qa import prune_grammar, prune_notes, clean_up_notes
from generator import __sample, __generate_grammar, __predict

In [3]:
import tflearn

In [4]:
N_epochs = 128  # default
data_fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)

In [5]:
max_len = 20
max_tries = 1000
diversity = 0.5

# musical settings
bpm = 130

# get data
chords, abstract_grammars = get_musical_data(data_fn)
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
print('corpus length:', len(corpus))
print('total # of values:', len(values))

corpus length: 193
total # of values: 78


In [6]:
chords[0]

[<music21.instrument.Piano Piano>,
 <music21.tempo.MetronomeMark Quarter=112.0>,
 <music21.key.Key of G major>,
 <music21.meter.TimeSignature 4/4>]

In [7]:
def get_keras_model(max_len, N_values):
    # build a 2 stacked LSTM
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=(max_len, N_values)))
    model.add(Dropout(0.2))
    model.add(LSTM(128, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(N_values))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

## Task

Replace previous model with equivalent in prettytensor or tf.slim

Try to make you code as compact as possible

In [8]:
from tflearn import input_data, lstm, fully_connected, regression, dropout, activation

def get_tflearn_model(max_len, N_values):
    
    # Network building
    net = input_data([None, max_len, N_values])
    net = lstm(net, 128, return_seq=True)
    net = dropout(net, 0.2)
    net = tflearn.lstm(net, 128, return_seq=False)
    net = dropout(net, 0.2)
    net = fully_connected(net, N_values)
    net = activation(net, activation='softmax')
    net = tflearn.regression(net, loss='categorical_crossentropy', optimizer='rmsprop')
    
    # Training
    model = tflearn.DNN(net, tensorboard_dir='tflearn_log')
    
    return model

In [9]:
get_model = get_tflearn_model

In [10]:
import numpy as np

''' Build a 2-layer LSTM from a training corpus '''


def build_model(corpus, val_indices, max_len, N_epochs=128):
    # number of different values or words in corpus
    N_values = len(set(corpus))

    # cut the corpus into semi-redundant sequences of max_len values
    step = 3
    sentences = []
    next_values = []
    for i in range(0, len(corpus) - max_len, step):
        sentences.append(corpus[i: i + max_len])
        next_values.append(corpus[i + max_len])
    print('nb sequences:', len(sentences))

    # transform data into binary matrices
    X = np.zeros((len(sentences), max_len, N_values), dtype=np.bool)
    y = np.zeros((len(sentences), N_values), dtype=np.bool)
    for i, sentence in enumerate(sentences):
        for t, val in enumerate(sentence):
            X[i, t, val_indices[val]] = 1
        y[i, val_indices[next_values[i]]] = 1

    model = get_model(max_len, N_values)
    model.fit(X, y, batch_size=128, n_epoch=N_epochs)

    return model

In [14]:
import tensorflow as tf
tf.reset_default_graph() 

In [15]:
# build model
model = build_model(corpus=corpus, val_indices=val_indices,
                         max_len=max_len, N_epochs=N_epochs)

# set up audio stream
out_stream = stream.Stream()

# generation loop
curr_offset = 0.0
loopEnd = len(chords)
for loopIndex in range(1, loopEnd):
    # get chords from file
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)

    # generate grammar
    curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                      abstract_grammars=abstract_grammars,
                                      values=values, val_indices=val_indices,
                                      indices_val=indices_val,
                                      max_len=max_len, max_tries=max_tries,
                                      diversity=diversity)

    curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')

    # Pruning #1: smoothing measure
    curr_grammar = prune_grammar(curr_grammar)

    # Get notes from grammar and chords
    curr_notes = unparse_grammar(curr_grammar, curr_chords)

    # Pruning #2: removing repeated and too close together notes
    curr_notes = prune_notes(curr_notes)

    # quality assurance: clean up notes
    curr_notes = clean_up_notes(curr_notes)

    # print # of notes in curr_notes
    print('After pruning: %s notes' % (len([i for i in curr_notes
                                            if isinstance(i, note.Note)])))

    # insert into the output stream
    for m in curr_notes:
        out_stream.insert(curr_offset + m.offset, m)
    for mc in curr_chords:
        out_stream.insert(curr_offset + mc.offset, mc)

    curr_offset += 4.0

out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

# Play the final stream through output (see 'play' lambda function above)
# play = lambda x: midi.realtime.StreamPlayer(x).play()
# play(out_stream)

# save stream
mf = midi.translate.streamToMidiFile(out_stream)
mf.open(out_fn, 'wb')
mf.write()
mf.close()    

nb sequences: 58
---------------------------------
Run id: W9XXQJ
Log directory: tflearn_log/
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'list' object has no attribute 'name'
---------------------------------
Training samples: 58
Validation samples: 0
--
Training Step: 1  | time: 0.290s
| RMSProp | epoch: 001 | loss: 0.00000 -- iter: 58/58
--
Training Step: 2  | total loss: [1m[32m3.92082[0m[0m | time: 0.030s
| RMSProp | epoch: 002 | loss: 3.92082 -- iter: 58/58
--
Training Step: 3  | total loss: [1m[32m4.27760[0m[0m | time: 0.028s
| RMSProp | epoch: 003 | loss: 4.27760 -- iter: 58/58
--
Training Step: 4  | total loss: [1m[32m4.33673[0m[0m | time: 0.028s
| RMSProp | epoch: 004 | loss: 4.33673 -- iter: 58/58
--
Training Step: 5  | total loss: [1m[32m4.35013[0m[0m | time: 0.028s
| RMSProp | epoch: 005 | 

Training Step: 60  | total loss: [1m[32m4.34931[0m[0m | time: 0.025s
| RMSProp | epoch: 060 | loss: 4.34931 -- iter: 58/58
--
Training Step: 61  | total loss: [1m[32m4.34900[0m[0m | time: 0.025s
| RMSProp | epoch: 061 | loss: 4.34900 -- iter: 58/58
--
Training Step: 62  | total loss: [1m[32m4.34857[0m[0m | time: 0.026s
| RMSProp | epoch: 062 | loss: 4.34857 -- iter: 58/58
--
Training Step: 63  | total loss: [1m[32m4.34805[0m[0m | time: 0.025s
| RMSProp | epoch: 063 | loss: 4.34805 -- iter: 58/58
--
Training Step: 64  | total loss: [1m[32m4.34767[0m[0m | time: 0.025s
| RMSProp | epoch: 064 | loss: 4.34767 -- iter: 58/58
--
Training Step: 65  | total loss: [1m[32m4.34727[0m[0m | time: 0.025s
| RMSProp | epoch: 065 | loss: 4.34727 -- iter: 58/58
--
Training Step: 66  | total loss: [1m[32m4.34684[0m[0m | time: 0.025s
| RMSProp | epoch: 066 | loss: 4.34684 -- iter: 58/58
--
Training Step: 67  | total loss: [1m[32m4.34645[0m[0m | time: 0.025s
| RMSProp | epoch

Training Step: 123  | total loss: [1m[32m4.19848[0m[0m | time: 0.024s
| RMSProp | epoch: 123 | loss: 4.19848 -- iter: 58/58
--
Training Step: 124  | total loss: [1m[32m4.17956[0m[0m | time: 0.024s
| RMSProp | epoch: 124 | loss: 4.17956 -- iter: 58/58
--
Training Step: 125  | total loss: [1m[32m4.15168[0m[0m | time: 0.025s
| RMSProp | epoch: 125 | loss: 4.15168 -- iter: 58/58
--
Training Step: 126  | total loss: [1m[32m4.12572[0m[0m | time: 0.024s
| RMSProp | epoch: 126 | loss: 4.12572 -- iter: 58/58
--
Training Step: 127  | total loss: [1m[32m4.09588[0m[0m | time: 0.025s
| RMSProp | epoch: 127 | loss: 4.09588 -- iter: 58/58
--
Training Step: 128  | total loss: [1m[32m4.06470[0m[0m | time: 0.024s
| RMSProp | epoch: 128 | loss: 4.06470 -- iter: 58/58
--
After pruning: 14 notes
After pruning: 12 notes
After pruning: 11 notes
After pruning: 12 notes
After pruning: 14 notes
After pruning: 13 notes
After pruning: 13 notes
After pruning: 15 notes
After pruning: 14 note

You can play generated sample using any midi player

Under linux I prefer timidity

In [None]:
!! timidity midi/deepjazz_on_metheny...128_epochs.midi