## Homework: Deep Jazz

In [1]:
import numpy as np
from music21 import stream, midi, tempo, note

# import lstm
from grammar import unparse_grammar
from preprocess import get_musical_data, get_corpus_data

from qa import prune_grammar, prune_notes, clean_up_notes
from generator import __sample, __generate_grammar, __predict

In [2]:
# Number of training epochs (default); also embedded in the output file name.
N_epochs = 128
# Input MIDI file: 'And Then I Knew' by Pat Metheny.
data_fn = 'midi/original_metheny.mid'
# Where the generated MIDI file is written.
out_fn = 'midi/deepjazz_on_metheny...{}_epochs.midi'.format(N_epochs)

In [3]:
# Length of the input window: build_model() cuts the corpus into sequences of
# max_len values and the network input has max_len time steps.
max_len = 20
# max_tries and diversity are forwarded unchanged to __generate_grammar();
# NOTE(review): presumably a retry limit and a sampling temperature — confirm
# against generator.py.
max_tries = 1000
diversity = 0.5

# musical settings
# Tempo of the generated piece; inserted into the output stream as a MetronomeMark.
bpm = 130

# get data
# Parse the source MIDI file, then derive the training corpus and the lookup
# tables (value -> index and index -> value) from the abstract grammars.
chords, abstract_grammars = get_musical_data(data_fn)
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
print('corpus length:', len(corpus))
print('total # of values:', len(values))

corpus length: 193
total # of values: 78


In [4]:
# Inspect the first entry of `chords`; the generation loop further down starts
# at index 1 and never reads this entry.
chords[0]

[<music21.instrument.Piano Piano>,
 <music21.tempo.MetronomeMark Quarter=112.0>,
 <music21.key.Key of G major>,
 <music21.meter.TimeSignature 4/4>]

In [5]:
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM

def get_keras_model(max_len, N_values):
    """Build and compile the Keras reference network.

    A single 128-unit LSTM reads an input of shape (max_len, N_values) and
    returns only its last output, which feeds a dense layer of N_values units
    followed by a softmax activation.

    Note: the dropout layers and the second LSTM are not part of the model,
    so this is a one-layer recurrent network rather than a 2-stacked LSTM.

    Args:
        max_len: number of time steps in each input sequence.
        N_values: size of the one-hot vocabulary (input width and output width).

    Returns:
        A Sequential model compiled with categorical cross-entropy loss and
        the Adam optimizer (lr=0.001).
    """
    layer_stack = [
        LSTM(128, return_sequences=False, input_shape=(max_len, N_values)),
        Dense(N_values),
        Activation('softmax'),
    ]
    model = Sequential(layer_stack)
    model.compile(optimizer=keras.optimizers.Adam(lr=0.001),
                  loss=keras.losses.categorical_crossentropy)
    return model

Using TensorFlow backend.


## Task

Replace the previous Keras model with an equivalent one written in prettytensor or tf.slim.

Try to make your code as compact as possible.

In [6]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
import prettytensor as pt

from sklearn.utils import shuffle, resample

# Take the recurrent cell classes from tf.contrib.rnn when it is importable;
# otherwise bind the SAME names from tf.nn.rnn_cell so the rest of the notebook
# can use them regardless of which branch ran.
try:
    from tensorflow.contrib.rnn import MultiRNNCell, LSTMCell, DropoutWrapper, LayerNormBasicLSTMCell
except ImportError:
    MultiRNNCell = tf.nn.rnn_cell.MultiRNNCell
    LSTMCell = tf.nn.rnn_cell.LSTMCell
    # The fallback used to bind only `DROPLSTM`, leaving `DropoutWrapper`
    # undefined on this path; bind the proper name and keep the old alias.
    DropoutWrapper = tf.nn.rnn_cell.DropoutWrapper
    DROPLSTM = DropoutWrapper
    # `tf.nn.rnn.LayerNormBasicLSTMCell` is not a valid attribute path, so the
    # old fallback raised AttributeError. Look the class up defensively and
    # fall back to None when this TensorFlow build does not provide it.
    LayerNormBasicLSTMCell = getattr(tf.nn.rnn_cell, 'LayerNormBasicLSTMCell', None)

class JazzNet:
    """Stacked-LSTM next-value classifier built with TensorFlow 1.x and tf.slim.

    The network reads one-hot sequences of shape [batch, max_len, N_values],
    runs them through one LSTM layer per entry of ``self.hiddens``, takes the
    output of the last time step and maps it to ``N_values`` class
    probabilities with a fully connected layer followed by softmax.

    The object exposes ``fit`` and ``predict`` with the call signatures used
    by the rest of the notebook, so it can stand in for the Keras model.

    Each instance owns a private ``tf.Graph``; building a second instance (or
    re-running the notebook cell) therefore does not collide with variables
    of an earlier one.
    """

    def __init__(self, max_len, N_values):
        """Store the dimensions and build the computation graph.

        Args:
            max_len: number of time steps in each input sequence.
            N_values: size of the one-hot vocabulary.
        """
        self.max_len = max_len
        self.N_values = N_values
        self.hiddens = [128, 128]
        # NOTE: kept as an attribute, but no dropout is applied in build().
        self.keep_prob = 0.8
        self.layers = len(self.hiddens)
        self.logits = None
        self.predictions = None
        self.loss = None
        self.train_op = None
        self.accuracy = None
        self.sess = None
        self._init_op = None
        # Private graph so repeated construction does not reuse variable names
        # in the process-wide default graph.
        self.graph = tf.Graph()
        self.create_placeholders()
        self.build()

    def create_placeholders(self):
        """Create the input (`data`) and one-hot label (`target`) placeholders."""
        with self.graph.as_default():
            self.data = tf.placeholder(tf.float32,
                                       [None, self.max_len, self.N_values])

            self.target = tf.placeholder(tf.float32, [None, self.N_values])

    def build(self):
        """Assemble the LSTM stack, output layer, loss, optimizer and accuracy."""
        with self.graph.as_default():
            with tf.name_scope('recurrent_layers'):
                cells = [LSTMCell(hidden,
                                  initializer=tf.random_normal_initializer(0.0, 0.3))
                         for hidden in self.hiddens]
                output, _ = tf.nn.dynamic_rnn(
                    MultiRNNCell(cells),
                    self.data,
                    dtype=tf.float32
                )

                last = self._last_relevant(output, self.max_len)

            # The dense layer emits raw logits (no activation). Softmax is
            # applied exactly once: explicitly for `predictions`, and inside
            # softmax_cross_entropy for the loss. Feeding already-softmaxed
            # values to that loss would apply softmax twice.
            self.logits = slim.fully_connected(
                last, self.N_values,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(0.0, 0.3),
                biases_initializer=tf.truncated_normal_initializer(0.0, 0.3),
                scope='final')
            self.predictions = tf.nn.softmax(self.logits)

            self.loss = tf.losses.softmax_cross_entropy(self.target, self.logits)
            self.train_op = tf.train.AdamOptimizer(learning_rate=.001).minimize(self.loss)

            correct_prediction = tf.equal(tf.argmax(self.target, 1), tf.argmax(self.predictions, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            # Created after minimize() so the optimizer's variables are included.
            self._init_op = tf.global_variables_initializer()

    @staticmethod
    def _batch_slices(n_samples, batch_size):
        """Split range(n_samples) into consecutive slices of at most batch_size.

        A missing / non-positive batch_size, or one that is not smaller than
        n_samples, yields a single slice covering all samples.
        """
        if not batch_size or batch_size <= 0 or batch_size >= n_samples:
            return [slice(0, n_samples)]
        return [slice(start, min(start + batch_size, n_samples))
                for start in range(0, n_samples, batch_size)]

    def fit(self, X, y, batch_size, epochs):
        """Train the network from freshly initialised variables.

        Args:
            X: array of shape [n_samples, max_len, N_values].
            y: one-hot array of shape [n_samples, N_values].
            batch_size: maximum number of samples per optimizer step.
            epochs: number of passes over the data.

        Prints the sample-weighted mean loss and accuracy after every epoch.
        """
        # Release a session left over from an earlier fit() call.
        self.close()
        config = tf.ConfigProto(allow_soft_placement=True)
        self.sess = tf.Session(graph=self.graph, config=config)
        self.sess.run(self._init_op)

        n_samples = len(X)
        batches = self._batch_slices(n_samples, batch_size)
        fetches = [self.loss, self.train_op, self.accuracy]

        for epoch in range(epochs):
            loss_total = 0.0
            acc_total = 0.0
            for batch in batches:
                fd = {self.data: X[batch], self.target: y[batch]}
                loss, _, acc = self.sess.run(fetches, feed_dict=fd)
                weight = batch.stop - batch.start
                loss_total += loss * weight
                acc_total += acc * weight
            denom = max(n_samples, 1)
            print('epoch_{} loss: {:.3f}, acc: {:.3f}'.format(
                epoch, loss_total / denom, acc_total / denom))

    def predict(self, x, verbose=0):
        """Return class probabilities for `x` (shape [n, max_len, N_values]).

        `verbose` is accepted for call compatibility and is not used.

        Raises:
            RuntimeError: if called before fit() (no session exists yet).
        """
        if self.sess is None:
            raise RuntimeError('JazzNet.predict() called before fit()')
        return self.sess.run(self.predictions, {self.data: x})

    def close(self):
        """Close the TensorFlow session, if one is open."""
        if self.sess is not None:
            self.sess.close()
            self.sess = None

    def _last_relevant(self, output, length):
        """Select the RNN output at time step `length - 1` for every sequence.

        `output` is flattened to rows of width ``self.hiddens[-1]``; row
        ``b * length + (length - 1)`` is then the last step of sequence ``b``.
        """
        with tf.name_scope("last_relevant"):
            batch_size = tf.shape(output)[0]
            index = tf.range(0, batch_size) * length + (length - 1)
            flat = tf.reshape(output, [-1, self.hiddens[-1]])
            return tf.gather(flat, index)
            
def get_slim_model(max_len, N_values):
    """Factory returning a JazzNet built for the given sequence length and vocabulary size."""
    network = JazzNet(max_len, N_values)
    return network

In [7]:
# Model factory used by build_model(); swap the two lines below to train the
# Keras reference model instead of the slim one.
get_model = get_slim_model
# get_model = get_keras_model

In [8]:
import numpy as np

''' Build a 2-layer LSTM from a training corpus '''


def build_model(corpus, val_indices, max_len, N_epochs=128):
    """Vectorize the corpus and train a model obtained from get_model().

    The corpus is cut into overlapping windows of ``max_len`` values, taken
    every 3 positions; the value following each window is its training target.
    Windows and targets are one-hot encoded through ``val_indices``.

    Args:
        corpus: sequence of values to learn from.
        val_indices: mapping from each corpus value to its one-hot column.
        max_len: window length (number of time steps per training sequence).
        N_epochs: number of training epochs passed to ``model.fit``.

    Returns:
        The trained model object returned by ``get_model``.
    """
    # number of different values or words in corpus
    N_values = len(set(corpus))
    print('corpus len: {}'.format(len(corpus)))

    # cut the corpus into semi-redundant sequences of max_len values
    step = 3
    starts = range(0, len(corpus) - max_len, step)
    sentences = [corpus[i: i + max_len] for i in starts]
    next_values = [corpus[i + max_len] for i in starts]
    print('nb sequences:', len(sentences))

    # transform data into binary matrices
    # (builtin `bool` instead of the `np.bool` alias, which NumPy has removed)
    X = np.zeros((len(sentences), max_len, N_values), dtype=bool)
    y = np.zeros((len(sentences), N_values), dtype=bool)

    print(X.shape)
    print(y.shape)

    for i, (sentence, target) in enumerate(zip(sentences, next_values)):
        for t, val in enumerate(sentence):
            X[i, t, val_indices[val]] = True
        y[i, val_indices[target]] = True

    model = get_model(max_len, N_values)
    model.fit(X, y, batch_size=128, epochs=N_epochs)

    return model


In [9]:
# build model
# Trains on the corpus loaded earlier; the returned object must expose the
# predict() interface that __generate_grammar() is given below.
model = build_model(corpus=corpus, val_indices=val_indices,
                    max_len=max_len, N_epochs=N_epochs)

# set up audio stream
# All generated notes and the accompanying chords are collected here.
out_stream = stream.Stream()

# generation loop
# One iteration per entry of `chords`, starting at index 1 (entry 0 is skipped).
# curr_offset is the position in out_stream where the current entry is placed.
curr_offset = 0.0
loopEnd = len(chords)
for loopIndex in range(1, loopEnd):
    # get chords from file
    # Re-insert each element at its offset modulo 4, i.e. relative to the start
    # of its own 4-unit span (presumably one 4/4 measure — confirm).
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)

    # generate grammar
    # NOTE(review): the sampling details live in generator.py; from here we only
    # see that the model, corpus, lookup tables and settings are handed over.
    curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                      abstract_grammars=abstract_grammars,
                                      values=values, val_indices=val_indices,
                                      indices_val=indices_val,
                                      max_len=max_len, max_tries=max_tries,
                                      diversity=diversity)

    # Rewrite every ' A' and ' X' token to ' C' before unparsing
    # (presumably mapping those note categories onto chord tones — confirm).
    curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')

    # Pruning #1: smoothing measure
    curr_grammar = prune_grammar(curr_grammar)

    # Get notes from grammar and chords
    curr_notes = unparse_grammar(curr_grammar, curr_chords)

    # Pruning #2: removing repeated and too close together notes
    curr_notes = prune_notes(curr_notes)

    # quality assurance: clean up notes
    curr_notes = clean_up_notes(curr_notes)

    # print # of notes in curr_notes
    # Only note.Note instances are counted; other elements are ignored.
    print('After pruning: %s notes' % (len([i for i in curr_notes
                                            if isinstance(i, note.Note)])))

    # insert into the output stream
    # Both the generated notes and the chords are shifted by curr_offset.
    for m in curr_notes:
        out_stream.insert(curr_offset + m.offset, m)
    for mc in curr_chords:
        out_stream.insert(curr_offset + mc.offset, mc)

    # Advance by 4.0, matching the modulo-4 offsets used for the chords above.
    curr_offset += 4.0

# Set the playback tempo at the very start of the stream.
out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

# save stream
# Written before playback so the generated piece is on disk even if realtime
# playback raises; try/finally guarantees the file handle is closed.
mf = midi.translate.streamToMidiFile(out_stream)
mf.open(out_fn, 'wb')
try:
    mf.write()
finally:
    mf.close()


# Play the final stream through the realtime MIDI player.
def play(x):
    """Play stream `x` with music21's realtime StreamPlayer."""
    return midi.realtime.StreamPlayer(x).play()


play(out_stream)

corpus len: 193
nb sequences: 58
(58, 20, 78)
(58, 78)
Epoch 1/128
Epoch 2/128
Epoch 3/128
Epoch 4/128
Epoch 5/128
Epoch 6/128
Epoch 7/128
Epoch 8/128
Epoch 9/128
Epoch 10/128
Epoch 11/128
Epoch 12/128
Epoch 13/128
Epoch 14/128
Epoch 15/128
Epoch 16/128
Epoch 17/128
Epoch 18/128
Epoch 19/128
Epoch 20/128
Epoch 21/128
Epoch 22/128
Epoch 23/128
Epoch 24/128
Epoch 25/128
Epoch 26/128
Epoch 27/128
Epoch 28/128
Epoch 29/128
Epoch 30/128
Epoch 31/128
Epoch 32/128
Epoch 33/128
Epoch 34/128
Epoch 35/128
Epoch 36/128
Epoch 37/128
Epoch 38/128
Epoch 39/128
Epoch 40/128
Epoch 41/128
Epoch 42/128
Epoch 43/128
Epoch 44/128
Epoch 45/128
Epoch 46/128
Epoch 47/128
Epoch 48/128
Epoch 49/128
Epoch 50/128
Epoch 51/128
Epoch 52/128
Epoch 53/128
Epoch 54/128
Epoch 55/128
Epoch 56/128
Epoch 57/128
Epoch 58/128
Epoch 59/128
Epoch 60/128
Epoch 61/128
Epoch 62/128
Epoch 63/128
Epoch 64/128
Epoch 65/128
Epoch 66/128
Epoch 67/128
Epoch 68/128
Epoch 69/128
Epoch 70/128
Epoch 71/128
Epoch 72/128
Epoch 73/128
Epoch

Epoch 100/128
Epoch 101/128
Epoch 102/128
Epoch 103/128
Epoch 104/128
Epoch 105/128
Epoch 106/128
Epoch 107/128
Epoch 108/128
Epoch 109/128
Epoch 110/128
Epoch 111/128
Epoch 112/128
Epoch 113/128
Epoch 114/128
Epoch 115/128
Epoch 116/128
Epoch 117/128
Epoch 118/128
Epoch 119/128
Epoch 120/128
Epoch 121/128
Epoch 122/128
Epoch 123/128
Epoch 124/128
Epoch 125/128
Epoch 126/128
Epoch 127/128
Epoch 128/128
After pruning: 13 notes
After pruning: 15 notes
After pruning: 12 notes
After pruning: 14 notes
After pruning: 15 notes
After pruning: 14 notes
After pruning: 12 notes
After pruning: 13 notes
After pruning: 14 notes
After pruning: 13 notes
After pruning: 13 notes
After pruning: 15 notes
After pruning: 13 notes
After pruning: 12 notes
After pruning: 12 notes
After pruning: 13 notes
After pruning: 13 notes
After pruning: 14 notes


You can play the generated sample with any MIDI player.

On Linux, I prefer timidity.

In [10]:
# !! timidity midi/deepjazz_on_metheny...128_epochs.midi