## Homework: Deep Jazz

In [1]:
import numpy as np
from music21 import stream, midi, tempo, note

#import lstm
from grammar import unparse_grammar
from preprocess import get_musical_data, get_corpus_data

from qa import prune_grammar, prune_notes, clean_up_notes
from generator import __sample, __generate_grammar, __predict

In [6]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
import prettytensor as pt

from sklearn.utils import shuffle, resample

In [20]:
import tensorflow.contrib.slim as slim
from tensorflow.contrib.rnn import DropoutWrapper
from tqdm import tqdm

In [2]:
N_epochs = 128  # default
data_fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)

In [3]:
max_len = 20
max_tries = 1000
diversity = 0.5

# musical settings
bpm = 130

# get data
chords, abstract_grammars = get_musical_data(data_fn)
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
print('corpus length:', len(corpus))
print('total # of values:', len(values))

corpus length: 193
total # of values: 78


In [4]:
chords[0]

[<music21.instrument.Piano Piano>,
 <music21.tempo.MetronomeMark Quarter=112.0>,
 <music21.key.Key of G major>,
 <music21.meter.TimeSignature 4/4>]

In [5]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM

def get_keras_model(max_len, N_values):
    # build a 2 stacked LSTM
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=(max_len, N_values)))
    #model.add(Dropout(0.2))
    #model.add(LSTM(128, return_sequences=False))
    #model.add(Dropout(0.2))
    model.add(Dense(N_values))
    model.add(Activation('softmax'))

    #model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.001))
    return model

Using TensorFlow backend.


## Task

Replace previous model with equivalent in prettytensor or tf.slim

Try to make you code as compact as possible

In [10]:
def get_pretty_tensor_model(max_len, N_values):
    # your code here
    pass

In [50]:
class SlimModel:
    def __init__(self, max_len, N_values):
        tf.reset_default_graph()
        self.max_len = max_len
        self.N_values = N_values
        self.create_placeholders()
        self.build()        
    
    def create_placeholders(self):
        self.x_placeholder = tf.placeholder(tf.float32, shape=(None, self.max_len, self.N_values))
        self.y_placeholder = tf.placeholder(tf.float32, shape=(None,self.N_values))
    
    def build(self):
        with tf.variable_scope('part1'):
            cell = tf.nn.rnn_cell.LSTMCell(128, initializer=tf.random_normal_initializer(0.0, 0.7))
            dropout = DropoutWrapper(cell, output_keep_prob=0.8) 
            outputs, last_state = tf.nn.dynamic_rnn(dropout, self.x_placeholder, dtype=tf.float32)
#             outputs, last_state = tf.nn.dynamic_rnn(cell, self.x_placeholder, dtype=tf.float32)
        with tf.variable_scope('part2'):
            cell2 = tf.nn.rnn_cell.LSTMCell(128, initializer=tf.random_normal_initializer(0.0, 0.7))
            dropout2 = DropoutWrapper(cell2, output_keep_prob=0.8) 
            outputs2, last_state2 = tf.nn.dynamic_rnn(dropout2, outputs, dtype=tf.float32)
#             outputs2, last_state2 = tf.nn.dynamic_rnn(cell2, outputs, dtype=tf.float32)
        logits = slim.fully_connected(inputs=last_state2[1], num_outputs= self.N_values)
        
        print(outputs2.shape, last_state2[1].shape, logits.shape)

        self.prediction = slim.softmax(logits)
        self.loss = tf.losses.softmax_cross_entropy(self.y_placeholder, logits)
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(self.loss)
        
        
    def predict(self, x, verbose=0):
        return self.session.run(self.prediction, feed_dict={self.x_placeholder: x})
    
    @staticmethod
    def __rand_idx__(batch_size, shape):
        return np.random.randint(low=0, high= shape - 1, size=batch_size) 
        
    def __gen_batch__(self, X, Y, batch_size):
        idxs = SlimModel.__rand_idx__(batch_size=batch_size, shape=X.shape[0])
        return X[idxs], Y[idxs]
    
    def fit(self, X, y, batch_size, epochs):
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())
        
        for epoch in range(epochs):
            X_batch, y_batch = self.__gen_batch__(X, y, batch_size)
            _, loss = self.session.run([self.optimizer, self.loss], feed_dict = {self.x_placeholder: X_batch, 
                                                                                 self.y_placeholder: y_batch})
            print('epoch_{} loss: {:.3f}'.format(epoch, loss))

def get_slim_model(max_len, N_values):
    return SlimModel(max_len, N_values)

In [18]:
get_model = get_slim_model #get_keras_model # get_pretty_tensor_model

In [10]:
import numpy as np

''' Build a 2-layer LSTM from a training corpus '''


def build_model(corpus, val_indices, max_len, N_epochs=128):
    # number of different values or words in corpus
    N_values = len(set(corpus))

    # cut the corpus into semi-redundant sequences of max_len values
    step = 3
    sentences = []
    next_values = []
    for i in range(0, len(corpus) - max_len, step):
        sentences.append(corpus[i: i + max_len])
        next_values.append(corpus[i + max_len])
    print('nb sequences:', len(sentences))

    # transform data into binary matrices
    X = np.zeros((len(sentences), max_len, N_values), dtype=np.bool)
    y = np.zeros((len(sentences), N_values), dtype=np.bool)
    for i, sentence in enumerate(sentences):
        for t, val in enumerate(sentence):
            X[i, t, val_indices[val]] = 1
        y[i, val_indices[next_values[i]]] = 1

    model = get_model(max_len, N_values)
    model.fit(X, y, batch_size=128, epochs=N_epochs)

    return model


In [51]:
# build model
model = build_model(corpus=corpus, val_indices=val_indices,
                         max_len=max_len, N_epochs=N_epochs)

# set up audio stream
out_stream = stream.Stream()

# generation loop
curr_offset = 0.0
loopEnd = len(chords)
for loopIndex in range(1, loopEnd):
    # get chords from file
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)

    # generate grammar
    curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                      abstract_grammars=abstract_grammars,
                                      values=values, val_indices=val_indices,
                                      indices_val=indices_val,
                                      max_len=max_len, max_tries=max_tries,
                                      diversity=diversity)

    curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')

    # Pruning #1: smoothing measure
    curr_grammar = prune_grammar(curr_grammar)

    # Get notes from grammar and chords
    curr_notes = unparse_grammar(curr_grammar, curr_chords)

    # Pruning #2: removing repeated and too close together notes
    curr_notes = prune_notes(curr_notes)

    # quality assurance: clean up notes
    curr_notes = clean_up_notes(curr_notes)

    # print # of notes in curr_notes
    print('After pruning: %s notes' % (len([i for i in curr_notes
                                            if isinstance(i, note.Note)])))

    # insert into the output stream
    for m in curr_notes:
        out_stream.insert(curr_offset + m.offset, m)
    for mc in curr_chords:
        out_stream.insert(curr_offset + mc.offset, mc)

    curr_offset += 4.0

out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

# Play the final stream through output (see 'play' lambda function above)
play = lambda x: midi.realtime.StreamPlayer(x).play()
play(out_stream)

# save stream
mf = midi.translate.streamToMidiFile(out_stream)
mf.open(out_fn, 'wb')
mf.write()
mf.close()    

nb sequences: 58
(?, 20, 128) (?, 128) (?, 78)
epoch_0 loss: 4.428
epoch_1 loss: 4.404
epoch_2 loss: 4.414
epoch_3 loss: 4.419
epoch_4 loss: 4.401
epoch_5 loss: 4.424
epoch_6 loss: 4.440
epoch_7 loss: 4.438
epoch_8 loss: 4.348
epoch_9 loss: 4.356
epoch_10 loss: 4.377
epoch_11 loss: 4.428
epoch_12 loss: 4.309
epoch_13 loss: 4.424
epoch_14 loss: 4.379
epoch_15 loss: 4.404
epoch_16 loss: 4.272
epoch_17 loss: 4.302
epoch_18 loss: 4.206
epoch_19 loss: 4.153
epoch_20 loss: 4.202
epoch_21 loss: 4.145
epoch_22 loss: 4.042
epoch_23 loss: 4.139
epoch_24 loss: 4.058
epoch_25 loss: 4.046
epoch_26 loss: 4.020
epoch_27 loss: 4.029
epoch_28 loss: 4.041
epoch_29 loss: 3.934
epoch_30 loss: 4.000
epoch_31 loss: 3.779
epoch_32 loss: 3.792
epoch_33 loss: 3.842
epoch_34 loss: 3.861
epoch_35 loss: 3.821
epoch_36 loss: 3.824
epoch_37 loss: 3.767
epoch_38 loss: 3.725
epoch_39 loss: 3.505
epoch_40 loss: 3.741
epoch_41 loss: 4.083
epoch_42 loss: 4.136
epoch_43 loss: 4.233
epoch_44 loss: 4.269
epoch_45 loss: 4.2

StreamPlayerException: StreamPlayer requires pygame.  Install first

You can play generated sample using any midi player

Under linux I prefer timidity

In [None]:
!!timidity midi/deepjazz_on_metheny...128_epochs.midi