In [1]:
'''
Author:     Ji-Sung Kim
Project:    deepjazz
Purpose:    Generate jazz using a deep learning model (LSTM in deepjazz).

Some code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml 
with express permission.

Code was built while significantly referencing public examples from the
Keras documentation on GitHub:
https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py

GPU run command:
    THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python generator.py [# of epochs]

    Note: running Keras/Theano on GPU is formally supported for only NVIDIA cards (CUDA backend).
'''
from __future__ import print_function
import sys

from music21 import *
import numpy as np

from grammar import *
from preprocess import *
from qa import *
import lstm

Using TensorFlow backend.


In [2]:
#----------------------------HELPER FUNCTIONS----------------------------------#

def __sample(a, temperature=1.0):
    ''' Helper function to sample an index from a probability array.

        Arguments:
            a: 1-D array-like of non-negative probabilities.
            temperature: positive float dividing the log-probabilities;
                values below 1.0 sharpen the distribution, values above 1.0
                flatten it.

        Returns:
            The index drawn from the temperature-rescaled distribution.
    '''
    # Work in float64: with float32 input the re-normalised vector can sum to
    # slightly more than 1 (rejected by np.random.multinomial) and the
    # exponentials can underflow to zero at low temperatures.
    probs = np.asarray(a, dtype=np.float64)

    # log(0) is -inf, which maps back to probability 0 below; only the
    # warning is unwanted.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature

    # Shift by the maximum so the largest term is exp(0) == 1 and the
    # normalising sum can never collapse to zero.
    weights = np.exp(logits - np.max(logits))
    weights /= np.sum(weights)

    return np.argmax(np.random.multinomial(1, weights, 1))

In [3]:
def __predict(model, x, indices_val, diversity):
    ''' Helper function to generate a predicted value from a given matrix.

        Runs the model on x, samples an index from the first row of the
        prediction using diversity as the sampling temperature, and maps that
        index back to its value through indices_val.
    '''
    distribution = model.predict(x, verbose=0)[0]
    return indices_val[__sample(distribution, diversity)]

In [4]:
def __generate_grammar(model, corpus, abstract_grammars, values, val_indices,
                       indices_val, max_len, max_tries, diversity):
    ''' Helper function which uses the given model to generate a grammar
        sequence from a given corpus, indices_val (mapping),
        abstract_grammars (list), and diversity floating point value.

        Arguments:
            model: object passed through to __predict.
            corpus: sequence of grammar tokens; a random max_len-long slice
                of it seeds the generation.
            abstract_grammars: list of space-separated grammar strings; their
                first tokens are the fallback for the first generated note.
            values: collection of distinct tokens (only its length is used).
            val_indices: mapping token -> index used for one-hot encoding.
            indices_val: mapping index -> token, passed to __predict.
            max_len: length of the token window fed to the model.
            max_tries: number of re-predictions allowed for the first note.
            diversity: sampling temperature passed to __predict.

        Returns:
            The generated tokens joined by single spaces.

        Raises:
            ValueError: if corpus does not hold more than max_len tokens.
    '''
    def is_valid_first(token):
        # first note: must not have < > (exactly two fields) and not be a rest
        fields = token.split(',')
        return len(fields) == 2 and fields[0] != 'R'

    if len(corpus) <= max_len:
        raise ValueError('corpus must hold more than max_len (%d) values, '
                         'got %d' % (max_len, len(corpus)))

    curr_grammar = ''
    # np.random.randint is exclusive to high
    start_index = np.random.randint(0, len(corpus) - max_len)
    # copy into a list so the window can be shifted with append() below
    sentence = list(corpus[start_index: start_index + max_len])    # seed
    running_length = 0.0
    while running_length <= 4.1:    # arbitrary, from avg in input file
        # transform sentence (previous sequence) to matrix
        x = np.zeros((1, max_len, len(values)))
        for t, val in enumerate(sentence):
            x[0, t, val_indices[val]] = 1.

        next_val = __predict(model, x, indices_val, diversity)

        # fix first note: must not have < > and not be a rest
        if running_length < 0.00001:
            tries = 0
            while not is_valid_first(next_val):
                if tries >= max_tries:
                    # give up; take a random first note from the input
                    print('Gave up on first note generation after', max_tries,
                        'tries')
                    first_notes = [g.split(' ')[0] for g in abstract_grammars]
                    # prefer valid first notes; fall back to any first note so
                    # that this branch always terminates
                    candidates = ([n for n in first_notes if is_valid_first(n)]
                                  or first_notes)
                    # np.random is exclusive to high
                    next_val = candidates[np.random.randint(0, len(candidates))]
                    break

                next_val = __predict(model, x, indices_val, diversity)
                tries += 1

        # shift sentence over with new value
        sentence = sentence[1:]
        sentence.append(next_val)

        # except for first case, add a ' ' separator
        if running_length > 0.00001:
            curr_grammar += ' '
        curr_grammar += next_val

        # second comma-separated field of a token is its length
        running_length += float(next_val.split(',')[1])

    return curr_grammar

In [5]:
#----------------------------PUBLIC FUNCTIONS----------------------------------#
def generate(data_fn, out_fn, N_epochs):
    ''' Generates musical sequence based on the given data filename and
        settings. Plays then stores (MIDI file) the generated output.

        Arguments:
            data_fn: path of the input MIDI file, passed to get_musical_data.
            out_fn: path the generated MIDI file is written to.
            N_epochs: number of training epochs, passed to lstm.build_model.
    '''
    # model settings
    max_len = 20
    max_tries = 1000
    diversity = 0.5

    # musical settings
    bpm = 130

    # get data
    chords, abstract_grammars = get_musical_data(data_fn)
    corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
    print('corpus length:', len(corpus))
    print('total # of values:', len(values))

    # build model
    model = lstm.build_model(corpus=corpus, val_indices=val_indices, 
                             max_len=max_len, N_epochs=N_epochs)

    # set up audio stream
    out_stream = stream.Stream()

    # generation loop; starts at index 1, so chords[0] is never used
    curr_offset = 0.0
    loopEnd = len(chords)
    for loopIndex in range(1, loopEnd):
        # get chords from file; offsets are wrapped into the range [0, 4)
        curr_chords = stream.Voice()
        for j in chords[loopIndex]:
            curr_chords.insert((j.offset % 4), j)

        # generate grammar
        curr_grammar = __generate_grammar(model=model, corpus=corpus, 
                                          abstract_grammars=abstract_grammars, 
                                          values=values, val_indices=val_indices, 
                                          indices_val=indices_val, 
                                          max_len=max_len, max_tries=max_tries,
                                          diversity=diversity)

        # rewrite ' A' and ' X' token prefixes to ' C'
        curr_grammar = curr_grammar.replace(' A',' C').replace(' X',' C')

        # Pruning #1: smoothing measure
        curr_grammar = prune_grammar(curr_grammar)

        # Get notes from grammar and chords
        curr_notes = unparse_grammar(curr_grammar, curr_chords)

        # Pruning #2: removing repeated and too close together notes
        curr_notes = prune_notes(curr_notes)

        # quality assurance: clean up notes
        curr_notes = clean_up_notes(curr_notes)

        # print # of notes in curr_notes
        print('After pruning: %s notes' % (len([i for i in curr_notes
            if isinstance(i, note.Note)])))

        # insert into the output stream
        for m in curr_notes:
            out_stream.insert(curr_offset + m.offset, m)
        for mc in curr_chords:
            out_stream.insert(curr_offset + mc.offset, mc)

        # each generated segment starts 4.0 offset units after the previous
        curr_offset += 4.0

    out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

    # Play the final stream through the realtime MIDI player
    play = lambda x: midi.realtime.StreamPlayer(x).play()
    play(out_stream)

    # save stream; always release the file handle, even if writing fails
    mf = midi.translate.streamToMidiFile(out_stream)
    mf.open(out_fn, 'wb')
    try:
        mf.write()
    finally:
        mf.close()

In [6]:
def main(args):
    ''' Runs generate() -- generating, playing, then storing a musical
        sequence -- with the default Metheny file.

        Arguments:
            args: argv-style sequence; args[1], when present and an integer,
                is the number of training epochs. Otherwise 128 is used.
    '''
    # Catch only the failures a missing or malformed argument can cause, so
    # that KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        N_epochs = int(args[1])
    except (IndexError, ValueError, TypeError):
        N_epochs = 128 # default

    # i/o settings
    data_fn = 'midi/' + 'original_metheny.mid' # 'And Then I Knew' by Pat Metheny 
    out_fn = 'midi/' + 'deepjazz_on_metheny...' + str(N_epochs)
    out_fn += '_epoch.midi' if N_epochs == 1 else '_epochs.midi'

    generate(data_fn, out_fn, N_epochs)

In [7]:
# Input MIDI path; the same value main() assigns to its data_fn.
data_fn = 'midi/' + 'original_metheny.mid'

In [8]:
# Epoch count used to build the output file name below.
# NOTE(review): N_epochs is reassigned to 10 later, right before model.fit.
N_epochs = 1

In [9]:
# Output path stem: the two adjacent string literals are concatenated by the
# parser, then the epoch count is appended.
out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)

In [10]:
# Rebuild the whole name from N_epochs (instead of a bare `out_fn += ...`) so
# that re-running this cell cannot append the suffix twice; the singular /
# plural choice mirrors main().
out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)
out_fn += '_epoch.midi' if N_epochs == 1 else '_epochs.midi'

In [11]:
# Display the assembled output path.
out_fn

'midi/deepjazz_on_metheny...1_epoch.midi'

In [12]:
# Module-level copies of the settings generate() defines locally, so the
# following cells can walk through its steps one at a time.
max_len = 20       # length of each token window (see the slicing loop below)
max_tries = 1000   # retry cap for the first note in __generate_grammar
diversity = 0.5    # sampling temperature that __predict hands to __sample

# musical settings
bpm = 130          # generate() uses this value for its MetronomeMark

# get data

In [13]:
# Load the chord groups and abstract grammar strings from the input MIDI file.
# get_musical_data presumably comes from one of the star-imports at the top of
# the notebook -- TODO confirm which module.
chords, abstract_grammars = get_musical_data(data_fn)

In [26]:
# Entry 0 holds instrument / tempo / key / time-signature objects rather than
# chords (see output), consistent with generate() starting its loop at index 1.
chords[0]

[<music21.instrument.Instrument Piano>,
 <music21.tempo.MetronomeMark Quarter=112.0>,
 <music21.key.Key of G major>,
 <music21.meter.TimeSignature 4/4>]

In [27]:
# Each grammar is a single string of space-separated tokens; a token is a
# comma-separated label, length and optional <...> pair (see output).
abstract_grammars[0]

'C,0.500 C,0.333,<P1,d-5> C,0.250,<M-2,m-6> C,0.250,<M3,d-3> C,0.250,<d6,m2> C,0.250,<d5,P1> C,0.667,<M2,d-4> C,0.250,<d1,P-5> C,0.250,<P-4,d-8> S,0.250,<d4,M-2> S,0.250,<P5,A1> C,0.250,<d5,P1>'

In [29]:
# Entry 1 is a list of music21 Chord objects (see output).
chords[1]

[<music21.chord.Chord E-4 G4 C4 B-3 G#2>, <music21.chord.Chord B-3 F4 D4 A3>]

In [30]:
# Second grammar string, same token format as abstract_grammars[0].
abstract_grammars[1]

'C,0.500 S,0.250,<m2,P-4> C,0.250,<P4,m-2> A,0.250,<P4,m-2> S,0.500,<d1,P-5> C,0.250,<P1,d-5> C,0.250,<m2,P-4> A,0.250,<m2,P-4> C,0.250,<M2,d-4> A,0.250,<d4,M-2> C,0.250,<P4,m-2> C,0.250,<P4,m-2>'

In [31]:
# Build the token corpus and lookup tables from the grammar strings. Per the
# outputs below: corpus is a flat list of tokens, values the set of distinct
# tokens, val_indices maps token -> index and indices_val maps index -> token.
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)

In [32]:
# Inspect the token corpus.
# NOTE(review): this prints the entire list; a slice such as corpus[:10]
# would keep the notebook output short.
corpus

['C,0.500',
 'C,0.333,<P1,d-5>',
 'C,0.250,<M-2,m-6>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<d6,m2>',
 'C,0.250,<d5,P1>',
 'C,0.667,<M2,d-4>',
 'C,0.250,<d1,P-5>',
 'C,0.250,<P-4,d-8>',
 'S,0.250,<d4,M-2>',
 'S,0.250,<P5,A1>',
 'C,0.250,<d5,P1>',
 'C,0.500',
 'S,0.250,<m2,P-4>',
 'C,0.250,<P4,m-2>',
 'A,0.250,<P4,m-2>',
 'S,0.500,<d1,P-5>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'C,0.250,<M2,d-4>',
 'A,0.250,<d4,M-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250',
 'C,0.250,<d2,A-4>',
 'A,0.250,<m-2,d-6>',
 'C,0.250,<d3,M-3>',
 'A,0.250,<M3,d-3>',
 'C,0.250,<d6,m2>',
 'C,0.250,<A4,d-2>',
 'C,0.250,<d3,M-3>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'S,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'C,0.250',
 'S,0.250,<d6,m2>',
 'X,0.250,<A4,d-2>',
 'S,0.250,<dd5,d1>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<dd5,d1>',
 'S,0.250,<P4,m-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<M3,d-3>',
 'C,0.250,

In [33]:
# Inspect the set of distinct tokens.
values

{'A,0.250,<M2,d-4>',
 'A,0.250,<M3,d-3>',
 'A,0.250,<P-4,d-8>',
 'A,0.250,<P1,d-5>',
 'A,0.250,<P4,m-2>',
 'A,0.250,<d4,M-2>',
 'A,0.250,<d5,P1>',
 'A,0.250,<m-2,d-6>',
 'A,0.250,<m2,P-4>',
 'A,0.333,<A4,d-2>',
 'A,0.333,<M2,d-4>',
 'A,0.333,<P1,d-5>',
 'C,0.250',
 'C,0.250,<A-4,P-8>',
 'C,0.250,<A4,d-2>',
 'C,0.250,<M-2,m-6>',
 'C,0.250,<M2,d-4>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<P-4,d-8>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<P11,M7>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<P5,A1>',
 'C,0.250,<d1,P-5>',
 'C,0.250,<d2,A-4>',
 'C,0.250,<d3,M-3>',
 'C,0.250,<d4,M-2>',
 'C,0.250,<d5,P1>',
 'C,0.250,<d6,m2>',
 'C,0.250,<dd5,d1>',
 'C,0.250,<m-2,d-6>',
 'C,0.250,<m2,P-4>',
 'C,0.250,<m3,m-3>',
 'C,0.250,<m7,M3>',
 'C,0.333,<A4,d-2>',
 'C,0.333,<M2,d-4>',
 'C,0.333,<P1,d-5>',
 'C,0.333,<P5,A1>',
 'C,0.333,<d1,P-5>',
 'C,0.333,<m2,P-4>',
 'C,0.333,<m3,m-3>',
 'C,0.333,<m7,M3>',
 'C,0.500',
 'C,0.500,<P4,m-2>',
 'C,0.500,<m2,P-4>',
 'C,0.500,<m6,M2>',
 'C,0.667,<M2,d-4>',
 'C,0.667,<d6,m2>',
 'C,0.750,<m3

In [34]:
# Inspect the token -> index mapping used for one-hot encoding.
val_indices

{'A,0.250,<M2,d-4>': 28,
 'A,0.250,<M3,d-3>': 53,
 'A,0.250,<P-4,d-8>': 41,
 'A,0.250,<P1,d-5>': 73,
 'A,0.250,<P4,m-2>': 13,
 'A,0.250,<d4,M-2>': 50,
 'A,0.250,<d5,P1>': 39,
 'A,0.250,<m-2,d-6>': 64,
 'A,0.250,<m2,P-4>': 7,
 'A,0.333,<A4,d-2>': 22,
 'A,0.333,<M2,d-4>': 77,
 'A,0.333,<P1,d-5>': 70,
 'C,0.250': 12,
 'C,0.250,<A-4,P-8>': 58,
 'C,0.250,<A4,d-2>': 32,
 'C,0.250,<M-2,m-6>': 52,
 'C,0.250,<M2,d-4>': 61,
 'C,0.250,<M3,d-3>': 11,
 'C,0.250,<P-4,d-8>': 63,
 'C,0.250,<P1,d-5>': 1,
 'C,0.250,<P11,M7>': 27,
 'C,0.250,<P4,m-2>': 34,
 'C,0.250,<P5,A1>': 10,
 'C,0.250,<d1,P-5>': 40,
 'C,0.250,<d2,A-4>': 76,
 'C,0.250,<d3,M-3>': 69,
 'C,0.250,<d4,M-2>': 51,
 'C,0.250,<d5,P1>': 55,
 'C,0.250,<d6,m2>': 8,
 'C,0.250,<dd5,d1>': 15,
 'C,0.250,<m-2,d-6>': 35,
 'C,0.250,<m2,P-4>': 17,
 'C,0.250,<m3,m-3>': 6,
 'C,0.250,<m7,M3>': 75,
 'C,0.333,<A4,d-2>': 18,
 'C,0.333,<M2,d-4>': 74,
 'C,0.333,<P1,d-5>': 49,
 'C,0.333,<P5,A1>': 30,
 'C,0.333,<d1,P-5>': 23,
 'C,0.333,<m2,P-4>': 45,
 'C,0.333,<m3

In [35]:
# Inspect the index -> token mapping used to decode predictions.
indices_val

{0: 'S,0.333,<d7,m3>',
 1: 'C,0.250,<P1,d-5>',
 2: 'S,0.667,<d5,P1>',
 3: 'X,0.250,<d1,P-5>',
 4: 'S,0.250,<A4,d-2>',
 5: 'C,0.500,<m2,P-4>',
 6: 'C,0.250,<m3,m-3>',
 7: 'A,0.250,<m2,P-4>',
 8: 'C,0.250,<d6,m2>',
 9: 'S,0.250,<d1,P-5>',
 10: 'C,0.250,<P5,A1>',
 11: 'C,0.250,<M3,d-3>',
 12: 'C,0.250',
 13: 'A,0.250,<P4,m-2>',
 14: 'C,0.333,<m7,M3>',
 15: 'C,0.250,<dd5,d1>',
 16: 'S,0.250,<P4,m-2>',
 17: 'C,0.250,<m2,P-4>',
 18: 'C,0.333,<A4,d-2>',
 19: 'S,0.250,<m2,P-4>',
 20: 'S,0.250,<d6,m2>',
 21: 'S,0.250,<m3,m-3>',
 22: 'A,0.333,<A4,d-2>',
 23: 'C,0.333,<d1,P-5>',
 24: 'C,0.667,<d6,m2>',
 25: 'C,0.750,<m3,m-3>',
 26: 'S,0.250',
 27: 'C,0.250,<P11,M7>',
 28: 'A,0.250,<M2,d-4>',
 29: 'S,0.333,<d1,P-5>',
 30: 'C,0.333,<P5,A1>',
 31: 'S,0.250,<d5,P1>',
 32: 'C,0.250,<A4,d-2>',
 33: 'X,0.250,<d2,A-4>',
 34: 'C,0.250,<P4,m-2>',
 35: 'C,0.250,<m-2,d-6>',
 36: 'S,0.333,<m2,P-4>',
 37: 'X,0.250,<m6,M2>',
 38: 'X,0.250,<M-2,m-6>',
 39: 'A,0.250,<d5,P1>',
 40: 'C,0.250,<d1,P-5>',
 41: 'A,0.25

In [36]:
# Report corpus size and vocabulary size (the same two lines generate() prints).
print('corpus length:', len(corpus))
print('total # of values:', len(values))

corpus length: 193
total # of values: 78


In [39]:
# Spot-check the index -> token direction of the lookup tables.
indices_val[10]

'C,0.250,<P5,A1>'

In [40]:
# ...and the reverse direction: the same token maps back to index 10.
val_indices['C,0.250,<P5,A1>']

10

In [41]:
# Vocabulary size: number of distinct tokens in the corpus. It sets the width
# of the one-hot vectors and of the model's output layer below.
N_values = len(set(corpus))

In [42]:
# cut the corpus into semi-redundant sequences of max_len values
step = 3            # distance between the starts of consecutive windows
sentences = []      # input windows, each max_len tokens long
next_values = []    # token that follows each window (the prediction target)

In [43]:
# Slide a max_len-wide window over the corpus every `step` tokens: each window
# is one training input and the token right after it is its target.
# Both lists are rebuilt from scratch here, so re-running this cell does not
# append duplicate sequences.
window_starts = range(0, len(corpus) - max_len, step)
sentences = [corpus[i: i + max_len] for i in window_starts]
next_values = [corpus[i + max_len] for i in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 58


In [47]:
# Confirm the window length in use.
max_len

20

In [48]:
# First two windows; the second starts `step` tokens after the first, so
# consecutive windows overlap.
sentences[:2]

[['C,0.500',
  'C,0.333,<P1,d-5>',
  'C,0.250,<M-2,m-6>',
  'C,0.250,<M3,d-3>',
  'C,0.250,<d6,m2>',
  'C,0.250,<d5,P1>',
  'C,0.667,<M2,d-4>',
  'C,0.250,<d1,P-5>',
  'C,0.250,<P-4,d-8>',
  'S,0.250,<d4,M-2>',
  'S,0.250,<P5,A1>',
  'C,0.250,<d5,P1>',
  'C,0.500',
  'S,0.250,<m2,P-4>',
  'C,0.250,<P4,m-2>',
  'A,0.250,<P4,m-2>',
  'S,0.500,<d1,P-5>',
  'C,0.250,<P1,d-5>',
  'C,0.250,<m2,P-4>',
  'A,0.250,<m2,P-4>'],
 ['C,0.250,<M3,d-3>',
  'C,0.250,<d6,m2>',
  'C,0.250,<d5,P1>',
  'C,0.667,<M2,d-4>',
  'C,0.250,<d1,P-5>',
  'C,0.250,<P-4,d-8>',
  'S,0.250,<d4,M-2>',
  'S,0.250,<P5,A1>',
  'C,0.250,<d5,P1>',
  'C,0.500',
  'S,0.250,<m2,P-4>',
  'C,0.250,<P4,m-2>',
  'A,0.250,<P4,m-2>',
  'S,0.500,<d1,P-5>',
  'C,0.250,<P1,d-5>',
  'C,0.250,<m2,P-4>',
  'A,0.250,<m2,P-4>',
  'C,0.250,<M2,d-4>',
  'A,0.250,<d4,M-2>',
  'C,0.250,<P4,m-2>']]

In [49]:
# transform data into binary matrices
# X[i, t, k] is True when token t of window i has vocabulary index k.
# The builtin bool is used because the np.bool alias was deprecated in
# NumPy 1.20 and removed in 1.24.
X = np.zeros((len(sentences), max_len, N_values), dtype=bool)

In [50]:
# y[i, k] is True when the token following window i has vocabulary index k.
# The builtin bool is used because the np.bool alias was deprecated in
# NumPy 1.20 and removed in 1.24.
y = np.zeros((len(sentences), N_values), dtype=bool)

In [51]:
# (number of windows, max_len, N_values)
X.shape

(58, 20, 78)

In [52]:
# (number of windows, N_values)
y.shape

(58, 78)

In [53]:
# One-hot encode every window into X and its following token into y.
for i, sentence in enumerate(sentences):
    for t, val in enumerate(sentence):
        # mark the vocabulary index of token t in window i
        X[i, t, val_indices[val]] = 1
    # mark the vocabulary index of the token that follows window i
    y[i, val_indices[next_values[i]]] = 1

In [55]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM

In [56]:
# Two stacked 128-unit LSTM layers, each followed by 20% dropout, feeding a
# softmax over the N_values vocabulary entries. The first LSTM returns the
# full sequence so the second one can consume it.
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(max_len, N_values)),
    Dropout(0.2),
    LSTM(128, return_sequences=False),
    Dropout(0.2),
    Dense(N_values),
    Activation('softmax'),
])

In [57]:
# Categorical cross-entropy matches the one-hot targets in y and the softmax
# output layer.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

In [58]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 20, 128)           105984    
_________________________________________________________________
dropout_1 (Dropout)          (None, 20, 128)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 78)                10062     
_________________________________________________________________
activation_1 (Activation)    (None, 78)                0         
Total params: 247,630.0
Trainable params: 247,630.0
Non-trainable params: 0.0
________________________________________________________________

In [61]:
# Number of training epochs for the fit below.
# NOTE(review): out_fn was built earlier from N_epochs = 1 and is not
# recomputed after this reassignment.
N_epochs=10

In [62]:
# Train on all windows; batch_size (128) is larger than the 58 sequences
# reported above.
model.fit(X, y, batch_size=128, epochs=N_epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x15444860>

### __generate_grammar

In [63]:
# Step-by-step walkthrough of the first lines of __generate_grammar.
curr_grammar = ''
# np.random.randint is exclusive to high
# pick a start such that a full max_len-long seed window fits in the corpus
start_index = np.random.randint(0, len(corpus) - max_len)

In [64]:
# Show the randomly chosen seed position (differs between runs: no seed is set).
start_index

101

In [65]:
# Seed window: max_len consecutive corpus tokens starting at start_index.
sentence = corpus[start_index: start_index + max_len]    # seed

In [66]:
# Inspect the seed window.
sentence

['C,0.250,<m2,P-4>',
 'A,0.333,<M2,d-4>',
 'C,0.250,<d3,M-3>',
 'X,0.250,<d1,P-5>',
 'S,0.250,<A4,d-2>',
 'S,0.250,<d1,P-5>',
 'C,0.250',
 'C,0.250,<P1,d-5>',
 'C,0.333,<m3,m-3>',
 'S,0.333,<m2,P-4>',
 'C,0.333,<m2,P-4>',
 'C,0.250,<m3,m-3>',
 'C,0.250,<P1,d-5>',
 'C,0.333,<m3,m-3>',
 'C,0.250,<d5,P1>',
 'C,0.250,<P5,A1>',
 'S,0.250',
 'S,0.250,<P1,d-5>',
 'C,0.333,<m7,M3>',
 'S,0.333,<d1,P-5>']

In [67]:
# The seed window should contain exactly max_len tokens.
len(sentence)

20

In [68]:
# Accumulated length of the generated tokens; __generate_grammar keeps
# generating while this is <= 4.1.
running_length = 0.0

In [None]:
# NOTE(review): this cell redefines the __generate_grammar already defined
# earlier in the notebook; whichever definition is executed last wins.
def __generate_grammar(model, corpus, abstract_grammars, values, val_indices,
                       indices_val, max_len, max_tries, diversity):
    ''' Helper function which uses the given model to generate a grammar
        sequence from a given corpus, indices_val (mapping),
        abstract_grammars (list), and diversity floating point value.

        Arguments:
            model: object passed through to __predict.
            corpus: sequence of grammar tokens; a random max_len-long slice
                of it seeds the generation.
            abstract_grammars: list of space-separated grammar strings; their
                first tokens are the fallback for the first generated note.
            values: collection of distinct tokens (only its length is used).
            val_indices: mapping token -> index used for one-hot encoding.
            indices_val: mapping index -> token, passed to __predict.
            max_len: length of the token window fed to the model.
            max_tries: number of re-predictions allowed for the first note.
            diversity: sampling temperature passed to __predict.

        Returns:
            The generated tokens joined by single spaces.

        Raises:
            ValueError: if corpus does not hold more than max_len tokens.
    '''
    def is_valid_first(token):
        # first note: must not have < > (exactly two fields) and not be a rest
        fields = token.split(',')
        return len(fields) == 2 and fields[0] != 'R'

    if len(corpus) <= max_len:
        raise ValueError('corpus must hold more than max_len (%d) values, '
                         'got %d' % (max_len, len(corpus)))

    curr_grammar = ''
    # np.random.randint is exclusive to high
    start_index = np.random.randint(0, len(corpus) - max_len)
    # copy into a list so the window can be shifted with append() below
    sentence = list(corpus[start_index: start_index + max_len])    # seed
    running_length = 0.0
    while running_length <= 4.1:    # arbitrary, from avg in input file
        # transform sentence (previous sequence) to matrix
        x = np.zeros((1, max_len, len(values)))
        for t, val in enumerate(sentence):
            x[0, t, val_indices[val]] = 1.

        next_val = __predict(model, x, indices_val, diversity)

        # fix first note: must not have < > and not be a rest
        if running_length < 0.00001:
            tries = 0
            while not is_valid_first(next_val):
                if tries >= max_tries:
                    # give up; take a random first note from the input
                    print('Gave up on first note generation after', max_tries,
                        'tries')
                    first_notes = [g.split(' ')[0] for g in abstract_grammars]
                    # prefer valid first notes; fall back to any first note so
                    # that this branch always terminates
                    candidates = ([n for n in first_notes if is_valid_first(n)]
                                  or first_notes)
                    # np.random is exclusive to high
                    next_val = candidates[np.random.randint(0, len(candidates))]
                    break

                next_val = __predict(model, x, indices_val, diversity)
                tries += 1

        # shift sentence over with new value
        sentence = sentence[1:]
        sentence.append(next_val)

        # except for first case, add a ' ' separator
        if running_length > 0.00001:
            curr_grammar += ' '
        curr_grammar += next_val

        # second comma-separated field of a token is its length
        running_length += float(next_val.split(',')[1])

    return curr_grammar