In [1]:
from __future__ import print_function
import IPython
import sys
import pygame
from music21 import *
import numpy as np
from grammar import *
from qa import *
from preprocess import *
from keras.models import load_model, Model
from keras.layers import Dense, Activation, Dropout, Input, LSTM, Reshape, Lambda, RepeatVector
from keras.initializers import glorot_uniform
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras import backend as K

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


Using TensorFlow backend.


# Process Music Data

A "value" can be thought of informally as a note, which comprises a pitch and a duration. For example, if you press down a specific piano key for 0.5 seconds, then you have just played a note.

In [2]:
# "R,0.125": a rest element lasting 0.125 quarter notes, i.e. a 1/32 note.
# "C,0.125,<M-2,m-6>": a chord note of the same (1/32) length, generated
#                      anywhere from a minor 6th down to a major 2nd down.

In [3]:
# Load the chords and abstract grammars from the source MIDI file, then build
# the token corpus together with its value <-> index lookup tables.
chords, abstract_grammars = get_musical_data('./midi/original_metheny.mid')
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
print('corpus length: %d' % len(corpus))
print('total # of values: %d' % len(values))

corpus length: 193
total # of values: 78


In [4]:
# Inspect the offset of the first element of chords[1]
# (presumably its position in the source stream -- confirm against music21).
chords[1][0].offset

476.0

In [5]:
# Offset of the second element of the same group, for comparison with the first.
chords[1][1].offset

478.0

In [6]:
# Offset of the first element of the next group, chords[2].
chords[2][0].offset

480.0

In [7]:
# Inspect the abstract grammars: a list of strings, each a space-separated
# sequence of tokens such as 'C,0.250,<M3,d-3>'.
# NOTE(review): this displays the whole list; a slice would keep the output short.
abstract_grammars

['C,0.500 C,0.333,<P1,d-5> C,0.250,<M-2,m-6> C,0.250,<M3,d-3> C,0.250,<d6,m2> C,0.250,<d5,P1> C,0.667,<M2,d-4> C,0.250,<d1,P-5> C,0.250,<P-4,d-8> S,0.250,<d4,M-2> S,0.250,<P5,A1> C,0.250,<d5,P1>',
 'C,0.500 S,0.250,<m2,P-4> C,0.250,<P4,m-2> A,0.250,<P4,m-2> S,0.500,<d1,P-5> C,0.250,<P1,d-5> C,0.250,<m2,P-4> A,0.250,<m2,P-4> C,0.250,<M2,d-4> A,0.250,<d4,M-2> C,0.250,<P4,m-2> C,0.250,<P4,m-2>',
 'C,0.250 C,0.250,<d2,A-4> A,0.250,<m-2,d-6> C,0.250,<d3,M-3> A,0.250,<M3,d-3> C,0.250,<d6,m2> C,0.250,<A4,d-2> C,0.250,<d3,M-3> C,0.250,<m2,P-4> A,0.250,<m2,P-4> S,0.250,<P1,d-5> C,0.250,<m2,P-4> C,0.250,<P1,d-5> C,0.250,<m2,P-4> A,0.250,<m2,P-4>',
 'C,0.250 S,0.250,<d6,m2> X,0.250,<A4,d-2> S,0.250,<dd5,d1> C,0.250,<P4,m-2> C,0.250,<M3,d-3> C,0.250,<dd5,d1> S,0.250,<P4,m-2> C,0.250,<P4,m-2> C,0.250,<M3,d-3> C,0.250,<dd5,d1> A,0.250,<M3,d-3>',
 'C,0.500 A,0.333,<M2,d-4> C,0.250,<d4,M-2> A,0.333,<M2,d-4> C,0.250,<m2,P-4> C,0.250,<A-4,P-8> C,0.250,<P4,m-2> A,0.250,<P4,m-2> S,0.250,<d5,P1> A,0.333,<P

In [8]:
# Inspect the corpus: a flat list with one grammar token per element.
# NOTE(review): this displays the whole list; a slice would keep the output short.
corpus

['C,0.500',
 'C,0.333,<P1,d-5>',
 'C,0.250,<M-2,m-6>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<d6,m2>',
 'C,0.250,<d5,P1>',
 'C,0.667,<M2,d-4>',
 'C,0.250,<d1,P-5>',
 'C,0.250,<P-4,d-8>',
 'S,0.250,<d4,M-2>',
 'S,0.250,<P5,A1>',
 'C,0.250,<d5,P1>',
 'C,0.500',
 'S,0.250,<m2,P-4>',
 'C,0.250,<P4,m-2>',
 'A,0.250,<P4,m-2>',
 'S,0.500,<d1,P-5>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'C,0.250,<M2,d-4>',
 'A,0.250,<d4,M-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250',
 'C,0.250,<d2,A-4>',
 'A,0.250,<m-2,d-6>',
 'C,0.250,<d3,M-3>',
 'A,0.250,<M3,d-3>',
 'C,0.250,<d6,m2>',
 'C,0.250,<A4,d-2>',
 'C,0.250,<d3,M-3>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'S,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'C,0.250',
 'S,0.250,<d6,m2>',
 'X,0.250,<A4,d-2>',
 'S,0.250,<dd5,d1>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<dd5,d1>',
 'S,0.250,<P4,m-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<M3,d-3>',
 'C,0.250,

In [9]:
# Inspect the set of distinct tokens found in the corpus.
values

{'A,0.250,<M2,d-4>',
 'A,0.250,<M3,d-3>',
 'A,0.250,<P-4,d-8>',
 'A,0.250,<P1,d-5>',
 'A,0.250,<P4,m-2>',
 'A,0.250,<d4,M-2>',
 'A,0.250,<d5,P1>',
 'A,0.250,<m-2,d-6>',
 'A,0.250,<m2,P-4>',
 'A,0.333,<A4,d-2>',
 'A,0.333,<M2,d-4>',
 'A,0.333,<P1,d-5>',
 'C,0.250',
 'C,0.250,<A-4,P-8>',
 'C,0.250,<A4,d-2>',
 'C,0.250,<M-2,m-6>',
 'C,0.250,<M2,d-4>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<P-4,d-8>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<P11,M7>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<P5,A1>',
 'C,0.250,<d1,P-5>',
 'C,0.250,<d2,A-4>',
 'C,0.250,<d3,M-3>',
 'C,0.250,<d4,M-2>',
 'C,0.250,<d5,P1>',
 'C,0.250,<d6,m2>',
 'C,0.250,<dd5,d1>',
 'C,0.250,<m-2,d-6>',
 'C,0.250,<m2,P-4>',
 'C,0.250,<m3,m-3>',
 'C,0.250,<m7,M3>',
 'C,0.333,<A4,d-2>',
 'C,0.333,<M2,d-4>',
 'C,0.333,<P1,d-5>',
 'C,0.333,<P5,A1>',
 'C,0.333,<d1,P-5>',
 'C,0.333,<m2,P-4>',
 'C,0.333,<m3,m-3>',
 'C,0.333,<m7,M3>',
 'C,0.500',
 'C,0.500,<P4,m-2>',
 'C,0.500,<m2,P-4>',
 'C,0.500,<m6,M2>',
 'C,0.667,<M2,d-4>',
 'C,0.667,<d6,m2>',
 'C,0.750,<m3

In [10]:
# Inspect the token -> integer index lookup used for one-hot encoding.
val_indices

{'C,0.250,<M-2,m-6>': 0,
 'C,0.250,<P-4,d-8>': 1,
 'C,0.250,<A-4,P-8>': 2,
 'S,0.750,<d5,P1>': 3,
 'S,0.250,<m3,m-3>': 4,
 'C,0.250,<P1,d-5>': 5,
 'C,0.667,<M2,d-4>': 6,
 'S,0.333': 7,
 'S,0.500,<d1,P-5>': 8,
 'C,0.250,<d1,P-5>': 9,
 'C,0.250,<dd5,d1>': 10,
 'C,0.333,<P5,A1>': 11,
 'C,0.250,<P4,m-2>': 12,
 'C,0.333,<M2,d-4>': 13,
 'C,0.250': 14,
 'S,0.250,<d5,P1>': 15,
 'C,0.250,<d5,P1>': 16,
 'C,0.250,<m2,P-4>': 17,
 'C,0.250,<A4,d-2>': 18,
 'A,0.250,<P1,d-5>': 19,
 'S,0.250,<P5,A1>': 20,
 'C,0.333,<m3,m-3>': 21,
 'S,0.250,<d4,M-2>': 22,
 'A,0.250,<M3,d-3>': 23,
 'C,0.250,<M3,d-3>': 24,
 'S,0.250,<dd5,d1>': 25,
 'C,0.250,<P11,M7>': 26,
 'X,0.250,<m6,M2>': 27,
 'A,0.250,<P-4,d-8>': 28,
 'S,0.250': 29,
 'S,0.250,<d6,m2>': 30,
 'A,0.250,<d5,P1>': 31,
 'A,0.333,<M2,d-4>': 32,
 'C,0.333,<m7,M3>': 33,
 'S,0.250,<P4,m-2>': 34,
 'S,0.750,<m3,m-3>': 35,
 'X,0.250,<M-2,m-6>': 36,
 'C,0.333,<m2,P-4>': 37,
 'C,0.667,<d6,m2>': 38,
 'S,0.250,<M-2,m-6>': 39,
 'A,0.250,<P4,m-2>': 40,
 'A,0.333,<A4,d-

In [11]:
# Inspect the integer index -> token lookup used to decode model predictions.
indices_val

{0: 'C,0.250,<M-2,m-6>',
 1: 'C,0.250,<P-4,d-8>',
 2: 'C,0.250,<A-4,P-8>',
 3: 'S,0.750,<d5,P1>',
 4: 'S,0.250,<m3,m-3>',
 5: 'C,0.250,<P1,d-5>',
 6: 'C,0.667,<M2,d-4>',
 7: 'S,0.333',
 8: 'S,0.500,<d1,P-5>',
 9: 'C,0.250,<d1,P-5>',
 10: 'C,0.250,<dd5,d1>',
 11: 'C,0.333,<P5,A1>',
 12: 'C,0.250,<P4,m-2>',
 13: 'C,0.333,<M2,d-4>',
 14: 'C,0.250',
 15: 'S,0.250,<d5,P1>',
 16: 'C,0.250,<d5,P1>',
 17: 'C,0.250,<m2,P-4>',
 18: 'C,0.250,<A4,d-2>',
 19: 'A,0.250,<P1,d-5>',
 20: 'S,0.250,<P5,A1>',
 21: 'C,0.333,<m3,m-3>',
 22: 'S,0.250,<d4,M-2>',
 23: 'A,0.250,<M3,d-3>',
 24: 'C,0.250,<M3,d-3>',
 25: 'S,0.250,<dd5,d1>',
 26: 'C,0.250,<P11,M7>',
 27: 'X,0.250,<m6,M2>',
 28: 'A,0.250,<P-4,d-8>',
 29: 'S,0.250',
 30: 'S,0.250,<d6,m2>',
 31: 'A,0.250,<d5,P1>',
 32: 'A,0.333,<M2,d-4>',
 33: 'C,0.333,<m7,M3>',
 34: 'S,0.250,<P4,m-2>',
 35: 'S,0.750,<m3,m-3>',
 36: 'X,0.250,<M-2,m-6>',
 37: 'C,0.333,<m2,P-4>',
 38: 'C,0.667,<d6,m2>',
 39: 'S,0.250,<M-2,m-6>',
 40: 'A,0.250,<P4,m-2>',
 41: 'A,0.333,<A

In [12]:
# number of different values or words in corpus
# (used below as the size of the one-hot axis of X and y)
N_values = len(set(corpus))

# Create Sentences

In [13]:
# Slide a window of max_len tokens over the corpus, advancing `step` tokens at
# a time, so consecutive training sequences overlap. Each window [s, s+max_len)
# is paired with the token that immediately follows it.
max_len = 20
step = 3

window_starts = range(0, len(corpus) - max_len, step)
sentences = [corpus[s: s + max_len] for s in window_starts]
next_values = [corpus[s + max_len] for s in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 58


# Transform data into binary matrices

In [14]:
# One-hot training tensors, filled in by the next cell:
#   X[i, t, k] is True when token k appears at position t of sentence i
#   y[i, k]    is True when token k follows sentence i
# The builtin `bool` is used as the dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
X = np.zeros((len(sentences), max_len, N_values), dtype=bool)
y = np.zeros((len(sentences), N_values), dtype=bool)

In [15]:
# Mark, for every training window, which token sits at each position (X) and
# which token comes right after the window (y).
for row, (window, target) in enumerate(zip(sentences, next_values)):
    for pos, token in enumerate(window):
        X[row, pos, val_indices[token]] = 1
    y[row, val_indices[target]] = 1

In [16]:
# Sanity check: (number of sentences, max_len, N_values)
X.shape

(58, 20, 78)

# LSTM Models

In [17]:
from __future__ import print_function

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
import numpy as np

In [18]:
N_epochs = 128

# Build a 2-layer stacked LSTM. The first layer returns the full sequence so
# the second LSTM can consume it; the second returns only its final state,
# which is projected onto the N_values token classes with a softmax.
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=(max_len, N_values)))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(N_values))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
# `epochs` is the Keras 2 keyword; the legacy `nb_epoch` spelling triggers a
# deprecation warning on Keras 2 and is rejected by newer releases.
model.fit(X, y, batch_size=128, epochs=N_epochs)

  if sys.path[0] == '':


Epoch 1/128
Epoch 2/128
Epoch 3/128
Epoch 4/128
Epoch 5/128
Epoch 6/128
Epoch 7/128
Epoch 8/128
Epoch 9/128
Epoch 10/128
Epoch 11/128
Epoch 12/128
Epoch 13/128
Epoch 14/128
Epoch 15/128
Epoch 16/128
Epoch 17/128
Epoch 18/128
Epoch 19/128
Epoch 20/128
Epoch 21/128
Epoch 22/128
Epoch 23/128
Epoch 24/128
Epoch 25/128
Epoch 26/128
Epoch 27/128
Epoch 28/128
Epoch 29/128
Epoch 30/128
Epoch 31/128
Epoch 32/128
Epoch 33/128
Epoch 34/128
Epoch 35/128
Epoch 36/128
Epoch 37/128
Epoch 38/128
Epoch 39/128
Epoch 40/128
Epoch 41/128
Epoch 42/128
Epoch 43/128
Epoch 44/128
Epoch 45/128
Epoch 46/128
Epoch 47/128
Epoch 48/128
Epoch 49/128
Epoch 50/128
Epoch 51/128
Epoch 52/128
Epoch 53/128
Epoch 54/128
Epoch 55/128
Epoch 56/128
Epoch 57/128
Epoch 58/128
Epoch 59/128
Epoch 60/128
Epoch 61/128
Epoch 62/128
Epoch 63/128
Epoch 64/128
Epoch 65/128
Epoch 66/128
Epoch 67/128
Epoch 68/128
Epoch 69/128
Epoch 70/128
Epoch 71/128
Epoch 72/128
Epoch 73/128
Epoch 74/128
Epoch 75/128
Epoch 76/128
Epoch 77/128
Epoch 78

Epoch 101/128
Epoch 102/128
Epoch 103/128
Epoch 104/128
Epoch 105/128
Epoch 106/128
Epoch 107/128
Epoch 108/128
Epoch 109/128
Epoch 110/128
Epoch 111/128
Epoch 112/128
Epoch 113/128
Epoch 114/128
Epoch 115/128
Epoch 116/128
Epoch 117/128
Epoch 118/128
Epoch 119/128
Epoch 120/128
Epoch 121/128
Epoch 122/128
Epoch 123/128
Epoch 124/128
Epoch 125/128
Epoch 126/128
Epoch 127/128
Epoch 128/128


<keras.callbacks.callbacks.History at 0xa3da5a860>

# Predict

In [19]:
def __sample(a, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    The probabilities are re-weighted as softmax(log(a) / temperature):
    temperature < 1 sharpens the distribution towards its largest entries,
    temperature > 1 flattens it, and temperature == 1 leaves it unchanged.

    Args:
        a: 1-D array-like of non-negative probabilities.
        temperature: positive scaling factor (must not be 0).

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    # Work in float64: float32 model outputs can sum to slightly more than 1
    # once normalised, which np.random.multinomial rejects with a ValueError.
    probs = np.asarray(a, dtype=np.float64)

    # Zero-probability entries map to -inf and end up with weight 0; silence
    # only the expected divide-by-zero warning from log(0).
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature

    # Subtract the maximum before exponentiating so that small temperatures do
    # not underflow every term to 0 and turn the normalisation into 0/0 (NaN).
    logits = logits - np.max(logits)
    weights = np.exp(logits)
    probs = weights / np.sum(weights)

    return np.argmax(np.random.multinomial(1, probs, 1))

In [20]:
def __predict(model, x, indices_val, diversity):
    """Predict the next grammar token for a single encoded sentence.

    Args:
        model: object whose ``predict(x, verbose=0)`` returns a batch of
            probability vectors; only the first row is used.
        x: model input holding the encoded sentence.
        indices_val: mapping from sampled index to grammar token.
        diversity: sampling temperature forwarded to ``__sample``.

    Returns:
        The token that ``indices_val`` associates with the sampled index.
    """
    probabilities = model.predict(x, verbose=0)[0]
    return indices_val[__sample(probabilities, diversity)]

In [21]:
#def generate_sonnet(init, model, rounds=600, is_sample=True, temperature=1.0):
#    in_string = init.lower()
#    for i in range(rounds):
#        output = predict_next(string_to_x(in_string), model)
#        if is_sample:
#            n = y_to_char_sample(output, temperature)
#        else:
#            n = y_to_char(output)
#        in_string += n
#    return in_string

In [22]:
def __generate_grammar(model, corpus, abstract_grammars, values, val_indices,
                       indices_val, max_len, max_tries, diversity):
    """Generate one space-separated grammar string by sampling from the model.

    A random window of ``max_len`` corpus tokens seeds the generation. Tokens
    are then predicted one at a time and shifted into the window until the
    accumulated duration (second comma-separated field of every token) exceeds
    4.1. The first token must have exactly two fields and must not be a rest
    ('R'); it is re-sampled until it does, and after ``max_tries`` attempts it
    is replaced by the first token of a randomly chosen abstract grammar.

    Args:
        model: trained model, forwarded to ``__predict``.
        corpus: list of grammar tokens used to pick the seed window.
        abstract_grammars: list of grammar strings used for the fallback.
        values: collection of distinct tokens; only its length is used.
        val_indices: mapping token -> one-hot index.
        indices_val: mapping one-hot index -> token.
        max_len: number of tokens in the model's input window.
        max_tries: re-sampling attempts before using the fallback.
        diversity: sampling temperature.

    Returns:
        The generated tokens joined by single spaces.
    """
    epsilon = 0.00001

    # Seed window: max_len consecutive tokens taken from a random position.
    seed_at = np.random.randint(0, len(corpus) - max_len)
    window = corpus[seed_at: seed_at + max_len]

    grammar_text = ''
    elapsed = 0.0
    while elapsed <= 4.1:
        # One-hot encode the current window as a batch of one.
        encoded = np.zeros((1, max_len, len(values)))
        for pos, token in enumerate(window):
            encoded[0, pos, val_indices[token]] = 1.

        candidate = __predict(model, encoded, indices_val, diversity)

        # Nothing generated yet: insist on a valid opening token.
        if elapsed < epsilon:
            attempts = 0
            while True:
                fields = candidate.split(',')
                if fields[0] != 'R' and len(fields) == 2:
                    break

                if attempts >= max_tries:
                    print('Gave up on first note generation after', max_tries,
                          'tries')
                    pick = np.random.randint(0, len(abstract_grammars))
                    candidate = abstract_grammars[pick].split(' ')[0]
                else:
                    candidate = __predict(model, encoded, indices_val,
                                          diversity)

                attempts += 1

        # Slide the window: drop the oldest token, add the new one.
        window = window[1:] + [candidate]

        if elapsed > epsilon:
            grammar_text += ' '
        grammar_text += candidate

        elapsed += float(candidate.split(',')[1])

    return grammar_text

In [23]:
# generation settings
max_len = 20      # input window length; same value as used to build the training sentences
max_tries = 1000  # first-note re-sampling attempts before falling back to a corpus token
diversity = 1     # sampling temperature handed to the predictor

# musical settings
bpm = 130         # tempo written to the output stream as a MetronomeMark

In [24]:
# set up audio stream
# (the generation loop inserts the produced notes and chords into it)
out_stream = stream.Stream()



In [25]:
# Running insertion offset into out_stream; the generation loop advances it
# by 4.0 after each generated grammar.
curr_offset = 0.0

In [26]:
import pygame

from pygame.locals import *

In [27]:
# Exclusive upper bound of the generation loop, which runs over range(1, loopEnd).
loopEnd = len(chords)

In [28]:
# Start from an empty stream and a zero offset every time this cell runs, so
# that re-executing it rebuilds the piece instead of appending a second copy
# (and a second tempo mark) to whatever an earlier run left in out_stream.
out_stream = stream.Stream()
curr_offset = 0.0

for loopIndex in range(1, loopEnd):
    # Collect this group's chords into a voice, folding their offsets into
    # the range [0, 4).
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)
        
    # generate grammar
    curr_grammar = __generate_grammar(model=model, corpus=corpus, 
                                      abstract_grammars=abstract_grammars, 
                                      values=values, val_indices=val_indices, 
                                      indices_val=indices_val, 
                                      max_len=max_len, max_tries=max_tries,
                                      diversity=diversity)
    
    print(curr_grammar)

    # Rewrite ' A' and ' X' tokens as ' C' tokens before unparsing.
    curr_grammar = curr_grammar.replace(' A',' C').replace(' X',' C')

    # Pruning #1: smoothing measure
    curr_grammar = prune_grammar(curr_grammar)

    # Get notes from grammar and chords
    curr_notes = unparse_grammar(curr_grammar, curr_chords)
    
    print(curr_notes)
    
    # Pruning #2: removing repeated and too close together notes
    curr_notes = prune_notes(curr_notes)

    # quality assurance: clean up notes
    curr_notes = clean_up_notes(curr_notes)

    # print # of notes in curr_notes
    print('After pruning: %s notes' % (len([i for i in curr_notes
        if isinstance(i, note.Note)])))
    
    # insert into the output stream
    for m in curr_notes:
        out_stream.insert(curr_offset + m.offset, m)
    
    for mc in curr_chords:
        out_stream.insert(curr_offset + mc.offset, mc)
        
    # Each generated grammar occupies 4.0 offset units in the output.
    curr_offset += 4.0

out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

C,0.250 S,0.250,<P4,m-2> S,0.250,<d2,A-4> C,0.250,<d1,P-5> S,0.250,<d2,A-4> C,0.250,<d2,A-4> C,0.250,<d2,A-4> C,0.250,<d2,A-4> C,0.250,<d1,P-5> A,0.250,<d5,P1> C,0.250,<d4,M-2> A,0.250,<d5,P1> A,0.250,<P1,d-5> A,0.250,<P1,d-5> A,0.250,<P1,d-5> A,0.250,<P1,d-5> C,0.250,<M3,d-3>
<music21.stream.Voice 0xa3c93ab38>
After pruning: 17 notes
C,0.250 C,0.250,<P4,m-2> C,0.250 C,0.250 C,0.250 C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4> S,0.250 C,0.333,<d1,P-5> C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4>
<music21.stream.Voice 0xa3df55f60>
After pruning: 17 notes
S,0.333 A,0.250,<m-2,d-6> C,0.250,<d5,P1> C,0.250,<d5,P1> C,0.250,<d5,P1> S,0.333 S,0.333 S,0.333 C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4> S,0.250,<m2,P-4> C,0.250,<m7,M3> C,0.250,<m7,M3> C,0.250,<m7,M3>
<music21.stream.Voice 0xa3e7fe4e0>
After pruning: 16 notes
S,0.250 S,0.250 C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0.250,<m2,P-4> C,0

In [34]:
def play(x):
    """Play stream ``x`` through music21's realtime MIDI stream player."""
    return midi.realtime.StreamPlayer(x).play()

play(out_stream)

In [35]:
# Destination path for the generated MIDI file.
out_fn = './midi/generated1.mid'

In [32]:
# Translate the assembled output stream into a MIDI file object for writing.
mf = midi.translate.streamToMidiFile(out_stream)


In [36]:
# Write the MIDI data to out_fn. The close() call sits in a finally block so
# the file handle is released even if write() raises.
mf.open(out_fn, 'wb')
try:
    mf.write()
finally:
    mf.close()