In [3]:
'''
Author:     Ji-Sung Kim
Project:    deepjazz
Purpose:    Parse, cleanup and process data.

Code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml with
express permission.
'''

from __future__ import print_function

from music21 import *
from collections import defaultdict, OrderedDict
from itertools import groupby, zip_longest
from grammar import *

#----------------------------HELPER FUNCTIONS----------------------------------#

''' Helper function to parse a MIDI file into its measures and chords '''
def __parse_midi(data_fn):
    # Parse the MIDI data for separate melody and accompaniment parts.
    midi_data = converter.parse(data_fn)
    # Get melody part, compress into single voice.
    melody_stream = midi_data[5]     # For Metheny piece, Melody is Part #5.
    melody1, melody2 = melody_stream.getElementsByClass(stream.Voice)
    for j in melody2:
        melody1.insert(j.offset, j)
    melody_voice = melody1

    for i in melody_voice:
        if i.quarterLength == 0.0:
            i.quarterLength = 0.25

    # Change key signature to adhere to comp_stream (1 sharp, mode = major).
    # Also add Electric Guitar. 
    melody_voice.insert(0, instrument.ElectricGuitar())
    

    # The accompaniment parts. Take only the best subset of parts from
    # the original data. Maybe add more parts, hand-add valid instruments.
    # Should add least add a string part (for sparse solos).
    # Verified are good parts: 0, 1, 6, 7 '''
    partIndices = [0, 1, 6, 7]
    comp_stream = stream.Voice()
    comp_stream.append([j.flat for i, j in enumerate(midi_data) 
        if i in partIndices])

    # Full stream containing both the melody and the accompaniment. 
    # All parts are flattened. 
    full_stream = stream.Voice()
    for i in range(len(comp_stream)):
        full_stream.append(comp_stream[i])
    full_stream.append(melody_voice)

    # Extract solo stream, assuming you know the positions ..ByOffset(i, j).
    # Note that for different instruments (with stream.flat), you NEED to use
    # stream.Part(), not stream.Voice().
    # Accompanied solo is in range [478, 548)
    solo_stream = stream.Voice()
    for part in full_stream:
        curr_part = stream.Part()
        curr_part.append(part.getElementsByClass(instrument.Instrument))
        curr_part.append(part.getElementsByClass(tempo.MetronomeMark))
        curr_part.append(part.getElementsByClass(key.KeySignature))
        curr_part.append(part.getElementsByClass(meter.TimeSignature))
        curr_part.append(part.getElementsByOffset(476, 548, 
                                                  includeEndBoundary=True))
        cp = curr_part.flat
        solo_stream.insert(cp)

    # Group by measure so you can classify. 
    # Note that measure 0 is for the time signature, metronome, etc. which have
    # an offset of 0.0.
    melody_stream = solo_stream[-1]
    measures = OrderedDict()
    offsetTuples = [(int(n.offset / 4), n) for n in melody_stream]
    measureNum = 0 # for now, don't use real m. nums (119, 120)
    for key_x, group in groupby(offsetTuples, lambda x: x[0]):
        measures[measureNum] = [n[1] for n in group]
        measureNum += 1

    # Get the stream of chords.
    # offsetTuples_chords: group chords by measure number.
    chordStream = solo_stream[0]
    chordStream.removeByClass(note.Rest)
    chordStream.removeByClass(note.Note)
    offsetTuples_chords = [(int(n.offset / 4), n) for n in chordStream]

    # Generate the chord structure. Use just track 1 (piano) since it is
    # the only instrument that has chords. 
    # Group into 4s, just like before. 
    chords = OrderedDict()
    measureNum = 0
    for key_x, group in groupby(offsetTuples_chords, lambda x: x[0]):
        chords[measureNum] = [n[1] for n in group]
        measureNum += 1

    # Fix for the below problem.
    #   1) Find out why len(measures) != len(chords).
    #   ANSWER: resolves at end but melody ends 1/16 before last measure so doesn't
    #           actually show up, while the accompaniment's beat 1 right after does.
    #           Actually on second thought: melody/comp start on Ab, and resolve to
    #           the same key (Ab) so could actually just cut out last measure to loop.
    #           Decided: just cut out the last measure. 
    del chords[len(chords) - 1]
    assert len(chords) == len(measures)

    return measures, chords

''' Helper function to get the grammatical data from given musical data. '''
def __get_abstract_grammars(measures, chords):
    # extract grammars
    abstract_grammars = []
    for ix in range(1, len(measures)):
        m = stream.Voice()
        for i in measures[ix]:
            m.insert(i.offset, i)
        c = stream.Voice()
        for j in chords[ix]:
            c.insert(j.offset, j)
        parsed = parse_melody(m, c)
        abstract_grammars.append(parsed)

    return abstract_grammars

#----------------------------PUBLIC FUNCTIONS----------------------------------#

''' Get musical data from a MIDI file '''
def get_musical_data(data_fn):
    measures, chords = __parse_midi(data_fn)
    abstract_grammars = __get_abstract_grammars(measures, chords)

    return chords, abstract_grammars

''' Get corpus data from grammatical data '''
def get_corpus_data(abstract_grammars):
    corpus = [x for sublist in abstract_grammars for x in sublist.split(' ')]
    values = set(corpus)
    val_indices = dict((v, i) for i, v in enumerate(values))
    indices_val = dict((i, v) for i, v in enumerate(values))

    return corpus, values, val_indices, indices_val

In [4]:
'''
Author:     Ji-Sung Kim
Project:    deepjazz
Purpose:    Generate jazz using a deep learning model (LSTM in deepjazz).

Some code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml 
with express permission.

Code was built while significantly referencing public examples from the
Keras documentation on GitHub:
https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py

GPU run command:
    THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python generator.py [# of epochs]

    Note: running Keras/Theano on GPU is formally supported for only NVIDIA cards (CUDA backend).
'''
from __future__ import print_function
import sys

from music21 import *
import numpy as np

from grammar import *
#from preprocess import *
from qa import *
import lstm

In [5]:
#----------------------------HELPER FUNCTIONS----------------------------------#

''' Helper function to sample an index from a probability array '''
def __sample(a, temperature=1.0):
    a = np.log(a) / temperature
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))

In [6]:
''' Helper function to generate a predicted value from a given matrix '''
def __predict(model, x, indices_val, diversity):
    preds = model.predict(x, verbose=0)[0]
    next_index = __sample(preds, diversity)
    next_val = indices_val[next_index]

    return next_val

In [7]:
''' Helper function which uses the given model to generate a grammar sequence 
    from a given corpus, indices_val (mapping), abstract_grammars (list), 
    and diversity floating point value. '''
def __generate_grammar(model, corpus, abstract_grammars, values, val_indices,
                       indices_val, max_len, max_tries, diversity):
    curr_grammar = ''
    # np.random.randint is exclusive to high
    start_index = np.random.randint(0, len(corpus) - max_len)
    sentence = corpus[start_index: start_index + max_len]    # seed
    running_length = 0.0
    while running_length <= 4.1:    # arbitrary, from avg in input file
        # transform sentence (previous sequence) to matrix
        x = np.zeros((1, max_len, len(values)))
        for t, val in enumerate(sentence):
            if (not val in val_indices): print(val)
            x[0, t, val_indices[val]] = 1.

        next_val = __predict(model, x, indices_val, diversity)

        # fix first note: must not have < > and not be a rest
        if (running_length < 0.00001):
            tries = 0
            while (next_val.split(',')[0] == 'R' or 
                len(next_val.split(',')) != 2):
                # give up after 1000 tries; random from input's first notes
                if tries >= max_tries:
                    print('Gave up on first note generation after', max_tries, 
                        'tries')
                    # np.random is exclusive to high
                    rand = np.random.randint(0, len(abstract_grammars))
                    next_val = abstract_grammars[rand].split(' ')[0]
                else:
                    next_val = __predict(model, x, indices_val, diversity)

                tries += 1

        # shift sentence over with new value
        sentence = sentence[1:] 
        sentence.append(next_val)

        # except for first case, add a ' ' separator
        if (running_length > 0.00001): curr_grammar += ' '
        curr_grammar += next_val

        length = float(next_val.split(',')[1])
        running_length += length

    return curr_grammar

In [8]:
#----------------------------PUBLIC FUNCTIONS----------------------------------#
''' Generates musical sequence based on the given data filename and settings.
    Plays then stores (MIDI file) the generated output. '''
def generate(data_fn, out_fn, N_epochs):
    # model settings
    max_len = 20
    max_tries = 1000
    diversity = 0.5

    # musical settings
    bpm = 130

    # get data
    chords, abstract_grammars = get_musical_data(data_fn)
    corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
    print('corpus length:', len(corpus))
    print('total # of values:', len(values))

    # build model
    model = lstm.build_model(corpus=corpus, val_indices=val_indices, 
                             max_len=max_len, N_epochs=N_epochs)

    # set up audio stream
    out_stream = stream.Stream()

    # generation loop
    curr_offset = 0.0
    loopEnd = len(chords)
    for loopIndex in range(1, loopEnd):
        # get chords from file
        curr_chords = stream.Voice()
        for j in chords[loopIndex]:
            curr_chords.insert((j.offset % 4), j)

        # generate grammar
        curr_grammar = __generate_grammar(model=model, corpus=corpus, 
                                          abstract_grammars=abstract_grammars, 
                                          values=values, val_indices=val_indices, 
                                          indices_val=indices_val, 
                                          max_len=max_len, max_tries=max_tries,
                                          diversity=diversity)

        curr_grammar = curr_grammar.replace(' A',' C').replace(' X',' C')

        # Pruning #1: smoothing measure
        curr_grammar = prune_grammar(curr_grammar)

        # Get notes from grammar and chords
        curr_notes = unparse_grammar(curr_grammar, curr_chords)

        # Pruning #2: removing repeated and too close together notes
        curr_notes = prune_notes(curr_notes)

        # quality assurance: clean up notes
        curr_notes = clean_up_notes(curr_notes)

        # print # of notes in curr_notes
        print('After pruning: %s notes' % (len([i for i in curr_notes
            if isinstance(i, note.Note)])))

        # insert into the output stream
        for m in curr_notes:
            out_stream.insert(curr_offset + m.offset, m)
        for mc in curr_chords:
            out_stream.insert(curr_offset + mc.offset, mc)

        curr_offset += 4.0

    out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

    # Play the final stream through output (see 'play' lambda function above)
    play = lambda x: midi.realtime.StreamPlayer(x).play()
    play(out_stream)

    # save stream
    mf = midi.translate.streamToMidiFile(out_stream)
    mf.open(out_fn, 'wb')
    mf.write()
    mf.close()

In [9]:
''' Runs generate() -- generating, playing, then storing a musical sequence --
    with the default Metheny file. '''
def main():
    try:
        N_epochs = 1
    except:
        N_epochs = 128 # default

    # i/o settings
    data_fn = 'midi/' + 'original_metheny.mid' # 'And Then I Knew' by Pat Metheny 
    out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)
    if (N_epochs == 1): out_fn += '_epoch.midi'
    else:               out_fn += '_epochs.midi'

    generate(data_fn, out_fn, N_epochs)

In [11]:
''' If run as script, execute main '''
if __name__ == '__main__':
    main()

corpus length: 193
total # of values: 78
nb sequences: 58




Epoch 1/1
After pruning: 14 notes
After pruning: 12 notes
After pruning: 12 notes
After pruning: 13 notes
After pruning: 14 notes
After pruning: 12 notes
After pruning: 12 notes
After pruning: 16 notes
After pruning: 13 notes
After pruning: 12 notes
After pruning: 13 notes
After pruning: 12 notes
After pruning: 13 notes
After pruning: 14 notes
After pruning: 15 notes
After pruning: 12 notes
After pruning: 16 notes
After pruning: 11 notes


In [12]:
N_epochs = 1

In [13]:
data_fn = 'midi/' + 'original_metheny.mid' # 'And Then I Knew' by Pat Metheny 

In [14]:
out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)

In [15]:
out_fn += '_epoch.midi'

In [16]:
data_fn

'midi/original_metheny.mid'

In [17]:
out_fn

'midi/deepjazz_on_metheny...1_epoch.midi'

In [None]:
generate(data_fn, out_fn, N_epochs)

In [18]:
# model settings
max_len = 20
max_tries = 1000
diversity = 0.5

# musical settings
bpm = 130

In [19]:
# get data
chords, abstract_grammars = get_musical_data(data_fn)

In [26]:
chords[3]

[<music21.chord.Chord E-4 G#3 C4 F3>, <music21.chord.Chord F4 D4 B-3 G3>]

In [27]:
corpus, values, indices_val = get_corpus_data(abstract_grammars)

In [28]:
corpus

['C,0.500',
 'C,0.333,<P1,d-5>',
 'C,0.250,<M-2,m-6>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<d6,m2>',
 'C,0.250,<d5,P1>',
 'C,0.667,<M2,d-4>',
 'C,0.250,<d1,P-5>',
 'C,0.250,<P-4,d-8>',
 'S,0.250,<d4,M-2>',
 'S,0.250,<P5,A1>',
 'C,0.250,<d5,P1>',
 'C,0.500',
 'S,0.250,<m2,P-4>',
 'C,0.250,<P4,m-2>',
 'A,0.250,<P4,m-2>',
 'S,0.500,<d1,P-5>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'C,0.250,<M2,d-4>',
 'A,0.250,<d4,M-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250',
 'C,0.250,<d2,A-4>',
 'A,0.250,<m-2,d-6>',
 'C,0.250,<d3,M-3>',
 'A,0.250,<M3,d-3>',
 'C,0.250,<d6,m2>',
 'C,0.250,<A4,d-2>',
 'C,0.250,<d3,M-3>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'S,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<m2,P-4>',
 'A,0.250,<m2,P-4>',
 'C,0.250',
 'S,0.250,<d6,m2>',
 'X,0.250,<A4,d-2>',
 'S,0.250,<dd5,d1>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<dd5,d1>',
 'S,0.250,<P4,m-2>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<M3,d-3>',
 'C,0.250,

In [29]:
values

{'A,0.250,<M2,d-4>',
 'A,0.250,<M3,d-3>',
 'A,0.250,<P-4,d-8>',
 'A,0.250,<P1,d-5>',
 'A,0.250,<P4,m-2>',
 'A,0.250,<d4,M-2>',
 'A,0.250,<d5,P1>',
 'A,0.250,<m-2,d-6>',
 'A,0.250,<m2,P-4>',
 'A,0.333,<A4,d-2>',
 'A,0.333,<M2,d-4>',
 'A,0.333,<P1,d-5>',
 'C,0.250',
 'C,0.250,<A-4,P-8>',
 'C,0.250,<A4,d-2>',
 'C,0.250,<M-2,m-6>',
 'C,0.250,<M2,d-4>',
 'C,0.250,<M3,d-3>',
 'C,0.250,<P-4,d-8>',
 'C,0.250,<P1,d-5>',
 'C,0.250,<P11,M7>',
 'C,0.250,<P4,m-2>',
 'C,0.250,<P5,A1>',
 'C,0.250,<d1,P-5>',
 'C,0.250,<d2,A-4>',
 'C,0.250,<d3,M-3>',
 'C,0.250,<d4,M-2>',
 'C,0.250,<d5,P1>',
 'C,0.250,<d6,m2>',
 'C,0.250,<dd5,d1>',
 'C,0.250,<m-2,d-6>',
 'C,0.250,<m2,P-4>',
 'C,0.250,<m3,m-3>',
 'C,0.250,<m7,M3>',
 'C,0.333,<A4,d-2>',
 'C,0.333,<M2,d-4>',
 'C,0.333,<P1,d-5>',
 'C,0.333,<P5,A1>',
 'C,0.333,<d1,P-5>',
 'C,0.333,<m2,P-4>',
 'C,0.333,<m3,m-3>',
 'C,0.333,<m7,M3>',
 'C,0.500',
 'C,0.500,<P4,m-2>',
 'C,0.500,<m2,P-4>',
 'C,0.500,<m6,M2>',
 'C,0.667,<M2,d-4>',
 'C,0.667,<d6,m2>',
 'C,0.750,<m3

In [30]:
val_indices

{'A,0.250,<M2,d-4>': 32,
 'A,0.250,<M3,d-3>': 17,
 'A,0.250,<P-4,d-8>': 44,
 'A,0.250,<P1,d-5>': 66,
 'A,0.250,<P4,m-2>': 47,
 'A,0.250,<d4,M-2>': 29,
 'A,0.250,<d5,P1>': 72,
 'A,0.250,<m-2,d-6>': 26,
 'A,0.250,<m2,P-4>': 76,
 'A,0.333,<A4,d-2>': 61,
 'A,0.333,<M2,d-4>': 20,
 'A,0.333,<P1,d-5>': 52,
 'C,0.250': 39,
 'C,0.250,<A-4,P-8>': 40,
 'C,0.250,<A4,d-2>': 63,
 'C,0.250,<M-2,m-6>': 41,
 'C,0.250,<M2,d-4>': 45,
 'C,0.250,<M3,d-3>': 35,
 'C,0.250,<P-4,d-8>': 48,
 'C,0.250,<P1,d-5>': 0,
 'C,0.250,<P11,M7>': 70,
 'C,0.250,<P4,m-2>': 8,
 'C,0.250,<P5,A1>': 74,
 'C,0.250,<d1,P-5>': 15,
 'C,0.250,<d2,A-4>': 16,
 'C,0.250,<d3,M-3>': 6,
 'C,0.250,<d4,M-2>': 1,
 'C,0.250,<d5,P1>': 5,
 'C,0.250,<d6,m2>': 34,
 'C,0.250,<dd5,d1>': 51,
 'C,0.250,<m-2,d-6>': 71,
 'C,0.250,<m2,P-4>': 49,
 'C,0.250,<m3,m-3>': 18,
 'C,0.250,<m7,M3>': 64,
 'C,0.333,<A4,d-2>': 13,
 'C,0.333,<M2,d-4>': 62,
 'C,0.333,<P1,d-5>': 9,
 'C,0.333,<P5,A1>': 14,
 'C,0.333,<d1,P-5>': 59,
 'C,0.333,<m2,P-4>': 46,
 'C,0.333,<m3,m

In [31]:
indices_val

{0: 'C,0.250,<P1,d-5>',
 1: 'C,0.250,<d4,M-2>',
 2: 'C,0.500',
 3: 'S,0.250,<P4,m-2>',
 4: 'S,0.250,<d2,A-4>',
 5: 'C,0.250,<d5,P1>',
 6: 'C,0.250,<d3,M-3>',
 7: 'X,0.250,<d1,P-5>',
 8: 'C,0.250,<P4,m-2>',
 9: 'C,0.333,<P1,d-5>',
 10: 'S,0.500,<d1,P-5>',
 11: 'S,0.250,<dd5,d1>',
 12: 'S,0.250,<P5,A1>',
 13: 'C,0.333,<A4,d-2>',
 14: 'C,0.333,<P5,A1>',
 15: 'C,0.250,<d1,P-5>',
 16: 'C,0.250,<d2,A-4>',
 17: 'A,0.250,<M3,d-3>',
 18: 'C,0.250,<m3,m-3>',
 19: 'S,0.250,<A4,d-2>',
 20: 'A,0.333,<M2,d-4>',
 21: 'S,0.250,<d1,P-5>',
 22: 'C,0.500,<m6,M2>',
 23: 'S,0.333',
 24: 'X,0.250,<d2,A-4>',
 25: 'S,0.250,<m3,m-3>',
 26: 'A,0.250,<m-2,d-6>',
 27: 'C,0.333,<m7,M3>',
 28: 'S,0.750,<d5,P1>',
 29: 'A,0.250,<d4,M-2>',
 30: 'S,0.250,<d4,M-2>',
 31: 'C,0.750,<m3,m-3>',
 32: 'A,0.250,<M2,d-4>',
 33: 'S,0.750,<m3,m-3>',
 34: 'C,0.250,<d6,m2>',
 35: 'C,0.250,<M3,d-3>',
 36: 'S,0.333,<d1,P-5>',
 37: 'C,0.500,<P4,m-2>',
 38: 'X,0.250,<A4,d-2>',
 39: 'C,0.250',
 40: 'C,0.250,<A-4,P-8>',
 41: 'C,0.250,<M-

In [33]:
abstract_grammars

['C,0.500 C,0.333,<P1,d-5> C,0.250,<M-2,m-6> C,0.250,<M3,d-3> C,0.250,<d6,m2> C,0.250,<d5,P1> C,0.667,<M2,d-4> C,0.250,<d1,P-5> C,0.250,<P-4,d-8> S,0.250,<d4,M-2> S,0.250,<P5,A1> C,0.250,<d5,P1>',
 'C,0.500 S,0.250,<m2,P-4> C,0.250,<P4,m-2> A,0.250,<P4,m-2> S,0.500,<d1,P-5> C,0.250,<P1,d-5> C,0.250,<m2,P-4> A,0.250,<m2,P-4> C,0.250,<M2,d-4> A,0.250,<d4,M-2> C,0.250,<P4,m-2> C,0.250,<P4,m-2>',
 'C,0.250 C,0.250,<d2,A-4> A,0.250,<m-2,d-6> C,0.250,<d3,M-3> A,0.250,<M3,d-3> C,0.250,<d6,m2> C,0.250,<A4,d-2> C,0.250,<d3,M-3> C,0.250,<m2,P-4> A,0.250,<m2,P-4> S,0.250,<P1,d-5> C,0.250,<m2,P-4> C,0.250,<P1,d-5> C,0.250,<m2,P-4> A,0.250,<m2,P-4>',
 'C,0.250 S,0.250,<d6,m2> X,0.250,<A4,d-2> S,0.250,<dd5,d1> C,0.250,<P4,m-2> C,0.250,<M3,d-3> C,0.250,<dd5,d1> S,0.250,<P4,m-2> C,0.250,<P4,m-2> C,0.250,<M3,d-3> C,0.250,<dd5,d1> A,0.250,<M3,d-3>',
 'C,0.500 A,0.333,<M2,d-4> C,0.250,<d4,M-2> A,0.333,<M2,d-4> C,0.250,<m2,P-4> C,0.250,<A-4,P-8> C,0.250,<P4,m-2> A,0.250,<P4,m-2> S,0.250,<d5,P1> A,0.333,<P

In [None]:
print('corpus length:', len(corpus))
print('total # of values:', len(values))