Modularized code
Ji-Sung Linux committed Apr 21, 2016
1 parent c4c22b8 commit 1aec4c2
Showing 4 changed files with 119 additions and 100 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -2,7 +2,7 @@
 
 ### Using Keras & Theano for deep learning driven jazz generation
 
-I built [*deepjazz*](http://deepjazz.io) in 36 hours for HackPrinceton, Spring 2016. It uses Keras & Theano, two deep learning libraries, to generate jazz music. Specifically, it builds a two-layer [LSTM](http://deeplearning.net/tutorial/lstm.html), learning from the given MIDI file. It uses deep learning, the AI tech that powers [Google's AlphaGo](https://deepmind.com/alpha-go.html) and [IBM's Watson](https://www.ibm.com/smarterplanet/us/en/ibmwatson/what-is-watson.html), **to make music -- something that's considered as deeply human**.
+I built [*deepjazz*](http://deepjazz.io) in 36 hours at a hackathon. It uses Keras & Theano, two deep learning libraries, to generate jazz music. Specifically, it builds a two-layer [LSTM](http://deeplearning.net/tutorial/lstm.html), learning from the given MIDI file. It uses deep learning, the AI tech that powers [Google's AlphaGo](https://deepmind.com/alpha-go.html) and [IBM's Watson](https://www.ibm.com/smarterplanet/us/en/ibmwatson/what-is-watson.html), **to make music -- something that's considered as deeply human**.
 
 [![SoundCloud](https://jisungk.github.io/deepjazz/img/button_soundcloud.png)](https://soundcloud.com/deepjazz-ai)
 Check out deepjazz's music on **[SoundCloud](https://soundcloud.com/deepjazz-ai)**!
@@ -35,7 +35,7 @@ jisungk@princeton.edu
 
 ### Citations
 
-This project was inspired by and adapts a lot of preprocessing code (with permission) from Evan Chow's [jazzml](https://github.com/evancchow/jazzml). Thank you [Evan](https://www.linkedin.com/in/evancchow)! Public examples from the [Keras documentation](https://github.com/fchollet/keras) were also referenced.
+This project develops a lot of preprocessing code (with permission) from Evan Chow's [jazzml](https://github.com/evancchow/jazzml). Thank you [Evan](https://www.linkedin.com/in/evancchow)! Public examples from the [Keras documentation](https://github.com/fchollet/keras) were also referenced.
 
 ### Code License, Media Copyright
 
191 changes: 105 additions & 86 deletions generator.py
@@ -43,16 +43,16 @@ def __predict(model, x, indices_val, diversity):
 ''' Helper function which uses the given model to generate a grammar sequence
     from a given corpus, indices_val (mapping), abstract_grammars (list),
     and diversity floating point value. '''
-def __generate_grammar(model, corpus, abstract_grammars, indices_val,
-                       diversity):
+def __generate_grammar(model, corpus, abstract_grammars, values, val_indices,
+                       indices_val, max_len, max_tries, diversity):
     curr_grammar = ''
     # np.random.randint is exclusive to high
-    start_index = np.random.randint(0, len(corpus) - maxlen)
-    sentence = corpus[start_index: start_index + maxlen]  # seed
+    start_index = np.random.randint(0, len(corpus) - max_len)
+    sentence = corpus[start_index: start_index + max_len]  # seed
     running_length = 0.0
     while running_length <= 4.1:  # arbitrary, from avg in input file
         # transform sentence (previous sequence) to matrix
-        x = np.zeros((1, maxlen, len(values)))
+        x = np.zeros((1, max_len, len(values)))
         for t, val in enumerate(sentence):
             if (not val in val_indices): print(val)
             x[0, t, val_indices[val]] = 1.
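For intuition, here is a toy-scale sketch of the one-hot step above; the token strings are hypothetical stand-ins for the grammar values deepjazz derives from the MIDI file:

```python
import numpy as np

# Hypothetical vocabulary and seed; the real ones come from get_corpus_data().
values = ['C,0.5', 'A,0.25', 'X,1.0']
val_indices = {v: i for i, v in enumerate(values)}

sentence = ['C,0.5', 'A,0.25']      # seed slice of the corpus (max_len = 2)
x = np.zeros((1, len(sentence), len(values)))
for t, val in enumerate(sentence):
    x[0, t, val_indices[val]] = 1.  # exactly one 1 per timestep
# x.shape == (1, max_len, N_values): the input shape the LSTM expects.
```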
@@ -89,84 +89,103 @@ def __generate_grammar(model, corpus, abstract_grammars, indices_val,
 
     return curr_grammar
 
-#---------------------------------SCRIPT---------------------------------------#
-
-# model settings
-diversity = 0.5
-maxlen = 20
-max_tries = 1000
-N_epochs = int(sys.argv[1])
-
-# i/o settings
-fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
-outfn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)
-if (N_epochs == 1): outfn += '_epoch.midi'
-else: outfn += '_epochs.midi'
-
-# musical settings
-bpm = 130
-
-# get data
-chords, abstract_grammars = get_musical_data(fn)
-corpus, val_indices, indices_val = get_corpus_data(abstract_grammars)
-values = set(corpus)
-print('corpus length:', len(corpus))
-print('total # of values:', len(values))
-
-# build model
-model = lstm.build_model(corpus=corpus, val_indices=val_indices, maxlen=maxlen,
-                         N_epochs=N_epochs)
-
-# set up audio stream
-out_stream = stream.Stream()
-play = lambda x: midi.realtime.StreamPlayer(x).play()
-stop = lambda: pygame.mixer.music.stop()
-
-# generation loop
-curr_offset = 0.0
-loopEnd = len(chords)
-for loopIndex in range(1, loopEnd):
-    # get chords from file
-    curr_chords = stream.Voice()
-    for j in chords[loopIndex]:
-        curr_chords.insert((j.offset % 4), j)
-
-    # generate grammar
-    curr_grammar = __generate_grammar(model, corpus, abstract_grammars,
-                                      indices_val, diversity)
-    curr_grammar = curr_grammar.replace(' A',' C').replace(' X',' C')
-
-    # Pruning #1: smoothing measure
-    curr_grammar = prune_grammar(curr_grammar)
-
-    # Get notes from grammar and chords
-    curr_notes = unparse_grammar(curr_grammar, curr_chords)
-
-    # Pruning #2: removing repeated and too close together notes
-    curr_notes = prune_notes(curr_notes)
-
-    # quality assurance: clean up notes
-    curr_notes = clean_up_notes(curr_notes)
-
-    # print # of notes in curr_notes
-    print('After pruning: %s notes' % (len([i for i in curr_notes
-                                            if isinstance(i, note.Note)])))
-
-    # insert into the output stream
-    for m in curr_notes:
-        out_stream.insert(curr_offset + m.offset, m)
-    for mc in curr_chords:
-        out_stream.insert(curr_offset + mc.offset, mc)
-
-    curr_offset += 4.0
-
-out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))
-
-# Play the final stream through output (see 'play' lambda function above)
-play(out_stream)
-
-# save stream
-mf = midi.translate.streamToMidiFile(out_stream)
-mf.open(outfn, 'wb')
-mf.write()
-mf.close()
+#----------------------------PUBLIC FUNCTIONS----------------------------------#
+
+''' Generates a musical sequence based on the given data filename and settings.
+    Plays, then stores (as a MIDI file) the generated output. '''
+def generate(data_fn, out_fn, N_epochs):
+    # model settings
+    max_len = 20
+    max_tries = 1000
+    diversity = 0.5
+
+    # musical settings
+    bpm = 130
+
+    # get data
+    chords, abstract_grammars = get_musical_data(data_fn)
+    corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
+    print('corpus length:', len(corpus))
+    print('total # of values:', len(values))
+
+    # build model
+    model = lstm.build_model(corpus=corpus, val_indices=val_indices,
+                             max_len=max_len, N_epochs=N_epochs)
+
+    # set up audio stream
+    out_stream = stream.Stream()
+
+    # generation loop
+    curr_offset = 0.0
+    loopEnd = len(chords)
+    for loopIndex in range(1, loopEnd):
+        # get chords from file
+        curr_chords = stream.Voice()
+        for j in chords[loopIndex]:
+            curr_chords.insert((j.offset % 4), j)
+
+        # generate grammar
+        curr_grammar = __generate_grammar(model=model, corpus=corpus,
+                                          abstract_grammars=abstract_grammars,
+                                          values=values, val_indices=val_indices,
+                                          indices_val=indices_val,
+                                          max_len=max_len, max_tries=max_tries,
+                                          diversity=diversity)
+
+        curr_grammar = curr_grammar.replace(' A',' C').replace(' X',' C')
+
+        # Pruning #1: smoothing measure
+        curr_grammar = prune_grammar(curr_grammar)
+
+        # Get notes from grammar and chords
+        curr_notes = unparse_grammar(curr_grammar, curr_chords)
+
+        # Pruning #2: removing repeated and too close together notes
+        curr_notes = prune_notes(curr_notes)
+
+        # quality assurance: clean up notes
+        curr_notes = clean_up_notes(curr_notes)
+
+        # print # of notes in curr_notes
+        print('After pruning: %s notes' % (len([i for i in curr_notes
+                                                if isinstance(i, note.Note)])))
+
+        # insert into the output stream
+        for m in curr_notes:
+            out_stream.insert(curr_offset + m.offset, m)
+        for mc in curr_chords:
+            out_stream.insert(curr_offset + mc.offset, mc)
+
+        curr_offset += 4.0
+
+    out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))
+
+    # play the final stream through output
+    play = lambda x: midi.realtime.StreamPlayer(x).play()
+    play(out_stream)
+
+    # save stream
+    mf = midi.translate.streamToMidiFile(out_stream)
+    mf.open(out_fn, 'wb')
+    mf.write()
+    mf.close()
+
+''' Runs generate() -- generating, playing, then storing a musical sequence --
+    with the default Metheny file. '''
+def main(args):
+    try:
+        N_epochs = int(args[1])
+    except:
+        N_epochs = 16  # default
+
+    # i/o settings
+    data_fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
+    out_fn = 'midi/' + 'deepjazz_on_metheny...' + str(N_epochs)
+    if (N_epochs == 1): out_fn += '_epoch.midi'
+    else: out_fn += '_epochs.midi'
+
+    generate(data_fn, out_fn, N_epochs)
+
+''' If run as a script, execute main(). '''
+if __name__ == '__main__':
+    import sys
+    main(sys.argv)
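With the script body folded into generate() and main(), the module can be driven from the shell as before, or imported. A minimal sketch using the defaults main() sets up:

```python
# From the shell, as before (argument = number of training epochs):
#   $ python generator.py 128
#
# Or from Python, via the new public function:
from generator import generate

generate(data_fn='midi/original_metheny.mid',
         out_fn='midi/deepjazz_on_metheny...128_epochs.midi',
         N_epochs=128)
```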
14 changes: 7 additions & 7 deletions lstm.py
@@ -16,21 +16,21 @@
 import numpy as np
 
 ''' Build a 2-layer LSTM from a training corpus '''
-def build_model(corpus, val_indices, maxlen, N_epochs=128):
+def build_model(corpus, val_indices, max_len, N_epochs=128):
     # number of different values or words in corpus
     N_values = len(set(corpus))
 
-    # cut the corpus in semi-redundant sequences of maxlen values
+    # cut the corpus in semi-redundant sequences of max_len values
     step = 3
     sentences = []
     next_values = []
-    for i in range(0, len(corpus) - maxlen, step):
-        sentences.append(corpus[i: i + maxlen])
-        next_values.append(corpus[i + maxlen])
+    for i in range(0, len(corpus) - max_len, step):
+        sentences.append(corpus[i: i + max_len])
+        next_values.append(corpus[i + max_len])
     print('nb sequences:', len(sentences))
 
     # transform data into binary matrices
-    X = np.zeros((len(sentences), maxlen, N_values), dtype=np.bool)
+    X = np.zeros((len(sentences), max_len, N_values), dtype=np.bool)
     y = np.zeros((len(sentences), N_values), dtype=np.bool)
     for i, sentence in enumerate(sentences):
         for t, val in enumerate(sentence):
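For intuition, the semi-redundant windowing above applied to a toy corpus (tokens hypothetical):

```python
corpus = list('abcdefgh')  # stand-in tokens
max_len, step = 3, 3
sentences = [corpus[i: i + max_len]
             for i in range(0, len(corpus) - max_len, step)]
next_values = [corpus[i + max_len]
               for i in range(0, len(corpus) - max_len, step)]
# sentences   == [['a','b','c'], ['d','e','f']]
# next_values == ['d', 'g']  -- the value the model learns to predict
```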
@@ -39,7 +39,7 @@ def build_model(corpus, val_indices, maxlen, N_epochs=128):
 
     # build a 2 stacked LSTM
     model = Sequential()
-    model.add(LSTM(128, return_sequences=True, input_shape=(maxlen, N_values)))
+    model.add(LSTM(128, return_sequences=True, input_shape=(max_len, N_values)))
     model.add(Dropout(0.2))
     model.add(LSTM(128, return_sequences=False))
     model.add(Dropout(0.2))
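The diff never shows __predict, which consumes this model's softmax output over in generator.py. Assuming it follows the standard Keras text-generation example (which the README credits), the diversity argument would act as a softmax temperature, roughly as below -- a sketch, not the committed implementation:

```python
import numpy as np

def sample(preds, diversity=0.5):
    # Temperature-scale the softmax output, renormalize, then draw one index.
    # Lower diversity -> sharper distribution, safer note choices;
    # higher diversity -> flatter distribution, more surprising ones.
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-12) / diversity
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    return np.argmax(np.random.multinomial(1, preds, 1))
```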
10 changes: 5 additions & 5 deletions preprocess.py
@@ -17,9 +17,9 @@
 #----------------------------HELPER FUNCTIONS----------------------------------#
 
 ''' Helper function to parse a MIDI file into its measures and chords '''
-def __parse_midi(fn):
+def __parse_midi(data_fn):
     # Parse the MIDI data for separate melody and accompaniment parts.
-    midi_data = converter.parse(fn)
+    midi_data = converter.parse(data_fn)
     # Get melody part, compress into single voice.
     melody_stream = midi_data[5]  # For Metheny piece, Melody is Part #5.
     melody1, melody2 = melody_stream.getElementsByClass(stream.Voice)
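The hard-coded Part #5 only holds for the Metheny file. To adapt another MIDI file, one could first inspect its parts with music21 -- a sketch with a hypothetical path; partName may be None for unnamed parts:

```python
from music21 import converter

midi_data = converter.parse('midi/your_file.mid')  # hypothetical path
for i, part in enumerate(midi_data.parts):
    print(i, part.partName)  # find the melody's index, then use it as above
```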
@@ -126,8 +126,8 @@ def __get_abstract_grammars(measures, chords):
 #----------------------------PUBLIC FUNCTIONS----------------------------------#
 
 ''' Get musical data from a MIDI file '''
-def get_musical_data(fn):
-    measures, chords = __parse_midi(fn)
+def get_musical_data(data_fn):
+    measures, chords = __parse_midi(data_fn)
     abstract_grammars = __get_abstract_grammars(measures, chords)
 
     return chords, abstract_grammars
@@ -139,4 +139,4 @@ def get_corpus_data(abstract_grammars):
     val_indices = dict((v, i) for i, v in enumerate(values))
     indices_val = dict((i, v) for i, v in enumerate(values))
 
-    return corpus, val_indices, indices_val
+    return corpus, values, val_indices, indices_val
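A toy illustration of the widened return tuple (token strings hypothetical). Returning values alongside the two dicts gives lstm.py and generator.py one shared value ordering instead of each module rebuilding set(corpus) on its own:

```python
# Hypothetical three-token corpus.
corpus = ['C,0.5', 'A,0.25', 'C,0.5']
values = set(corpus)                                       # 2 distinct values
val_indices = dict((v, i) for i, v in enumerate(values))   # value -> index
indices_val = dict((i, v) for i, v in enumerate(values))   # index -> value
# Both mappings, and values itself, now travel together to every caller.
```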
