Modularized code
Ji-Sung Linux committed Apr 21, 2016
1 parent c4c22b8 commit 1aec4c2
Showing 4 changed files with 119 additions and 100 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -2,7 +2,7 @@
 
 ### Using Keras & Theano for deep learning driven jazz generation
 
-I built [*deepjazz*](http://deepjazz.io) in 36 hours for HackPrinceton, Spring 2016. It uses Keras & Theano, two deep learning libraries, to generate jazz music. Specifically, it builds a two-layer [LSTM](http://deeplearning.net/tutorial/lstm.html), learning from the given MIDI file. It uses deep learning, the AI tech that powers [Google's AlphaGo](https://deepmind.com/alpha-go.html) and [IBM's Watson](https://www.ibm.com/smarterplanet/us/en/ibmwatson/what-is-watson.html), **to make music -- something that's considered as deeply human**.
+I built [*deepjazz*](http://deepjazz.io) in 36 hours at a hackathon. It uses Keras & Theano, two deep learning libraries, to generate jazz music. Specifically, it builds a two-layer [LSTM](http://deeplearning.net/tutorial/lstm.html), learning from the given MIDI file. It uses deep learning, the AI tech that powers [Google's AlphaGo](https://deepmind.com/alpha-go.html) and [IBM's Watson](https://www.ibm.com/smarterplanet/us/en/ibmwatson/what-is-watson.html), **to make music -- something that's considered as deeply human**.
 
 [![SoundCloud](https://jisungk.github.io/deepjazz/img/button_soundcloud.png)](https://soundcloud.com/deepjazz-ai)
 Check out deepjazz's music on **[SoundCloud](https://soundcloud.com/deepjazz-ai)**!
@@ -35,7 +35,7 @@ jisungk@princeton.edu
 
 ### Citations
 
-This project was inspired by and adapts a lot of preprocessing code (with permission) from Evan Chow's [jazzml](https://github.com/evancchow/jazzml). Thank you [Evan](https://www.linkedin.com/in/evancchow)! Public examples from the [Keras documentation](https://github.com/fchollet/keras) were also referenced.
+This project develops a lot of preprocessing code (with permission) from Evan Chow's [jazzml](https://github.com/evancchow/jazzml). Thank you [Evan](https://www.linkedin.com/in/evancchow)! Public examples from the [Keras documentation](https://github.com/fchollet/keras) were also referenced.
 
 ### Code License, Media Copyright
 
191 changes: 105 additions & 86 deletions generator.py
@@ -43,16 +43,16 @@ def __predict(model, x, indices_val, diversity):
 ''' Helper function which uses the given model to generate a grammar sequence
     from a given corpus, indices_val (mapping), abstract_grammars (list),
     and diversity floating point value. '''
-def __generate_grammar(model, corpus, abstract_grammars, indices_val,
-                       diversity):
+def __generate_grammar(model, corpus, abstract_grammars, values, val_indices,
+                       indices_val, max_len, max_tries, diversity):
     curr_grammar = ''
     # np.random.randint is exclusive to high
-    start_index = np.random.randint(0, len(corpus) - maxlen)
-    sentence = corpus[start_index: start_index + maxlen]  # seed
+    start_index = np.random.randint(0, len(corpus) - max_len)
+    sentence = corpus[start_index: start_index + max_len]  # seed
     running_length = 0.0
     while running_length <= 4.1:  # arbitrary, from avg in input file
         # transform sentence (previous sequence) to matrix
-        x = np.zeros((1, maxlen, len(values)))
+        x = np.zeros((1, max_len, len(values)))
         for t, val in enumerate(sentence):
             if (not val in val_indices): print(val)
             x[0, t, val_indices[val]] = 1.
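For intuition, here is a toy-scale sketch of the one-hot step above; the token strings are hypothetical stand-ins for the grammar values deepjazz derives from the MIDI file:

```python
import numpy as np

# Hypothetical vocabulary and seed; the real ones come from get_corpus_data().
values = ['C,0.5', 'A,0.25', 'X,1.0']
val_indices = {v: i for i, v in enumerate(values)}

sentence = ['C,0.5', 'A,0.25']      # seed slice of the corpus (max_len = 2)
x = np.zeros((1, len(sentence), len(values)))
for t, val in enumerate(sentence):
    x[0, t, val_indices[val]] = 1.  # exactly one 1 per timestep
# x.shape == (1, max_len, N_values): the input shape the LSTM expects.
```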
@@ -89,84 +89,103 @@ def __generate_grammar(model, corpus, abstract_grammars, indices_val,
 
     return curr_grammar
 
-#---------------------------------SCRIPT---------------------------------------#
-
-# model settings
-diversity = 0.5
-maxlen = 20
-max_tries = 1000
-N_epochs = int(sys.argv[1])
-
-# i/o settings
-fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
-outfn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)
-if (N_epochs == 1): outfn += '_epoch.midi'
-else: outfn += '_epochs.midi'
-
-# musical settings
-bpm = 130
-
-# get data
-chords, abstract_grammars = get_musical_data(fn)
-corpus, val_indices, indices_val = get_corpus_data(abstract_grammars)
-values = set(corpus)
-print('corpus length:', len(corpus))
-print('total # of values:', len(values))
-
-# build model
-model = lstm.build_model(corpus=corpus, val_indices=val_indices, maxlen=maxlen,
-                         N_epochs=N_epochs)
-
-# set up audio stream
-out_stream = stream.Stream()
-play = lambda x: midi.realtime.StreamPlayer(x).play()
-stop = lambda: pygame.mixer.music.stop()
-
-# generation loop
-curr_offset = 0.0
-loopEnd = len(chords)
-for loopIndex in range(1, loopEnd):
-    # get chords from file
-    curr_chords = stream.Voice()
-    for j in chords[loopIndex]:
-        curr_chords.insert((j.offset % 4), j)
-
-    # generate grammar
-    curr_grammar = __generate_grammar(model, corpus, abstract_grammars,
-                                      indices_val, diversity)
-    curr_grammar = curr_grammar.replace(' A',' C').replace(' X',' C')
-
-    # Pruning #1: smoothing measure
-    curr_grammar = prune_grammar(curr_grammar)
-
-    # Get notes from grammar and chords
-    curr_notes = unparse_grammar(curr_grammar, curr_chords)
-
-    # Pruning #2: removing repeated and too close together notes
-    curr_notes = prune_notes(curr_notes)
-
-    # quality assurance: clean up notes
-    curr_notes = clean_up_notes(curr_notes)
-
-    # print # of notes in curr_notes
-    print('After pruning: %s notes' % (len([i for i in curr_notes
-                                            if isinstance(i, note.Note)])))
-
-    # insert into the output stream
-    for m in curr_notes:
-        out_stream.insert(curr_offset + m.offset, m)
-    for mc in curr_chords:
-        out_stream.insert(curr_offset + mc.offset, mc)
-
-    curr_offset += 4.0
-
-out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))
-
-# Play the final stream through output (see 'play' lambda function above)
-play(out_stream)
-
-# save stream
-mf = midi.translate.streamToMidiFile(out_stream)
-mf.open(outfn, 'wb')
-mf.write()
-mf.close()
+#----------------------------PUBLIC FUNCTIONS----------------------------------#
+
+''' Generates a musical sequence based on the given data filename and settings.
+    Plays, then stores (as a MIDI file) the generated output. '''
+def generate(data_fn, out_fn, N_epochs):
+    # model settings
+    max_len = 20
+    max_tries = 1000
+    diversity = 0.5
+
+    # musical settings
+    bpm = 130
+
+    # get data
+    chords, abstract_grammars = get_musical_data(data_fn)
+    corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
+    print('corpus length:', len(corpus))
+    print('total # of values:', len(values))
+
+    # build model
+    model = lstm.build_model(corpus=corpus, val_indices=val_indices,
+                             max_len=max_len, N_epochs=N_epochs)
+
+    # set up audio stream
+    out_stream = stream.Stream()
+
+    # generation loop
+    curr_offset = 0.0
+    loopEnd = len(chords)
+    for loopIndex in range(1, loopEnd):
+        # get chords from file
+        curr_chords = stream.Voice()
+        for j in chords[loopIndex]:
+            curr_chords.insert((j.offset % 4), j)
+
+        # generate grammar
+        curr_grammar = __generate_grammar(model=model, corpus=corpus,
+                                          abstract_grammars=abstract_grammars,
+                                          values=values, val_indices=val_indices,
+                                          indices_val=indices_val,
+                                          max_len=max_len, max_tries=max_tries,
+                                          diversity=diversity)
+
+        curr_grammar = curr_grammar.replace(' A',' C').replace(' X',' C')
+
+        # Pruning #1: smoothing measure
+        curr_grammar = prune_grammar(curr_grammar)
+
+        # Get notes from grammar and chords
+        curr_notes = unparse_grammar(curr_grammar, curr_chords)
+
+        # Pruning #2: removing repeated and too close together notes
+        curr_notes = prune_notes(curr_notes)
+
+        # quality assurance: clean up notes
+        curr_notes = clean_up_notes(curr_notes)
+
+        # print # of notes in curr_notes
+        print('After pruning: %s notes' % (len([i for i in curr_notes
+                                                if isinstance(i, note.Note)])))
+
+        # insert into the output stream
+        for m in curr_notes:
+            out_stream.insert(curr_offset + m.offset, m)
+        for mc in curr_chords:
+            out_stream.insert(curr_offset + mc.offset, mc)
+
+        curr_offset += 4.0
+
+    out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))
+
+    # play the final stream through output
+    play = lambda x: midi.realtime.StreamPlayer(x).play()
+    play(out_stream)
+
+    # save stream
+    mf = midi.translate.streamToMidiFile(out_stream)
+    mf.open(out_fn, 'wb')
+    mf.write()
+    mf.close()
+
+''' Runs generate() -- generating, playing, then storing a musical sequence --
+    with the default Metheny file. '''
+def main(args):
+    try:
+        N_epochs = int(args[1])
+    except:
+        N_epochs = 16  # default
+
+    # i/o settings
+    data_fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
+    out_fn = 'midi/' + 'deepjazz_on_metheny...' + str(N_epochs)
+    if (N_epochs == 1): out_fn += '_epoch.midi'
+    else: out_fn += '_epochs.midi'
+
+    generate(data_fn, out_fn, N_epochs)
+
+''' If run as a script, execute main(). '''
+if __name__ == '__main__':
+    import sys
+    main(sys.argv)
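With the script body folded into generate() and main(), the module can be driven from the shell as before, or imported. A minimal sketch using the defaults main() sets up:

```python
# From the shell, as before (argument = number of training epochs):
#   $ python generator.py 128
#
# Or from Python, via the new public function:
from generator import generate

generate(data_fn='midi/original_metheny.mid',
         out_fn='midi/deepjazz_on_metheny...128_epochs.midi',
         N_epochs=128)
```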
14 changes: 7 additions & 7 deletions lstm.py
@@ -16,21 +16,21 @@
 import numpy as np
 
 ''' Build a 2-layer LSTM from a training corpus '''
-def build_model(corpus, val_indices, maxlen, N_epochs=128):
+def build_model(corpus, val_indices, max_len, N_epochs=128):
     # number of different values or words in corpus
     N_values = len(set(corpus))
 
-    # cut the corpus in semi-redundant sequences of maxlen values
+    # cut the corpus in semi-redundant sequences of max_len values
     step = 3
     sentences = []
     next_values = []
-    for i in range(0, len(corpus) - maxlen, step):
-        sentences.append(corpus[i: i + maxlen])
-        next_values.append(corpus[i + maxlen])
+    for i in range(0, len(corpus) - max_len, step):
+        sentences.append(corpus[i: i + max_len])
+        next_values.append(corpus[i + max_len])
     print('nb sequences:', len(sentences))
 
     # transform data into binary matrices
-    X = np.zeros((len(sentences), maxlen, N_values), dtype=np.bool)
+    X = np.zeros((len(sentences), max_len, N_values), dtype=np.bool)
     y = np.zeros((len(sentences), N_values), dtype=np.bool)
     for i, sentence in enumerate(sentences):
         for t, val in enumerate(sentence):
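For intuition, the semi-redundant windowing above applied to a toy corpus (tokens hypothetical):

```python
corpus = list('abcdefgh')  # stand-in tokens
max_len, step = 3, 3
sentences = [corpus[i: i + max_len]
             for i in range(0, len(corpus) - max_len, step)]
next_values = [corpus[i + max_len]
               for i in range(0, len(corpus) - max_len, step)]
# sentences   == [['a','b','c'], ['d','e','f']]
# next_values == ['d', 'g']  -- the value the model learns to predict
```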
@@ -39,7 +39,7 @@ def build_model(corpus, val_indices, maxlen, N_epochs=128):
 
     # build a 2 stacked LSTM
     model = Sequential()
-    model.add(LSTM(128, return_sequences=True, input_shape=(maxlen, N_values)))
+    model.add(LSTM(128, return_sequences=True, input_shape=(max_len, N_values)))
     model.add(Dropout(0.2))
     model.add(LSTM(128, return_sequences=False))
     model.add(Dropout(0.2))
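The diff never shows __predict, which consumes this model's softmax output over in generator.py. Assuming it follows the standard Keras text-generation example (which the README credits), the diversity argument would act as a softmax temperature, roughly as below -- a sketch, not the committed implementation:

```python
import numpy as np

def sample(preds, diversity=0.5):
    # Temperature-scale the softmax output, renormalize, then draw one index.
    # Lower diversity -> sharper distribution, safer note choices;
    # higher diversity -> flatter distribution, more surprising ones.
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-12) / diversity
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    return np.argmax(np.random.multinomial(1, preds, 1))
```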
10 changes: 5 additions & 5 deletions preprocess.py
@@ -17,9 +17,9 @@
 #----------------------------HELPER FUNCTIONS----------------------------------#
 
 ''' Helper function to parse a MIDI file into its measures and chords '''
-def __parse_midi(fn):
+def __parse_midi(data_fn):
     # Parse the MIDI data for separate melody and accompaniment parts.
-    midi_data = converter.parse(fn)
+    midi_data = converter.parse(data_fn)
     # Get melody part, compress into single voice.
     melody_stream = midi_data[5]  # For Metheny piece, Melody is Part #5.
     melody1, melody2 = melody_stream.getElementsByClass(stream.Voice)
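The hard-coded Part #5 only holds for the Metheny file. To adapt another MIDI file, one could first inspect its parts with music21 -- a sketch with a hypothetical path; partName may be None for unnamed parts:

```python
from music21 import converter

midi_data = converter.parse('midi/your_file.mid')  # hypothetical path
for i, part in enumerate(midi_data.parts):
    print(i, part.partName)  # find the melody's index, then use it as above
```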
@@ -126,8 +126,8 @@ def __get_abstract_grammars(measures, chords):
 #----------------------------PUBLIC FUNCTIONS----------------------------------#
 
 ''' Get musical data from a MIDI file '''
-def get_musical_data(fn):
-    measures, chords = __parse_midi(fn)
+def get_musical_data(data_fn):
+    measures, chords = __parse_midi(data_fn)
     abstract_grammars = __get_abstract_grammars(measures, chords)
 
     return chords, abstract_grammars
@@ -139,4 +139,4 @@ def get_corpus_data(abstract_grammars):
     val_indices = dict((v, i) for i, v in enumerate(values))
     indices_val = dict((i, v) for i, v in enumerate(values))
 
-    return corpus, val_indices, indices_val
+    return corpus, values, val_indices, indices_val
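A toy illustration of the widened return tuple (token strings hypothetical). Returning values alongside the two dicts gives lstm.py and generator.py one shared value ordering instead of each module rebuilding set(corpus) on its own:

```python
# Hypothetical three-token corpus.
corpus = ['C,0.5', 'A,0.25', 'C,0.5']
values = set(corpus)                                       # 2 distinct values
val_indices = dict((v, i) for i, v in enumerate(values))   # value -> index
indices_val = dict((i, v) for i, v in enumerate(values))   # index -> value
# Both mappings, and values itself, now travel together to every caller.
```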
