In [1]:
import numpy as np
import pandas as pd
import glob
import pickle
from music21 import *
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Lambda
from keras.layers import LSTM, Bidirectional
from keras.layers import Activation
from keras.layers import BatchNormalization as BatchNorm
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
import random
import tensorflow as tf

Using TensorFlow backend.


In [2]:
def create_network(network_input, n_vocab):
    """Build and compile the stacked-LSTM next-note classifier.

    Args:
        network_input: array whose ``shape[1]`` and ``shape[2]`` are used as
            the (timesteps, features) input shape of the first LSTM layer.
        n_vocab: number of units in the final Dense layer (one per class).

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer and
        categorical cross-entropy loss.
    """
    timesteps = network_input.shape[1]
    features = network_input.shape[2]

    layer_stack = [
        # Two stacked LSTMs; only the second collapses the time axis.
        LSTM(
            512,
            input_shape=(timesteps, features),
            recurrent_dropout=0.3,
            return_sequences=True,
        ),
        LSTM(512, recurrent_dropout=0.3, return_sequences=False),
        BatchNorm(),
        Dropout(0.3),
        Dense(256),
        Activation('relu'),
        BatchNorm(),
        Dropout(0.3),
        Dense(n_vocab),
        # Divide the Dense outputs by 0.6 before the softmax is applied.
        Lambda(lambda x: x / 0.6),
        Activation('softmax'),
    ]

    model = Sequential(layer_stack)
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model

In [3]:
def prepare_sequences_homework(notes, n_vocab, debug = False):
    """Slice ``notes`` into sliding 4-token windows plus the token that follows.

    Every position is used, so windows may span the '$' markers that separate
    files and '$' itself may appear as a target.

    Args:
        notes: sequence of note/chord tokens (strings).
        n_vocab: vocabulary size; used to scale the encoded inputs and as the
            width of the one-hot targets.
        debug: when true, keep the raw tokens instead of encoding them.

    Returns:
        Tuple ``(network_input, network_output)``.
        With ``debug`` false: ``network_input`` has shape (n_patterns, 4, 1)
        holding token indices divided by ``n_vocab``; ``network_output`` is a
        float32 one-hot array of shape (n_patterns, n_vocab).
        With ``debug`` true: ``network_input`` is an array of raw tokens with
        shape (n_patterns, 4, 1) and ``network_output`` is a list of tokens.
    """
    sequence_length = 4

    pitchnames = sorted(set(notes))
    note_to_int = {name: index for index, name in enumerate(pitchnames)}

    network_input = []
    network_output = []

    for start in range(len(notes) - sequence_length):
        sequence_in = notes[start:start + sequence_length]
        sequence_out = notes[start + sequence_length]
        if debug:
            network_input.append(sequence_in)
            network_output.append(sequence_out)
        else:
            network_input.append([note_to_int[char] for char in sequence_in])
            network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

    if not debug:
        # normalize input
        network_input = network_input / float(n_vocab)
        # One-hot encode with a fixed width of n_vocab. Inferring the width
        # from the largest target seen would produce too few columns whenever
        # the highest-indexed token never occurs as a target, and would fail
        # outright when there are no patterns at all.
        targets = np.asarray(network_output, dtype=int)
        one_hot = np.zeros((n_patterns, n_vocab), dtype='float32')
        one_hot[np.arange(n_patterns), targets] = 1.0
        network_output = one_hot

    return (network_input, network_output)

In [4]:
# Looping works in this cell
def get_notes():
    """Parse every ``./*.mid`` file into one flat list of tokens.

    Each ``note.Note`` becomes ``str(element.pitch)``; each ``chord.Chord``
    becomes its ``normalOrder`` integers joined with '.'. A '$' token is
    appended after every file to mark where one file ends. The resulting list
    is also pickled to ``notes.p``.

    Returns:
        list of str: the tokens of all files, in sorted file-name order.
    """
    notes = []

    # Sorted so the token order does not depend on directory listing order.
    for file in sorted(glob.glob("./*.mid")):
        score = converter.parse(file)
        print("Parsing %s" % file)

        try:  # file has instrument parts
            by_instrument = instrument.partitionByInstrument(score)
            notes_to_parse = by_instrument.parts[0].recurse()
        except Exception:  # file has notes in a flat structure
            # `Exception` rather than a bare except, so that Ctrl-C and
            # interpreter shutdown are not swallowed by the fallback.
            notes_to_parse = score.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
        notes.append('$')

    # Context manager guarantees the pickle is flushed and the handle closed.
    with open('notes.p', 'wb') as handle:
        pickle.dump(notes, handle)

    return notes

def prepare_sequences_homework(notes, n_vocab, debug = False):
    """Slice ``notes`` into sliding 4-token windows plus the token that follows.

    NOTE: this name is also defined in an earlier cell; this later definition
    is the one in effect once this cell has run.

    Every position is used, so windows may span the '$' markers that separate
    files and '$' itself may appear as a target.

    Args:
        notes: sequence of note/chord tokens (strings).
        n_vocab: vocabulary size; used to scale the encoded inputs and as the
            width of the one-hot targets.
        debug: when true, keep the raw tokens instead of encoding them.

    Returns:
        Tuple ``(network_input, network_output)``.
        With ``debug`` false: ``network_input`` has shape (n_patterns, 4, 1)
        holding token indices divided by ``n_vocab``; ``network_output`` is a
        float32 one-hot array of shape (n_patterns, n_vocab).
        With ``debug`` true: ``network_input`` is an array of raw tokens with
        shape (n_patterns, 4, 1) and ``network_output`` is a list of tokens.
    """
    sequence_length = 4

    pitchnames = sorted(set(notes))
    note_to_int = {name: index for index, name in enumerate(pitchnames)}

    network_input = []
    network_output = []

    for start in range(len(notes) - sequence_length):
        sequence_in = notes[start:start + sequence_length]
        sequence_out = notes[start + sequence_length]
        if debug:
            network_input.append(sequence_in)
            network_output.append(sequence_out)
        else:
            network_input.append([note_to_int[char] for char in sequence_in])
            network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

    if not debug:
        # normalize input
        network_input = network_input / float(n_vocab)
        # One-hot encode with a fixed width of n_vocab. Inferring the width
        # from the largest target seen would produce too few columns whenever
        # the highest-indexed token never occurs as a target, and would fail
        # outright when there are no patterns at all.
        targets = np.asarray(network_output, dtype=int)
        one_hot = np.zeros((n_patterns, n_vocab), dtype='float32')
        one_hot[np.arange(n_patterns), targets] = 1.0
        network_output = one_hot

    return (network_input, network_output)

def prepare_sequences(notes, n_vocab, debug=False):
    """ Prepare the sequences used by the Neural Network

    ``notes`` is treated as a series of pieces, each closed by a '$' token.
    Windows never cross a '$'. A piece that is closed by '$' is treated as a
    loop: once the window reaches the end of the piece it wraps around to the
    piece's own beginning, so a piece of L >= 4 tokens yields exactly L
    patterns. Closed pieces shorter than 4 tokens yield nothing. Trailing
    tokens that are not closed by '$' yield plain (non-wrapping) windows.

    Args:
        notes: list of note/chord tokens (strings) with '$' separators.
        n_vocab: vocabulary size; used to scale the encoded inputs and as the
            width of the one-hot targets.
        debug: when true, keep the raw tokens instead of encoding them.

    Returns:
        Tuple ``(network_input, network_output)``.
        With ``debug`` false: ``network_input`` has shape (n_patterns, 4, 1)
        holding token indices divided by ``n_vocab``; ``network_output`` is a
        float32 one-hot array of shape (n_patterns, n_vocab).
        With ``debug`` true: ``network_input`` is an array of raw tokens with
        shape (n_patterns, 4, 1) and ``network_output`` is a list of tokens.
    """
    sequence_length = 4

    pitchnames = sorted(set(notes))
    note_to_int = {name: index for index, name in enumerate(pitchnames)}

    # Split the stream at every '$', remembering whether each piece was
    # actually closed by a marker. Working piece by piece means the wrap-around
    # can only ever reach back into the piece it belongs to, even when a
    # shorter piece (or a leading '$') sits in front of it.
    pieces = []
    piece_start = 0
    for position, token in enumerate(notes):
        if token == '$':
            pieces.append((list(notes[piece_start:position]), True))
            piece_start = position + 1
    pieces.append((list(notes[piece_start:]), False))

    network_input = []
    network_output = []
    for piece, closed in pieces:
        if closed:
            if len(piece) < sequence_length:
                # Too short to fill even one window.
                continue
            # Append the head of the piece so slices past the end wrap around.
            ring = piece + piece[:sequence_length]
            n_windows = len(piece)
        else:
            ring = piece
            n_windows = len(piece) - sequence_length

        for start in range(n_windows):
            sequence_in = ring[start:start + sequence_length]
            sequence_out = ring[start + sequence_length]
            if debug:
                network_input.append(sequence_in)
                network_output.append(sequence_out)
            else:
                network_input.append([note_to_int[char] for char in sequence_in])
                network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

    if not debug:
        # normalize input
        network_input = network_input / float(n_vocab)
        # One-hot encode with a fixed width of n_vocab so the target width does
        # not depend on which tokens happen to occur as targets.
        targets = np.asarray(network_output, dtype=int)
        one_hot = np.zeros((n_patterns, n_vocab), dtype='float32')
        one_hot[np.arange(n_patterns), targets] = 1.0
        network_output = one_hot

    return (network_input, network_output)

In [6]:
# Toy stream: two seven-token "files", each followed by the '$' marker.
custom_input = ['A','B','C','D','E','F','G','$','p','q','r','s','t','u','v','$']

# Print the raw (debug) windows and targets of both builders, one after the other.
for title, build_sequences in (
    ("HOMEWORK IMPLEMENTATION", prepare_sequences_homework),
    ("OUR IMPLEMENTATION", prepare_sequences),
):
    b, c = build_sequences(custom_input, len(set(custom_input)), debug=True)
    print(title)
    for window, target in zip(b, c):
        print(window, target)

HOMEWORK IMPLEMENTATION
[['A']
 ['B']
 ['C']
 ['D']] E
[['B']
 ['C']
 ['D']
 ['E']] F
[['C']
 ['D']
 ['E']
 ['F']] G
[['D']
 ['E']
 ['F']
 ['G']] $
[['E']
 ['F']
 ['G']
 ['$']] p
[['F']
 ['G']
 ['$']
 ['p']] q
[['G']
 ['$']
 ['p']
 ['q']] r
[['$']
 ['p']
 ['q']
 ['r']] s
[['p']
 ['q']
 ['r']
 ['s']] t
[['q']
 ['r']
 ['s']
 ['t']] u
[['r']
 ['s']
 ['t']
 ['u']] v
[['s']
 ['t']
 ['u']
 ['v']] $
OUR IMPLEMENTATION
[['A']
 ['B']
 ['C']
 ['D']] E
[['B']
 ['C']
 ['D']
 ['E']] F
[['C']
 ['D']
 ['E']
 ['F']] G
[['D']
 ['E']
 ['F']
 ['G']] A
[['E']
 ['F']
 ['G']
 ['A']] B
[['F']
 ['G']
 ['A']
 ['B']] C
[['G']
 ['A']
 ['B']
 ['C']] D
[['p']
 ['q']
 ['r']
 ['s']] t
[['q']
 ['r']
 ['s']
 ['t']] u
[['r']
 ['s']
 ['t']
 ['u']] v
[['s']
 ['t']
 ['u']
 ['v']] p
[['t']
 ['u']
 ['v']
 ['p']] q
[['u']
 ['v']
 ['p']
 ['q']] r
[['v']
 ['p']
 ['q']
 ['r']] s


In [None]:
def train_network(epochs=50, batch_size=8):
    """ Train a Neural Network to generate music

    Parses the MIDI files via ``get_notes()``, builds the training windows
    with ``prepare_sequences()``, and fits the model from ``create_network()``.
    The weights with the lowest training loss seen so far are saved to
    ``weights.hdf5``.

    Args:
        epochs: number of passes over the training data (default 50).
        batch_size: number of patterns per gradient update (default 8).

    Raises:
        ValueError: if ``get_notes()`` returned no tokens at all.
    """
    notes = get_notes()
    if not notes:
        # Fail early with an actionable message instead of an opaque error
        # from deeper inside the data preparation or fitting code.
        raise ValueError(
            "No notes were parsed: get_notes() found nothing to read from ./*.mid"
        )

    n_vocab = len(set(notes))

    network_input, network_output = prepare_sequences(notes, n_vocab)

    model = create_network(network_input, n_vocab)

    # Keep only the best weights, judged by the training loss.
    checkpoint = ModelCheckpoint(
        "weights.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min'
    )

    callbacks_list = [checkpoint]

    model.fit(
        x=network_input,
        y=network_output,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks_list,
    )
    
# Run training now; the ModelCheckpoint callback inside train_network() writes weights.hdf5.
train_network()

ValueError: ignored

In [None]:
def prepare_sequences_prediction(notes, pitchnames, n_vocab):
    """Build the 8-token seed windows used to start generation.

    NOTE(review): the window length here is 8, while the training helpers in
    this notebook use 4 - confirm that the difference is intended.

    Args:
        notes: sequence of note/chord tokens (strings).
        pitchnames: ordered vocabulary; a token's position is its integer code.
        n_vocab: divisor used to scale the integer codes.

    Returns:
        Tuple ``(network_input, normalized_input)``: ``network_input`` is a
        list of windows, each a list of 8 integer codes; ``normalized_input``
        is the same data as an array of shape (n_patterns, 8, 1) divided by
        ``n_vocab``.

    Raises:
        KeyError: if a token inside a window is not in ``pitchnames``.
    """
    note_to_int = {name: index for index, name in enumerate(pitchnames)}
    sequence_length = 8

    # Only the input windows are needed for generation. The token following
    # each window is not encoded, so a trailing token outside the vocabulary
    # cannot make this fail.
    network_input = [
        [note_to_int[char] for char in notes[start:start + sequence_length]]
        for start in range(len(notes) - sequence_length)
    ]

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

In [None]:
def generate_notes(model, network_input, pitchnames, n_vocab):
    """ Generate notes from the neural network based on a sequence of notes

    Args:
        model: object with a ``predict(batch)`` method returning one row of
            per-class scores for a batch of one window.
        network_input: list of seed windows (lists of integer codes); it is
            not modified.
        pitchnames: ordered vocabulary; position ``i`` is the token for code ``i``.
        n_vocab: divisor used to scale the integer codes before prediction.

    Returns:
        list of 200 generated tokens.

    Raises:
        ValueError: if ``network_input`` is empty (raised by the seed draw).
    """
    # Starts the melody by picking a random sequence from the input as a starting point.
    # randint's upper bound is exclusive, so len(network_input) makes every
    # window eligible and also works when there is exactly one window.
    start = np.random.randint(0, len(network_input))

    int_to_note = dict(enumerate(pitchnames))

    # Work on a copy so the caller's seed window is left untouched.
    pattern = list(network_input[start])
    prediction_output = []

    for _ in range(200):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input)
        # Renormalise so the scores can be used as sampling weights.
        prob = (prediction / np.sum(prediction))[0]

        # Sample the next code instead of always taking the most likely one.
        index = random.choices(range(len(prob)), weights=prob)[0]
        prediction_output.append(int_to_note[index])

        # Slide the window: drop the oldest code, add the new one.
        pattern = pattern[1:] + [index]

    return prediction_output

In [None]:
def generate():
    """Generate a melody from the saved notes and weights and write it as MIDI.

    Loads the token list from ``notes.p``, rebuilds the network, loads
    ``weights.hdf5`` into it, samples a token sequence with
    ``generate_notes()`` and hands it to ``create_midi()``.
    """
    # NOTE: unpickling can execute arbitrary code. notes.p is the file that
    # get_notes() writes; do not point this at a pickle from an untrusted source.
    with open('notes.p', 'rb') as handle:
        notes = pickle.load(handle)

    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)

    network_input, normalized_input = prepare_sequences_prediction(notes, pitchnames, n_vocab)
    model = create_network(normalized_input, n_vocab)

    # TODO: Change hdf5 file name later
    model.load_weights("weights.hdf5")

    prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
    create_midi(prediction_output)

In [None]:
def create_midi(prediction_output):
    """Turn generated tokens into notes/chords and write ``test_output.mid``.

    A token containing '.' or made only of digits is treated as a chord whose
    parts are integer pitch values; any other token is treated as a pitch
    name. Each emitted note or chord is placed one offset unit after the
    previous one.

    Args:
        prediction_output: iterable of token strings.
    """
    offset = 0
    output_notes = []
    for pattern in prediction_output:
        if pattern == '$':
            # '$' is the end-of-file marker added by get_notes(), not a pitch;
            # skip it instead of passing it to note.Note.
            continue

        if ('.' in pattern) or pattern.isdigit():
            # Chord token: one Note per '.'-separated integer.
            notes = []
            for current_note in pattern.split('.'):
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        else:
            # Single-note token: the string is the pitch name.
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        offset += 1.0

    print(output_notes)
    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='test_output.mid')
    
# Run generation now; create_midi() writes the result to test_output.mid.
generate()

FileNotFoundError: ignored

In [None]:
# Read the generated MIDI file back and show it with music21's 'midi' format.
mf = midi.MidiFile()
mf.open("test_output.mid")
try:
    mf.read()
finally:
    # Close the file even if reading fails part-way through.
    mf.close()

s = midi.translate.midiFileToStream(mf)
myStream = stream.Stream()
myStream.append(s)

x = myStream
x.show('midi')