In [1764]:
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import pickle
from music21 import converter, instrument, stream, note, chord
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation, Bidirectional, Flatten, Embedding, Lambda, Input, concatenate, Reshape, Permute, RepeatVector, Multiply
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras_self_attention import SeqSelfAttention, SeqWeightedAttention
from keras.layers import BatchNormalization as BatchNorm
from fractions import Fraction
from keras.models import Model
from keras import backend as K

In [1765]:
def convert_midi_to_notes(midi_glob="classical/*.mid"):
    """Parse MIDI files into a flat list of "<name> <duration>" tokens.

    Each note, chord and rest that is visited becomes one string:
      * note  -> "<pitch> <quarterLength>"
      * chord -> "<normalOrder integers joined by '.'> <quarterLength>"
      * rest  -> "<element.name> <quarterLength>"
    Durations whose string form contains "/" (fractions) are rendered with
    one decimal place instead.

    Args:
        midi_glob: Glob pattern selecting the MIDI files to parse. Defaults to
            "classical/*.mid", the location this notebook originally hard-coded.

    Returns:
        list[str]: Tokens of all matched files concatenated. Files are visited
        in sorted path order so the result does not depend on directory
        listing order. An empty list is returned when nothing matches.
    """
    notes = []
    # glob order is filesystem dependent; sort so repeated runs agree.
    for file in sorted(glob.glob(midi_glob)):
        print("Parsing %s" % file)
        midi = converter.parse(file)
        try:  # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except Exception:  # file has notes in a flat structure
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still
            # interrupt a long parse instead of being treated as "flat file".
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                name = str(element.pitch)
            elif isinstance(element, chord.Chord):
                name = '.'.join(str(n) for n in element.normalOrder)
            elif isinstance(element, note.Rest):
                name = str(element.name)
            else:
                # Anything else encountered while iterating is not tokenised.
                continue

            length = str(element.quarterLength)
            if "/" in length:
                # Fractional duration: collapse to a one-decimal float string.
                length = '%.1f' % float(element.quarterLength)

            notes.append(name + " " + length)

    return notes

In [1766]:
def create_network1(network_input, n_vocab): #attention network
    """Build and compile an embedding + stacked-LSTM network with soft attention.

    The attention part scores every time step with a 1-unit tanh Dense layer,
    turns the scores into weights with a softmax over the time axis, and sums
    the LSTM outputs weighted by them into a single 512-vector that feeds the
    final softmax over the vocabulary.

    Args:
        network_input: Array whose ``shape[1]`` is the sequence length.
            NOTE(review): the Embedding layer looks up indices, so this model
            presumably expects raw integer note indices of shape
            (n_patterns, sequence_length) rather than the normalised
            (n_patterns, sequence_length, 1) floats produced by
            prepare_sequences -- confirm before training with it.
        n_vocab: Number of distinct note tokens (embedding rows and output units).

    Returns:
        A Keras ``Model`` compiled with categorical cross-entropy and Adam.
    """
    sequence_length = network_input.shape[1]

    # `shape` must be a tuple; `(n)` is just the int n, so spell out the 1-tuple.
    notes_in = Input(shape=(sequence_length,))

    x1 = Embedding(n_vocab, 5, input_length=sequence_length)(notes_in)

    x = LSTM(512, return_sequences=True)(x1)
    x = LSTM(512, return_sequences=True)(x)

    # One score per time step, flattened to (batch, sequence_length).
    e = Dense(1, activation='tanh')(x)
    e = Reshape([-1])(e)

    # Attention weights over the time axis.
    alpha = Activation('softmax')(e)

    # Repeat the weights across the 512 LSTM units and swap axes so they line
    # up with x for the element-wise product.
    c = Permute([2, 1])(RepeatVector(512)(alpha))
    c = Multiply()([x, c])

    # Weighted sum over the time axis.
    c = Lambda(lambda xin: K.sum(xin, axis=1), output_shape=(512,))(c)

    notes_out = Dense(n_vocab, activation='softmax')(c)

    model = Model(notes_in, notes_out)
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    return model

In [1767]:
def create_network(network_input, n_vocab): #sequential network
    """Build and compile the stacked-LSTM next-note classifier.

    Layer order: two 512-unit LSTMs (the first returns the full sequence, the
    second only its last output), then two BatchNorm + Dropout(0.3) stages
    around a 256-unit ReLU Dense layer, then a Dense(n_vocab) layer whose
    outputs are divided by 0.6 before the final softmax.

    Args:
        network_input: Array whose ``shape[1]`` and ``shape[2]`` give the
            per-sample input shape handed to the LSTM layers.
        n_vocab: Number of output classes.

    Returns:
        A Keras ``Sequential`` model compiled with Adam and categorical
        cross-entropy.
    """
    model = Sequential()

    per_sample_shape = (network_input.shape[1], network_input.shape[2])
    layer_stack = [
        LSTM(512,
             input_shape=per_sample_shape,
             recurrent_dropout=0.3,
             return_sequences=True),
        LSTM(512,
             input_shape=per_sample_shape,
             recurrent_dropout=0.3,
             return_sequences=False),
        BatchNorm(),
        Dropout(0.3),
        Dense(256),
        Activation('relu'),
        BatchNorm(),
        Dropout(0.3),
        Dense(n_vocab),
        # Scale the pre-softmax outputs by 1 / 0.6.
        Lambda(lambda logits: logits / 0.6),
        Activation('softmax'),
    ]
    for layer in layer_stack:
        model.add(layer)

    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model

In [1768]:
def train_model(epochs=5):
    """ Train a Neural Network to generate music

    Reads the notebook-level ``notes`` list (it is not a parameter), builds
    the training sequences with prepare_sequences, creates the network with
    create_network and fits it.

    A ModelCheckpoint callback writes "weights.hdf5" whenever the training
    loss improves (``monitor='loss'``, ``save_best_only=True``).

    Args:
        epochs: Number of training epochs. Defaults to 5, the value this
            notebook originally hard-coded.
    """
    n_vocab = len(set(notes))

    network_input, network_output = prepare_sequences(notes, n_vocab)

    model = create_network(network_input, n_vocab)

    checkpoint = ModelCheckpoint(
        "weights.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min'
    )

    callbacks_list = [checkpoint]

    # Pass the list itself: wrapping it in another list hands fit() a list
    # that contains a list instead of callback objects.
    model.fit(network_input, network_output, epochs=epochs, callbacks=callbacks_list)



In [1769]:
def prepare_sequences(notes, n_vocab, sequence_length=4):
    """ Prepare the sequences used by the Neural Network

    Every note token is mapped to its index in the sorted set of distinct
    tokens. Each window of ``sequence_length`` consecutive indices becomes one
    input sample and the index that follows the window becomes its target.

    Args:
        notes: Sequence of note tokens (hashable, sortable).
        n_vocab: Vocabulary size. Used to scale the inputs and as the width of
            the one-hot targets, so it must match the network's output layer.
        sequence_length: Window length. Defaults to 4, the value this notebook
            originally hard-coded.

    Returns:
        tuple: ``(network_input, network_output)`` where ``network_input`` has
        shape (n_patterns, sequence_length, 1) with indices divided by
        ``n_vocab``, and ``network_output`` is the one-hot target matrix of
        shape (n_patterns, n_vocab).

    Raises:
        ValueError: If ``notes`` is too short to form a single window plus
            its target.
    """
    if len(notes) <= sequence_length:
        raise ValueError(
            "need more than %d notes to build a training sequence, got %d"
            % (sequence_length, len(notes))
        )

    pitchnames = sorted(set(notes))
    note_to_int = {name: number for number, name in enumerate(pitchnames)}

    # Encode once, then slice windows out of the integer list.
    encoded = [note_to_int[token] for token in notes]
    n_patterns = len(encoded) - sequence_length

    windows = [encoded[i:i + sequence_length] for i in range(n_patterns)]
    targets = encoded[sequence_length:]  # targets[i] follows windows[i]

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(windows, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_vocab)

    # Fix the width explicitly: without num_classes the width is inferred from
    # the largest target index, which is too narrow whenever the highest-index
    # token never occurs as a target.
    network_output = np_utils.to_categorical(targets, num_classes=n_vocab)

    return (network_input, network_output)

In [1770]:
def prepare_sequences_prediction(notes, pitchnames, n_vocab, sequence_length=4):
    """Build the seed sequences used for generation.

    Slides a window of ``sequence_length`` over ``notes`` (stopping one short
    of the end, exactly like the training windows) and maps each token to its
    position in ``pitchnames``.

    Args:
        notes: Sequence of note tokens.
        pitchnames: Ordered vocabulary; a token's index in this sequence is
            its integer code.
        n_vocab: Vocabulary size used to scale the normalised copy.
        sequence_length: Window length. Defaults to 4, the value this notebook
            originally hard-coded.

    Returns:
        tuple: ``(network_input, normalized_input)``. ``network_input`` is a
        plain list of lists of ints (one list per window). ``normalized_input``
        is the same data as an array of shape
        (n_patterns, sequence_length, 1) divided by ``n_vocab``.

    Raises:
        KeyError: If a token inside any window is missing from ``pitchnames``.
    """
    note_to_int = {name: number for number, name in enumerate(pitchnames)}

    # Same window count as training; no windows when notes is too short.
    n_patterns = max(len(notes) - sequence_length, 0)

    # Only the windows are needed here: the "next note" targets the training
    # helper builds are never used for generation, so they are not computed.
    network_input = [
        [note_to_int[token] for token in notes[i:i + sequence_length]]
        for i in range(n_patterns)
    ]

    # reshape the input into a format compatible with LSTM layers
    normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

In [1771]:
def predict_notes(model, network_input, pitchnames, n_vocab, n_notes=200):
    """ Generate notes from the neural network based on a sequence of notes

    A random row of ``network_input`` seeds the melody. At every step the
    current window is normalised by ``n_vocab``, passed to ``model.predict``,
    and the next index is sampled from the returned distribution. The sampled
    index is appended to the window and the oldest entry is dropped.

    Args:
        model: Object with a ``predict(array)`` method returning an array
            whose first row holds one probability per vocabulary entry.
        network_input: Non-empty sequence of integer windows.
        pitchnames: Ordered vocabulary; index -> note token.
        n_vocab: Vocabulary size used to normalise the model input.
        n_notes: Number of tokens to generate. Defaults to 200, the value this
            notebook originally hard-coded.

    Returns:
        list: ``n_notes`` generated note tokens.

    Raises:
        ValueError: If ``network_input`` is empty.
    """
    if len(network_input) == 0:
        raise ValueError("network_input is empty; cannot pick a seed sequence")

    # randint's upper bound is exclusive, so use len() (not len() - 1): every
    # window can be chosen and a single-window input works.
    start = np.random.randint(0, len(network_input))
    int_to_note = dict(enumerate(pitchnames))

    # Copy the seed so the caller's network_input is never modified.
    pattern = list(network_input[start])
    prediction_output = []

    for _ in range(n_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)
        prediction = model.predict(prediction_input)

        # np.random.choice insists that p sums to 1 within a tight tolerance;
        # low-precision model output can miss it, so renormalise in float64.
        probabilities = np.asarray(prediction[0], dtype=np.float64)
        probabilities = probabilities / probabilities.sum()
        index = int(np.random.choice(len(probabilities), p=probabilities))

        prediction_output.append(int_to_note[index])
        # Slide the window: drop the oldest index, add the sampled one.
        pattern = pattern[1:] + [index]
    return prediction_output

In [1772]:

def generate_music_file(prediction_output, fp='test_output.mid'):
    """ convert the output from the prediction to notes and create a midi file
        from the notes

    Each entry of ``prediction_output`` is a "<pattern> <duration>" token:
      * a pattern containing '.' or made only of digits is a chord whose
        members are the '.'-separated integers,
      * a pattern containing 'rest' is a rest,
      * anything else is passed to ``note.Note`` as a single note.
    Elements are laid end to end: every element starts where the previous one
    stopped.

    Args:
        prediction_output: Iterable of token strings as described above.
        fp: Path of the MIDI file to write. Defaults to 'test_output.mid',
            the path this notebook originally hard-coded.
    """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern_and_duration in prediction_output:
        fields = pattern_and_duration.split()
        pattern = fields[0]
        duration = float(fields[1])

        if ('.' in pattern) or pattern.isdigit():
            chord_members = []
            for current_note in pattern.split('.'):
                member = note.Note(int(current_note))
                member.storedInstrument = instrument.Piano()
                chord_members.append(member)
            element = chord.Chord(chord_members)
        elif 'rest' in pattern:
            # The token text carries no information for a rest, so build it
            # without positional arguments.
            # NOTE(review): newer music21 releases appear to interpret the
            # first positional argument of Rest as a length -- confirm.
            element = note.Rest()
            element.storedInstrument = instrument.Piano()
        else:
            element = note.Note(pattern)
            element.storedInstrument = instrument.Piano()

        element.offset = offset
        element.quarterLength = duration
        output_notes.append(element)

        # Next element starts when this one ends.
        offset += duration

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=fp)


In [1773]:
# MAIN BEGINS
# Parse the MIDI corpus once. `notes` is a notebook-level variable that
# train_model() reads directly and that the generation cell reuses.
notes = convert_midi_to_notes()

Parsing classical/appass_2_format0.mid
Parsing classical/beethoven_hammerklavier_1_format0.mid
Parsing classical/beethoven_hammerklavier_2_format0.mid
Parsing classical/appass_1_format0.mid
Parsing classical/appass_3_format0.mid
Parsing classical/beethoven_hammerklavier_3_format0.mid


In [1774]:
# Train on the parsed corpus; the checkpoint callback inside train_model()
# writes the best weights to 'weights.hdf5'.
train_model()

# Rebuild the vocabulary exactly as the training helper does (sorted set).
pitchnames = sorted(set(notes))
n_vocab = len(pitchnames)

network_input, normalized_input = prepare_sequences_prediction(notes, pitchnames, n_vocab)

# Fresh network with the same architecture, then restore the trained weights.
model = create_network(normalized_input, n_vocab)
model.load_weights('weights.hdf5')

# The generator defined in this notebook is predict_notes; `generate_notes`
# is not defined anywhere here and fails on a fresh kernel.
prediction_output = predict_notes(model, network_input, pitchnames, n_vocab)

Train on 10208 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [1775]:
# Convert the generated tokens to music21 objects and write 'test_output.mid'.
generate_music_file(prediction_output)

In [1776]:
#Experimental code for multiple instruments

#     midi = converter.parse('test_output.mid')
#     for el in midi.recurse():
#         if 'Instrument' in el.classes: # or 'Piano'
#             el.activeSite.replace(el, instrument.SteelDrum())
#     midi.parts[0].insert(0, instrument.SteelDrum())
#     midi.parts[1].insert(0, instrument.Guitar())
#     for p in midi.parts:
#         p.insert(0, instrument.SteelDrum())
    
#     midi.write('midi', fp='test_output2.mid')
    
# def generate_second_instrument():
#     notes = pickle.load(open('notes.p', 'rb'))
#     pitchnames = sorted(set(item for item in notes))
#     n_vocab = len(set(notes))
#     network_input, normalized_input = prepare_sequences_prediction(notes, pitchnames, n_vocab)
#     model = create_network(normalized_input, n_vocab)
#     model.load_weights('double_weights.hdf5')
#     prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
#     create_midi(prediction_output)
# def get_notes_second_instrument():
#     notes = []

#     for file in glob.glob("rock_test/*.mid"):
#         midi = converter.parse(file)

#         print("Parsing %s" % file)

#         elements_to_parse = None

#         try: # file has instrument parts
#             s2 = instrument.partitionByInstrument(midi) 
#             guitar = s2.parts[0].recurse() 
#             drums = = s2.parts[1].recurse()
#         except: # file has notes in a flat structure
#             elements_to_parse = midi.flat.notes

#         for i in (0, len(drums): drums[i]
#             length = str(drums[i].quarterLength)
#             if "/" in str(drums[i].quarterLength):
#                 length = str('%.1f' % float(drums[i].quarterLength))
                
#             if isinstance(drums[i], note.Note):
#                 notes.append(str(drums[i].pitch) + " " +  length + str(guitar[i].pitch))
#             elif isinstance(drums[i], chord.Chord):
#                 notes.append('.'.join(str(n) for n in drums[i].normalOrder) + " " + length 
#                     + '.'.join(str(n) for n in guitar[i].normalOrder))
#             elif isinstance(drums[i], note.Rest):
#                 notes.append(str(drums[i].name)  + " " + length + str(guitar[i].name))
#     pickle.dump(notes, open('notes.p', 'wb'))
#     return notes
# def create_midi(prediction_output):
#     properly unpack


In [1777]:
#other attention layer attempts
def create_network3(network_input, n_vocab):
    """Build and compile an LSTM + SeqSelfAttention next-note classifier.

    The LSTM returns its full sequence; SeqSelfAttention is asked to return
    both its outputs and its attention weights, which are concatenated,
    flattened and fed to a softmax over the vocabulary.

    Args:
        network_input: Array whose ``shape[1]`` and ``shape[2]`` give the
            per-sample input shape.
        n_vocab: Number of output classes.

    Returns:
        A Keras ``Model`` compiled with Adam and categorical cross-entropy.
    """
    ipt = Input(shape=(network_input.shape[1], network_input.shape[2]))
    x = LSTM(512, activation='tanh', return_sequences=True)(ipt)
    # With return_attention=True the layer yields [outputs, attention_weights].
    x = SeqSelfAttention(return_attention=True)(x)
    x = concatenate(x)
    x = Flatten()(x)
    # One unit per vocabulary entry with softmax: categorical cross-entropy
    # needs a class distribution, not a single sigmoid unit.
    out = Dense(n_vocab, activation='softmax')(x)
    model = Model(ipt, out)
    # 'adam' -- the previous 'adorn' is not an optimizer name.
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model

In [1778]:
def create_network2(network_input, n_vocab):
    """Build and compile a one-layer bidirectional LSTM network.

    A 512-unit LSTM that returns its full sequence is wrapped in
    Bidirectional and followed by Dense(n_vocab) and a softmax activation.

    NOTE(review): because the LSTM returns sequences and nothing reduces the
    time axis afterwards, the model presumably emits one distribution per
    time step -- confirm this matches the targets it is trained against.

    Args:
        network_input: Array whose ``shape[1]`` and ``shape[2]`` are passed
            to the inner LSTM as ``input_shape``.
        n_vocab: Width of the Dense output layer.

    Returns:
        A Keras ``Sequential`` model compiled with RMSprop and categorical
        cross-entropy.
    """
    model = Sequential()

    recurrent = LSTM(
        512,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        return_sequences=True,
    )
    for layer in (Bidirectional(recurrent), Dense(n_vocab), Activation('softmax')):
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

In [1779]:
def create_network4(network_input, n_vocab):
    """Build and compile an Embedding + BiLSTM + SeqSelfAttention network.

    Layer order: Embedding (with ``mask_zero=True``), a bidirectional
    512-unit LSTM returning sequences, SeqSelfAttention with a sigmoid
    attention activation, and a 256-unit Dense layer.

    NOTE(review): ``n_vocab`` is not used. The Embedding takes its
    ``input_dim`` from ``network_input.shape[0]`` and its ``output_dim`` from
    ``network_input.shape[1]``, and the last layer is a 256-unit Dense with
    no activation -- confirm these are the intended sizes.

    Args:
        network_input: Array whose first two dimensions size the Embedding.
        n_vocab: Unused.

    Returns:
        A Keras ``Sequential`` model compiled with Adam, categorical
        cross-entropy and a categorical-accuracy metric.
    """
    model = Sequential()

    embedding = Embedding(input_dim=network_input.shape[0],
                          output_dim=network_input.shape[1],
                          mask_zero=True)
    recurrent = Bidirectional(LSTM(units=512, return_sequences=True))
    attention = SeqSelfAttention(attention_activation='sigmoid')
    head = Dense(units=256)

    for layer in (embedding, recurrent, attention, head):
        model.add(layer)

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy'],
    )
    return model