In [14]:
#This section is for imports and globals/configs

#Credits to https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5

import matplotlib
#matplotlib.use('TkAgg')
#import matplotlib.pyplot as plt
#get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'png'")
#get_ipython().run_line_magic('matplotlib', 'inline')

from music21 import converter, instrument, note, chord, stream
#import pandas as pd
import numpy as np
import glob
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras import losses

#---- data ----
#directory that holds the training songs (*.mid files)
midi_path = "midi_songs"
#number of consecutive notes fed to the network to predict the next note
sequence_length = 100

#---- model ----
#number of units in each LSTM layer
lstm_nodes = 512
#number of units in the intermediate Dense layer
dense = 256
#rate handed to every Dropout layer (drops part of the signal to limit overfitting)
dropout = 0.3
#activation applied to the final output layer
activation_method = 'softmax'

#---- training ----
#loss function given to model.compile
loss = 'categorical_crossentropy'
#optimizer given to model.compile
optimizer = 'rmsprop'
#number of passes over the training data
epochs = 200
#value passed to model.fit as batch_size (samples per gradient update)
epoch_size = 64

#---- generation ----
#file of learned weights to load before generating
weight_file = 'olivia_run_2/weights-improvement-13-4.5746-bigger.hdf5'
#number of notes/chords to generate
output_lenght = 500

In [5]:
#This section is for reading in the midi file into python
def get_notes(path=None):
    """Read every ``*.mid`` file in a directory into one flat list of tokens.

    Each ``note.Note`` becomes its pitch as a string; each ``chord.Chord``
    becomes its normal-order integers joined with ``'.'``. Any other element
    is skipped.

    Parameters
    ----------
    path : str, optional
        Directory to scan. Defaults to the module-level ``midi_path``.

    Returns
    -------
    list of str
        Tokens of all files, concatenated in sorted file-name order. Empty
        when the directory contains no ``*.mid`` files.
    """
    if path is None:
        path = midi_path

    notes = []

    #glob returns matches in arbitrary order; sort so the token stream
    #(and every sequence cut from it) is the same on every run and machine
    for file in sorted(glob.glob(path + "/*.mid")):
        midi = converter.parse(file)
        parts = instrument.partitionByInstrument(midi)

        #use the first instrument part when the file has instrument parts,
        #otherwise fall back to the flattened notes of the whole file
        if parts:
            notes_to_parse = parts.parts[0].recurse()
        else:
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
    return notes

In [6]:
#This section is for translating the input from strings to numbers (easier for the RNN to parse)
def prepare_encoding(notes):
    """Return a dict mapping each distinct token in ``notes`` to an integer.

    The distinct tokens are sorted and numbered from 0 in that order.
    """
    vocabulary = sorted(set(notes))
    return {token: code for code, token in enumerate(vocabulary)}

def prepare_unencoding(notes):
    """Return a dict mapping integers back to the distinct tokens in ``notes``.

    The distinct tokens are sorted and numbered from 0, so this is the
    inverse of the mapping built by sorting and enumerating the same tokens.
    """
    vocabulary = sorted(set(notes))
    return dict(enumerate(vocabulary))

In [7]:
#This section creates the input note sequences and the target output note for each sequence
def prepare_sequences_in(notes, n_vocab):
    """Build the normalised input windows for the network.

    Every run of ``sequence_length`` consecutive tokens that still has a
    following token becomes one window. Tokens are replaced by their integer
    codes from ``prepare_encoding`` and divided by ``n_vocab``.

    Returns an array of shape ``(n_windows, sequence_length, 1)``.
    """
    lookup = prepare_encoding(notes)

    #encode the whole stream once, then slice the windows out of it
    encoded = [lookup[token] for token in notes]
    window_count = len(notes) - sequence_length
    windows = [encoded[begin:begin + sequence_length] for begin in range(window_count)]

    #shape expected by the LSTM layers: (samples, timesteps, features)
    shaped = np.reshape(windows, (len(windows), sequence_length, 1))

    #scale the integer codes by the vocabulary size
    return shaped / float(n_vocab)

def prepare_sequences_out(notes, n_vocab):
    """Build the one-hot training targets for the network.

    The target for window ``i`` is the token at position
    ``i + sequence_length``, i.e. the token that follows that window.

    Parameters
    ----------
    notes : list of str
        The full token stream.
    n_vocab : int
        Number of distinct tokens; fixes the width of the one-hot encoding.

    Returns
    -------
    Array of shape ``(n_windows, n_vocab)``.
    """
    #get note encodings
    note_to_int = prepare_encoding(notes)

    network_output = [
        note_to_int[notes[i + sequence_length]]
        for i in range(len(notes) - sequence_length)
    ]

    #pass the width explicitly: without num_classes it is inferred from the
    #largest target present, which is narrower than the model's Dense(n_vocab)
    #output whenever the highest-numbered token never occurs as a target
    #(and inference fails outright on an empty target list)
    return np_utils.to_categorical(network_output, num_classes=n_vocab)

In [8]:
#This section creates the model
def create_network(network_input, n_vocab):
    """Build and compile the sequential LSTM model.

    ``network_input`` is only used for its shape (axes 1 and 2 give the
    input shape of the first layer); ``n_vocab`` sets the size of the
    output layer so there is one output per possible token.
    """
    timesteps = network_input.shape[1]
    features = network_input.shape[2]

    model = Sequential([
        #first LSTM layer knows the input shape and passes full sequences on
        LSTM(lstm_nodes, input_shape=(timesteps, features), return_sequences=True),
        #dropout deliberately loses part of the signal to limit overfitting
        Dropout(dropout),
        LSTM(lstm_nodes, return_sequences=True),
        Dropout(dropout),
        #last LSTM layer does not return sequences
        LSTM(lstm_nodes),
        Dense(dense),
        Dropout(dropout),
        #output layer matches the number of possible tokens
        Dense(n_vocab),
        Activation(activation_method),
    ])
    model.compile(loss=loss, optimizer=optimizer)

    return model

In [9]:
#This section trains the model
def train(model, network_input, network_output):
    """Fit ``model`` on the given inputs/targets, checkpointing the weights.

    A checkpoint file is written after an epoch only when the training loss
    improved on the best value so far (``save_best_only=True``); the epoch
    number and loss are embedded in the file name.
    """
    checkpoint = ModelCheckpoint(
        "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min'
    )

    model.fit(
        network_input,
        network_output,
        epochs=epochs,
        batch_size=epoch_size,
        callbacks=[checkpoint]
    )

In [12]:
#Main function to cover grabbing the data to training the model
def train_network():
    """Run the whole training pipeline: read the songs, encode them, build the model, fit it."""
    notes = get_notes()

    #vocabulary size = number of distinct tokens
    n_vocab = len(set(notes))

    inputs = prepare_sequences_in(notes, n_vocab)
    targets = prepare_sequences_out(notes, n_vocab)

    network = create_network(inputs, n_vocab)

    #to resume from previously saved weights, uncomment:
    #network.load_weights(weight_file)

    train(network, inputs, targets)


In [16]:
#This section runs the model once the weights are generated
#generates an output (numerical output)
def run_network():
    """Generate ``output_lenght`` tokens with the model loaded from ``weight_file``.

    A random window of the training tokens is used as the seed. At each step
    the window is normalised, the model predicts a distribution over tokens,
    the most probable token is emitted, and the window slides forward by one.

    Returns
    -------
    list of str
        The generated tokens (note names or dot-joined chord strings).

    Raises
    ------
    ValueError
        If the songs contain too few notes to form a single seed window.
    """
    notes = get_notes()

    # get amount of pitch names
    n_vocab = len(set(notes))

    #the input sequences are needed for the model's input shape and to know
    #how many seed windows exist; the training targets are not needed here
    network_input = prepare_sequences_in(notes, n_vocab)
    if len(network_input) == 0:
        raise ValueError(
            "need more than %d notes to build a seed sequence, got %d"
            % (sequence_length, len(notes))
        )

    #get model and load with weights
    model = create_network(network_input, n_vocab)
    model.load_weights(weight_file)

    note_to_int = prepare_encoding(notes)
    int_to_note = prepare_unencoding(notes)

    #use a random input window as the seed (upper bound is exclusive)
    start = np.random.randint(0, len(network_input))

    #keep the seed as raw integer codes: network_input is already divided by
    #n_vocab, and the loop below normalises again, so seeding from it would
    #scale the seed twice while appended predictions are scaled only once
    pattern = [note_to_int[name] for name in notes[start:start + sequence_length]]

    prediction_output = []

    # generate some number of notes
    for _ in range(output_lenght):
        #align the window with the model input shape and normalise it once,
        #exactly as the training inputs were normalised
        prediction_input = np.reshape(pattern, (1, len(pattern), 1)) / float(n_vocab)

        #the network outputs one probability per token; take the most likely
        prediction = model.predict(prediction_input, verbose=0)
        index = int(np.argmax(prediction))

        #add predicted note to our output
        prediction_output.append(int_to_note[index])

        #slide the window: drop the oldest code, append the predicted one
        pattern = pattern[1:] + [index]

    return prediction_output
    

In [17]:
#This section decodes the RNN friendly note format
def decode(prediction_output):
    """Turn generated tokens back into music21 note and chord objects.

    A token that contains ``'.'`` or consists only of digits is treated as a
    chord: it is split on ``'.'`` and each piece is converted to ``int`` to
    build a note. Any other token is passed to ``note.Note`` directly.
    Successive events are placed 0.5 offset units apart so they do not stack.

    Returns the list of created objects, in input order.
    """
    decoded = []
    position = 0

    for token in prediction_output:
        is_chord = ('.' in token) or token.isdigit()

        if not is_chord:
            #single note
            single = note.Note(token)
            single.offset = position
            single.storedInstrument = instrument.Piano()
            decoded.append(single)
        else:
            #chord: build one note per dot-separated integer
            members = []
            for piece in token.split('.'):
                member = note.Note(int(piece))
                member.storedInstrument = instrument.Piano()
                members.append(member)
            combined = chord.Chord(members)
            combined.offset = position
            decoded.append(combined)

        #advance so the next event starts after this one
        position += 0.5

    return decoded

In [18]:
#This section creates the midi file
def make_midi(output_notes):
    """Put ``output_notes`` into a music21 stream and write it to 'test_output.mid'."""
    stream.Stream(output_notes).write('midi', fp='test_output.mid')

In [19]:
#Main function to cover using the model to generate a midi file
def use_network():
    """Generate tokens with the network, decode them, and write the MIDI file."""
    #generate -> decode -> write, each step feeding the next
    make_midi(decode(run_network()))
 

In [13]:
#train_network()

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
  768/57077 [..............................] - ETA: 1:22:45 - loss: 4.6238

KeyboardInterrupt: 

In [20]:
#generate a piece with the weights in weight_file and write it out as a midi file
use_network()