In [1]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Activation, BatchNormalization as BatchNorm
from music21 import converter, instrument, note, chord, stream

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Read the three cached .npy arrays in one pass: the note sequence, the
# prepared network inputs, and the prepared network outputs.
np_notes, np_network_input, np_network_output = (
    np.load(cache_path)
    for cache_path in (
        "cached_data/notes.npy",
        "cached_data/network_input.npy",
        "cached_data/network_output.npy",
    )
)

In [3]:
# Distinct tokens present in the loaded notes, in sorted order. The position
# of a token in this list is later used as its class index.
pitch_names = sorted(set(np_notes))

# Vocabulary size: one softmax output per distinct token
n_vocab = len(pitch_names)

In [4]:
# Build the sequential Keras model as a single layer list (11 layers in all):
# three stacked 512-unit LSTMs, then two BatchNorm/Dropout-regularised dense
# stages ending in a softmax over the n_vocab tokens.
sequence_shape = (np_network_input.shape[1], np_network_input.shape[2])

model = Sequential([
    # The first two LSTMs return the full sequence so the next LSTM can consume it
    LSTM(512, input_shape=sequence_shape, return_sequences=True),
    LSTM(512, return_sequences=True, recurrent_dropout=0.3),
    # The last LSTM returns only its final output
    LSTM(512),
    BatchNorm(),
    Dropout(0.3),
    Dense(256),
    Activation('relu'),
    BatchNorm(),
    Dropout(0.3),
    # One output unit per vocabulary entry, normalised to probabilities
    Dense(n_vocab),
    Activation('softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

In [17]:
# Restore saved parameters into the model built above. The file must already
# exist at this relative path; load_weights raises OSError when it is missing.
weights_path = 'model/weights.h5'
model.load_weights(weights_path)

OSError: Unable to open file (unable to open file: name = 'model/weights.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [13]:
# Pick a random seed sequence. randint's upper bound is exclusive, so the full
# length is passed: every sequence (including the last one) can be chosen, and
# a single-sequence input no longer raises ValueError.
start = np.random.randint(0, len(np_network_input))

# Class index -> token lookup (index is the token's position in pitch_names)
int_to_note = dict(enumerate(pitch_names))

pattern = np_network_input[start]
prediction_output = []

# Generate 500 notes
# NOTE(review): the window is divided by n_vocab on every step. If the cached
# np_network_input is already scaled, the seed values are scaled twice while
# the appended indices are scaled once -- confirm what the cache holds.
for note_index in range(500):
    # Shape the current window as a batch of one: (1, timesteps, 1)
    prediction_input = np.reshape(pattern, (1, len(pattern), 1))
    prediction_input = prediction_input / float(n_vocab)
    prediction = model.predict(prediction_input, verbose=0)

    # Greedy decoding: take the most probable class
    index = np.argmax(prediction)
    result = int_to_note[index]
    prediction_output.append(result)

    # Slide the window: append the new index (np.append flattens to 1-D),
    # then drop the oldest entry so the length stays constant
    pattern = np.append(pattern, index)
    pattern = pattern[1:]

In [14]:
# Turn every predicted token into a music21 Note or Chord and place the
# elements one after another on the timeline.
offset = 0
output_notes = []

# The loop variable is deliberately not called `pattern`, so the seed window
# of that name used by the generation loop is not overwritten with a string.
for token in prediction_output:

    # Chord token: integers joined by '.', or a single bare integer
    if ('.' in token) or token.isdigit():
        chord_members = []
        for pitch_number in token.split('.'):
            member = note.Note(int(pitch_number))
            member.storedInstrument = instrument.Piano()
            chord_members.append(member)
        element = chord.Chord(chord_members)

    # Any other token is handed to music21 as a note name
    else:
        element = note.Note(token)
        element.storedInstrument = instrument.Piano()

    element.offset = offset
    output_notes.append(element)

    # Increase offset each iteration so that notes do not stack
    offset += 0.5

In [15]:
# Collect the generated elements into a music21 Stream and export it as MIDI.
# The write call stays the last expression so the cell displays its return value.
output_path = 'sound/jazz_nn.mid'
midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp=output_path)

'sound/jazz_nn.mid'