In [1]:
import music21
import pathlib
import pickle

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

In [2]:
# The model is fed a window of 100 notes and predicts the single note that
# follows that window.
input_notes_length = 100

# Sub-directory of 'output/' that the notes pickle, the weights file and the
# generated MIDI file are read from / written to.
output_dir = 'Bach'
# Base name (without the '.mid' extension) of the generated MIDI file.
generate_file = 'generate'

In [3]:
def get_all_notes():
    """Load the pickled note sequence and derive its vocabulary.

    Reads ``output/<output_dir>/all_notes`` (``output_dir`` is the
    notebook-level setting) and prints the vocabulary size and the total
    number of notes.

    Returns:
        tuple: ``(all_notes, vocab)`` where ``all_notes`` is the unpickled
        object and ``vocab`` is the sorted list of its distinct entries.
    """
    notes_path = 'output/{}/all_notes'.format(output_dir)

    # Caution: unpickling executes arbitrary code embedded in the file, so
    # only load note files that were produced by a trusted run.
    with open(notes_path, 'rb') as notes_file:
        all_notes = pickle.load(notes_file)

    # De-duplicate first, then order, so every note gets a stable index.
    vocab = list(set(all_notes))
    vocab.sort()

    print("\nvocab's length: ", len(vocab))
    print("#notes: ", len(all_notes))
    return all_notes, vocab

In [4]:
def make_seq(all_notes, vocab, seq_length=None):
    """One-hot encode every sliding window of notes.

    Window ``i`` covers ``all_notes[i : i + seq_length]``. As in the original
    implementation, ``len(all_notes) - seq_length`` windows are produced, so
    the window that ends on the very last note is not included.

    Args:
        all_notes: Sequence of notes; each one must be present in ``vocab``.
        vocab: Sequence of distinct notes; a note's position in this sequence
            is its one-hot index.
        seq_length: Number of notes per window. Defaults to the
            notebook-level ``input_notes_length`` when omitted.

    Returns:
        numpy.ndarray: Array of shape
        ``(len(all_notes) - seq_length, seq_length, len(vocab))`` holding
        0/1 values.

    Raises:
        ValueError: If ``all_notes`` is shorter than ``seq_length``.
        KeyError: If a note is missing from ``vocab``.
    """
    if seq_length is None:
        seq_length = input_notes_length

    num_training = len(all_notes) - seq_length
    if num_training < 0:
        raise ValueError(
            "need at least {} notes to build a window, got {}".format(
                seq_length, len(all_notes)))

    # Map every note to its column in the one-hot encoding.
    note_dict = {note: index for index, note in enumerate(vocab)}
    encoded = np.array([note_dict[note] for note in all_notes], dtype=np.intp)

    input_notes_in_vocab = np.zeros((num_training, seq_length, len(vocab)))

    # Broadcasting a (windows, 1) column against a (1, steps) row yields, for
    # each (window, step) pair, the position of that note in `all_notes`;
    # a single fancy-index assignment then sets all the ones at once.
    window_index = np.arange(num_training)[:, np.newaxis]
    step_index = np.arange(seq_length)[np.newaxis, :]
    input_notes_in_vocab[
        window_index, step_index, encoded[window_index + step_index]] = 1

    print("{} / {}".format(num_training, num_training))
    return input_notes_in_vocab

In [13]:
def choose_seq(input_notes_in_vocab):
    """Pick one encoded sequence at random to seed generation.

    Args:
        input_notes_in_vocab: Array whose first axis enumerates the candidate
            sequences.

    Returns:
        numpy.ndarray: The chosen sequence with a new leading axis of
        length 1 added in front.

    Raises:
        ValueError: If there are no sequences to choose from.
    """
    num_sequences = len(input_notes_in_vocab)
    if num_sequences == 0:
        raise ValueError("cannot choose a seed: no input sequences available")

    # np.random.randint excludes its upper bound, so passing the length itself
    # makes every index, including the last one, selectable.
    n = np.random.randint(0, num_sequences)
    return input_notes_in_vocab[n][np.newaxis, :]

In [6]:
def build_network(num_vocab):
    """Assemble and compile the note-prediction network.

    The network is two stacked 512-unit LSTM layers followed by a 256-unit
    ReLU dense layer and a softmax output over the vocabulary, with batch
    normalisation and 30% dropout after the second LSTM and after the hidden
    dense layer.

    Args:
        num_vocab: Size of the note vocabulary; sets both the width of each
            input time step and the number of output classes.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Each sample is a window of `input_notes_length` one-hot encoded notes.
    window_shape = (input_notes_length, num_vocab)

    layer_stack = [
        keras.layers.LSTM(
            512,
            recurrent_dropout=0.3,
            return_sequences=True,
            input_shape=window_shape,
        ),
        keras.layers.LSTM(512),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(num_vocab, activation='softmax'),
    ]
    network = keras.Sequential(layer_stack)

    network.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['acc'],
    )
    return network

In [21]:
def predict(model, sequence, num_vocab, num_notes=500):
    """Generate notes by repeatedly predicting the next one.

    At each step the most probable class is taken, stored as a one-hot
    vector, and appended to the input window while the oldest note is
    dropped, so the window length stays constant.

    Args:
        model: Object exposing ``predict(sequence, verbose=0)`` that returns
            class scores for the window.
        sequence: Seed window with a leading axis of length 1 (the first
            entry along that axis is the one that is advanced).
        num_vocab: Length of each one-hot note vector.
        num_notes: How many notes to generate. Defaults to 500.

    Returns:
        list: ``num_notes`` one-hot ``numpy.ndarray`` vectors of length
        ``num_vocab``, in generation order.
    """
    outputs = []
    for step in range(num_notes):
        scores = model.predict(sequence, verbose=0)
        note_in_vocab = np.argmax(scores)
        note = np.zeros(num_vocab)
        note[note_in_vocab] = 1
        outputs.append(note)

        # Slide the window forward: drop the oldest note, append the new one,
        # then restore the leading axis the model expects.
        window = np.concatenate((sequence[0][1:], note[np.newaxis, :]))
        sequence = window[np.newaxis, :]

        print("\rgenerating... {} / {}".format(step + 1, num_notes), end="")

    print()
    print(len(outputs))
    # Guarded so that asking for zero notes does not raise IndexError.
    if outputs:
        print(outputs[0])
    return outputs

In [8]:
def save_to_midi_file(sequence, note_vocab=None):
    """Decode one-hot notes and write them out as a MIDI file.

    The file is written to ``output/<output_dir>/<generate_file>.mid`` using
    the notebook-level ``output_dir`` and ``generate_file`` settings.

    Args:
        sequence: Iterable of one-hot vectors; the position of the value 1 in
            each vector is looked up in the vocabulary.
        note_vocab: Sequence mapping a one-hot index to its note string.
            When omitted, the notebook-level ``vocab`` is used, which keeps
            existing ``save_to_midi_file(seq)`` calls working.

    Raises:
        ValueError: If a vector contains no element equal to 1.
    """
    if note_vocab is None:
        # Backward-compatible fallback to the vocabulary defined by the
        # driver cell; pass `note_vocab` explicitly to avoid relying on it.
        note_vocab = vocab

    output_notes = [
        note_vocab[list(element).index(1)] for element in sequence
    ]

    offset = 0

    output_music_notes = []
    for note in output_notes:
        # Chords are stored as dot-separated integers (or a single integer).
        if ('.' in note) or note.isdigit():
            notes = []
            for note_in_chord in note.split('.'):
                one_note = music21.note.Note(int(note_in_chord))
                one_note.storedInstrument = music21.instrument.Piano()
                notes.append(one_note)
            new_chord = music21.chord.Chord(notes)
            new_chord.offset = offset
            output_music_notes.append(new_chord)
        # Anything else is passed to music21 as a single note.
        else:
            new_note = music21.note.Note(note)
            new_note.offset = offset
            new_note.storedInstrument = music21.instrument.Piano()
            output_music_notes.append(new_note)

        # Each element starts 0.5 after the previous one.
        offset += 0.5

    midi_stream = music21.stream.Stream(output_music_notes)
    midi_stream.write('midi', fp='output/{}/{}.mid'.format(output_dir, generate_file))

In [22]:
# Load the pickled notes and derive the sorted vocabulary.
all_notes, vocab = get_all_notes()
# One-hot encode every sliding window of input notes.
inputs = make_seq(all_notes, vocab)
# Pick one window at random to seed generation.
seq = choose_seq(inputs)
# Rebuild the network and load the saved weights into it.
model = build_network(len(vocab))
model.load_weights('output/{}/weights.h5'.format(output_dir))
# NOTE: `seq` is rebound here, from the seed window to the list of generated
# one-hot notes returned by predict().
seq = predict(model, seq, len(vocab))
# save_to_midi_file() reads the module-level `vocab` assigned above.
save_to_midi_file(seq)


vocab's length:  73
#notes:  4586
4486 / 4486
generating... 500 / 500
500
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
