# 基于深度学习的音乐生成

## 导入必要库

In [1]:
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord, stream
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Dropout
from keras.layers import BatchNormalization as BatchNorm
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt
%matplotlib inline

## 数据预处理

In [4]:
def get_notes():
    """Collect every note and chord from the MIDI files matching ``midi/*.mid``.

    Each single note is stored as the string form of its pitch; each chord is
    stored as its normal-order values joined with ``'.'``.

    The resulting list is also pickled to a file named ``notes`` in the
    current working directory so later cells can reload it without re-parsing.

    Returns:
        list[str]: the encoded notes/chords of all files, in parse order.
    """
    notes = []
    # Sort the matches: glob order is filesystem-dependent, and a stable order
    # makes the extracted corpus reproducible between runs.
    for midi_path in sorted(glob.glob("midi/*.mid")):
        # Announce the file first so a parse failure can be traced to it.
        print("Parsing %s" % midi_path)
        midi = converter.parse(midi_path)
        try:
            # Prefer the first part of the instrument-partitioned score.
            parts = instrument.partitionByInstrument(midi)
            notes_to_parse = parts.parts[0].recurse()
        except Exception:
            # Partitioning did not yield a usable part; fall back to the
            # flattened note stream. `Exception` (not a bare except) so that
            # KeyboardInterrupt / SystemExit still propagate.
            notes_to_parse = midi.flat.notes
        # Encode single notes and chords differently.
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
    # Persist the extracted notes/chords for later reuse.
    with open('notes', 'wb') as notes_file:
        pickle.dump(notes, notes_file)
    return notes

In [5]:
def train_prepare_sequences(notes, sequence_length=100):
    """Build the (input, target) arrays used to train the network.

    Every window of ``sequence_length`` consecutive notes becomes one input
    sample and the note that follows the window becomes its target.

    Args:
        notes: list of note/chord strings (as produced by ``get_notes``).
        sequence_length: number of notes per input window (default 100).

    Returns:
        tuple: ``(network_input, network_output)`` where ``network_input`` has
        shape ``(n_patterns, sequence_length, 1)`` with integer codes divided
        by the vocabulary size, and ``network_output`` is the one-hot encoding
        of the targets with one column per vocabulary entry.

    Raises:
        ValueError: if ``notes`` does not contain more than
            ``sequence_length`` items, so no training pair can be formed.
    """
    # Vocabulary: each distinct note/chord string gets an integer code.
    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)
    note_to_int = {name: number for number, name in enumerate(pitchnames)}
    encoded = [note_to_int[name] for name in notes]

    n_patterns = len(encoded) - sequence_length
    if n_patterns <= 0:
        raise ValueError(
            "need more than %d notes to build a training sequence, got %d"
            % (sequence_length, len(encoded))
        )

    # Sliding windows as inputs; the note right after each window as target.
    network_input = [encoded[i:i + sequence_length] for i in range(n_patterns)]
    network_output = encoded[sequence_length:]

    # Reshape to (samples, timesteps, features) as expected by the LSTM layer.
    network_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    # Scale the integer codes by the vocabulary size.
    network_input = network_input / float(n_vocab)
    # Pin the number of classes to the vocabulary size: inferring it from the
    # largest target would yield too few columns whenever the highest-coded
    # note never occurs as a target, mismatching the Dense(n_vocab) output.
    network_output = np_utils.to_categorical(network_output, num_classes=n_vocab)
    return (network_input, network_output)

## 构建神经网络

In [10]:
def create_network(network_input, notes, flag, weights_path='model/0-2.0434.hdf5'):
    """Build and compile the LSTM network used for training and generation.

    The model is three stacked LSTM layers followed by a dense head with batch
    normalisation and dropout, ending in a softmax over the vocabulary.
    A diagram of the model is written to ``model.png`` via ``plot_model``.

    Args:
        network_input: array whose ``shape[1]`` and ``shape[2]`` give the
            timesteps and features per sample for the first LSTM layer.
        notes: list of note/chord strings; the number of distinct entries
            sets the size of the output layer.
        flag: truthy for training mode (fresh weights); falsy for generation
            mode, in which weights are loaded from ``weights_path``.
        weights_path: weights file loaded in generation mode. Defaults to the
            path that was previously hard-coded.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    n_vocab = len(set(notes))
    model = Sequential()
    model.add(LSTM(
        256,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        recurrent_dropout=0.3,
        return_sequences=True,
    ))
    model.add(LSTM(128, return_sequences=True, recurrent_dropout=0.3))
    model.add(LSTM(128))
    model.add(BatchNorm())
    model.add(Dropout(0.3))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(BatchNorm())
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    # Track accuracy as well as loss so the training history contains an
    # 'accuracy' series for plotting.
    model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['accuracy'],
    )
    plot_model(model, to_file='model.png', show_shapes=True)
    # Generation mode: restore previously trained weights.
    if not flag:
        model.load_weights(weights_path)
    return model

## 训练神经网络

In [7]:
def train(model, network_input, network_output, epochs=200, batch_size=128):
    """Fit the model and checkpoint its weights while training.

    A checkpoint is written under ``model/`` only for epochs whose training
    loss improves on the best seen so far (``save_best_only=True``).

    Args:
        model: compiled Keras model to train.
        network_input: training inputs.
        network_output: one-hot training targets.
        epochs: number of training epochs (default 200).
        batch_size: samples per gradient update (default 128).

    Returns:
        The object returned by ``model.fit`` (the training history), so that
        callers can inspect or plot the recorded metrics.
    """
    filepath = "model/weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min',
        period=1
    )
    callbacks_list = [checkpoint]
    # Return the history instead of discarding it in a local variable.
    return model.fit(
        network_input,
        network_output,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks_list,
    )

In [8]:
def train_network():
    """Run the full training pipeline end to end.

    Parses the MIDI corpus, builds the training sequences, creates the model
    in training mode and fits it.

    Returns:
        Whatever ``train`` returns for the fitted model.
    """
    # Extract all notes and chords from the MIDI files.
    notes = get_notes()
    # Turn them into input windows and one-hot targets.
    network_input, network_output = train_prepare_sequences(notes)
    # Build the model with fresh weights (training mode).
    model = create_network(network_input, notes, True)
    # Train, handing the result back to the caller instead of dropping it.
    return train(model, network_input, network_output)

In [69]:
# Run the whole training pipeline and bind its result to `history`, the
# module-level name that show() reads when plotting.
history = train_network()

In [None]:
def show(history=None):
    """Plot the metrics recorded during training.

    Args:
        history: object exposing a ``history`` dict of per-epoch metric lists
            (as returned by ``model.fit``). When omitted, the module-level
            name ``history`` is used, matching the original call ``show()``.

    Raises:
        NameError: if no history is passed and no module-level ``history``
            is available.
    """
    if history is None:
        history = globals().get('history')
    if history is None:
        raise NameError(
            "no training history available: pass it to show() or bind the "
            "result of training to a module-level name 'history'"
        )

    recorded = history.history
    # List every metric series that was recorded.
    print(recorded.keys())

    for metric in ('accuracy', 'loss'):
        if metric not in recorded:
            # E.g. accuracy is absent when the model was compiled without it.
            print("'%s' was not recorded during training; skipping plot" % metric)
            continue
        plt.plot(recorded[metric])
        labels = ['train']
        # Only label a 'test' curve when validation data was actually recorded.
        validation_key = 'val_' + metric
        if validation_key in recorded:
            plt.plot(recorded[validation_key])
            labels.append('test')
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(labels, loc='upper left')
        plt.show()

In [None]:
# Plot the accuracy and loss curves from the training history.
show()

## 模型结构

In [11]:
# Reload the note/chord strings previously pickled to the 'notes' file.
with open('notes', 'rb') as filepath:
    notes = pickle.load(filepath)
# Rebuild the training arrays; create_network reads the input's shape.
# network_output is not used further in this cell.
network_input, network_output = train_prepare_sequences(notes)
# flag=True builds the network without loading saved weights; create_network
# also writes the model diagram to model.png via plot_model.
model = create_network(network_input, notes, True)

## 生成音乐的准备

In [3]:
def generate_prepare_sequences(notes, sequence_length=100):
    """Build the seed sequences used when generating music.

    Args:
        notes: list of note/chord strings the model was trained on.
        sequence_length: number of notes per window (default 100).

    Returns:
        tuple: ``(network_input, normalized_input)`` where ``network_input``
        is a list of integer-coded windows (plain Python lists) and
        ``normalized_input`` is the same data as an array of shape
        ``(n_patterns, sequence_length, 1)`` divided by the vocabulary size.
    """
    # Vocabulary: each distinct note/chord string gets an integer code.
    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)
    note_to_int = {name: number for number, name in enumerate(pitchnames)}
    encoded = [note_to_int[name] for name in notes]

    # One window per start position that still has a following note, which
    # mirrors the windows used for training.
    network_input = [
        encoded[i:i + sequence_length]
        for i in range(len(encoded) - sequence_length)
    ]
    n_patterns = len(network_input)
    # Reshape to (samples, timesteps, features) as expected by the LSTM layer.
    normalized_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    # Scale the integer codes by the vocabulary size.
    normalized_input = normalized_input / float(n_vocab)
    return (network_input, normalized_input)

In [4]:
def generate_notes(model, network_input, notes, num_notes=300):
    """Generate a sequence of notes from the network, starting at a random seed.

    Args:
        model: object with a ``predict(x, verbose=0)`` method returning class
            scores for the next note.
        network_input: list of integer-coded seed windows.
        notes: list of note/chord strings defining the vocabulary.
        num_notes: how many notes to generate (default 300).

    Returns:
        list[str]: the generated note/chord strings.

    Raises:
        ValueError: if ``network_input`` is empty.
    """
    if len(network_input) == 0:
        raise ValueError("network_input is empty: no seed sequence to start from")

    # Vocabulary and the reverse mapping from integer code to note string.
    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)
    int_to_note = dict(enumerate(pitchnames))

    # Pick a random seed. randint's upper bound is exclusive, so passing
    # len(network_input) makes every window (including the last) eligible and
    # also works when there is exactly one window.
    start = numpy.random.randint(0, len(network_input))
    # Work on a copy so the caller's seed sequences are not modified.
    pattern = list(network_input[start])

    prediction_output = []
    for _ in range(num_notes):
        prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)
        prediction = model.predict(prediction_input, verbose=0)
        # Take the most likely next note.
        index = int(numpy.argmax(prediction))
        prediction_output.append(int_to_note[index])
        # Slide the window: drop the oldest note, append the new one.
        pattern = pattern[1:] + [index]
    return prediction_output

In [5]:
def create_midi(prediction_output, output_prefix='output/test_output5'):
    """Convert generated note strings into a stream and write it to disk.

    Strings containing ``'.'`` or consisting only of digits are treated as
    chords (each ``'.'``-separated value is passed to ``note.Note`` as an
    integer); anything else is treated as a single note name. Successive
    elements are placed 0.5 offset units apart.

    Args:
        prediction_output: iterable of note/chord strings.
        output_prefix: path without extension; ``.mid`` and ``.xml`` files are
            written at this prefix. Defaults to the previously hard-coded
            ``output/test_output5``.
    """
    import os  # local import: only needed to prepare the output directory

    offset = 0
    output_notes = []
    for pattern in prediction_output:
        if ('.' in pattern) or pattern.isdigit():
            # Chord: build one Note per encoded value.
            chord_notes = []
            for current_note in pattern.split('.'):
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)
            new_chord = chord.Chord(chord_notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        else:
            # Single note given by name.
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        # Advance the offset so consecutive elements do not stack.
        offset += 0.5

    # Make sure the target directory exists before writing.
    output_dir = os.path.dirname(output_prefix)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=output_prefix + '.mid')
    midi_stream.write("xml", fp=output_prefix + ".xml")

In [6]:
def generate():
    """Generate a piece of music from the saved corpus and write it to disk.

    Reloads the pickled note list, prepares the seed sequences, builds the
    network in generation mode, predicts a note sequence and converts it
    into output files.
    """
    # Reload the notes/chords the model was trained on.
    with open('notes', 'rb') as notes_file:
        corpus = pickle.load(notes_file)

    # Integer-coded seed windows plus their scaled array form.
    seed_sequences, scaled_sequences = generate_prepare_sequences(corpus)

    # flag=False: build the network in generation mode.
    network = create_network(scaled_sequences, corpus, False)

    # Predict a note sequence and turn it into music files.
    generated = generate_notes(network, seed_sequences, corpus)
    create_midi(generated)

In [8]:
# Run the generation pipeline: reload notes, build the model, predict, write files.
generate()