# Piano music generation notebook

In [12]:
import os
import pandas as pd
import numpy
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation
from keras.layers import BatchNormalization as BatchNorm
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras import backend as K
import random


In [3]:
!pip install midiutil music21

Collecting midiutil
  Downloading MIDIUtil-1.2.1.tar.gz (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 4.9 MB/s eta 0:00:01
Building wheels for collected packages: midiutil
  Building wheel for midiutil (setup.py) ... [?25ldone
[?25h  Created wheel for midiutil: filename=MIDIUtil-1.2.1-py3-none-any.whl size=54566 sha256=834cfc81bd2b3602b8cf57fd923ae8cb6ce6a6458b3c30c7666a6e89b17cb085
  Stored in directory: /Users/jean-baptistegourlet/Library/Caches/pip/wheels/e3/97/cd/a677b61a76d575f373e2e10302f1d9106507fea6dd1320df03
Successfully built midiutil
Installing collected packages: midiutil
Successfully installed midiutil-1.2.1


In [4]:
from midiutil import MIDIFile
from music21 import *

In [17]:
def get_clips(directory, number_of_clips):
    """Load up to ``number_of_clips`` pickled clips from ``directory``.

    Args:
        directory: Folder that contains the pickled clips. A trailing path
            separator is optional.
        number_of_clips: Maximum number of files to load.

    Returns:
        A list holding one unpickled object per loaded file, in sorted
        file-name order.
    """
    notes = []

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # selection of the first `number_of_clips` files reproducible.
    for filename in sorted(os.listdir(directory)):
        if len(notes) >= number_of_clips:
            break
        print(filename)
        # os.path.join works with or without a trailing separator, whereas
        # plain string concatenation silently builds a wrong path without one.
        # NOTE(security): unpickling can execute arbitrary code -- only load
        # clip files that come from a trusted source.
        notes.append(pd.read_pickle(os.path.join(directory, filename)))
    return notes


In [6]:

def create_set(notes, vocab, n_vocab, sequence_length=50):
    """Turn note sequences into (input window, next note) training pairs.

    Args:
        notes: Either one clip (a flat sequence of notes) or a list of clips
            (a list of lists). Every note must be present in ``vocab``.
        vocab: Ordered collection of all distinct notes; the position of a
            note in ``vocab`` is its integer code.
        n_vocab: Number of classes. Used to scale the inputs and as the width
            of the one-hot targets.
        sequence_length: Number of notes in each input window.

    Returns:
        A tuple ``(network_input, network_output)``:
        ``network_input`` has shape ``(n_patterns, sequence_length, 1)`` and
        holds note codes divided by ``n_vocab``; ``network_output`` is the
        one-hot encoding (``n_vocab`` columns) of the note following each
        window.

    Raises:
        KeyError: If a note used in a window is missing from ``vocab``.
    """
    note_to_int = {note: number for number, note in enumerate(vocab)}

    # Accept a single flat clip as well as a list of clips. The length check
    # keeps an empty input from raising IndexError.
    if len(notes) > 0 and not isinstance(notes[0], list):
        notes = [notes]

    network_input = []
    network_output = []
    for clip in notes:
        if len(clip) <= sequence_length:
            # Too short to produce even one (window, next note) pair.
            continue
        # Encode each note once instead of once per window it appears in.
        encoded = [note_to_int[note] for note in clip]
        for start in range(len(encoded) - sequence_length):
            network_input.append(encoded[start:start + sequence_length])
            network_output.append(encoded[start + sequence_length])

    n_patterns = len(network_input)
    # reshape the input into a format compatible with LSTM layers
    network_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_vocab)
    # Fix the one-hot width to n_vocab: without num_classes the width is
    # inferred from the largest code present in the targets, which can be
    # smaller than the Dense(n_vocab) output layer the targets are fed to.
    network_output = np_utils.to_categorical(network_output, num_classes=n_vocab)
    return (network_input, network_output)

def train(model, network_input, network_output, epochs=2, batch_size=200,validation_split=0.2):
    """Fit ``model`` and checkpoint the best weights to ``weights/best_model.h5``.

    Args:
        model: Compiled Keras model.
        network_input: Training inputs passed to ``model.fit``.
        network_output: Training targets passed to ``model.fit``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of the data held out for validation.

    Returns:
        A tuple ``(model, history)`` where ``model`` is the model as it stands
        after the last epoch (not the checkpointed best one) and ``history``
        is the ``history.history`` dict returned by Keras.

    Note:
        Every call writes to the same checkpoint file, so a later training run
        overwrites the checkpoint of an earlier one.
    """
    filepath = "weights/best_model.h5"
    # The checkpoint callback does not create missing folders; make sure the
    # target directory exists before training starts.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)

    # 'val_loss' only exists when validation data is held out; fall back to
    # the training loss so a checkpoint is still written otherwise.
    monitored_metric = 'val_loss' if validation_split else 'loss'
    checkpoint = ModelCheckpoint(
        filepath,
        monitor=monitored_metric,
        verbose=1,
        save_best_only=True,
        mode='min'
    )
    callbacks_list = [checkpoint]

    history = model.fit(network_input, network_output, epochs=epochs,
              batch_size=batch_size, validation_split=validation_split, callbacks=callbacks_list)

    return model, history.history

In [7]:
def create_network(network_input, n_vocab, depth = 3, dropout =0.3):
    """Build and compile a stacked-LSTM next-note classifier.

    Args:
        network_input: Array whose ``shape[1]`` and ``shape[2]`` give the
            input shape of the first LSTM layer.
        n_vocab: Number of output classes (width of the final Dense layer).
        depth: Number of LSTM layers; must be 1, 2 or 3.
        dropout: Rate used both as ``recurrent_dropout`` of the first LSTM
            and for the ``Dropout`` layers that follow the later LSTMs.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        rmsprop optimizer). The model summary is printed as a side effect.

    Raises:
        ValueError: If ``depth`` is not 1, 2 or 3.
    """
    # Fail early: an unsupported depth used to fall through every branch and
    # yield a model without any recurrent layer.
    if depth not in (1, 2, 3):
        raise ValueError("depth must be 1, 2 or 3, got %r" % (depth,))

    input_shape = (network_input.shape[1], network_input.shape[2])

    model = Sequential()
    if depth == 1:
        model.add(LSTM(
            256,
            input_shape=input_shape,
            recurrent_dropout=dropout
        ))
    elif depth == 2:
        model.add(LSTM(
            256,
            input_shape=input_shape,
            recurrent_dropout=dropout,
            return_sequences=True
        ))
        model.add(LSTM(512))
        model.add(Dropout(dropout))
    else:  # depth == 3
        model.add(LSTM(
            256,
            input_shape=input_shape,
            recurrent_dropout=dropout,
            return_sequences=True
        ))
        model.add(LSTM(512,return_sequences=True))
        model.add(Dropout(dropout))
        model.add(LSTM(256))
        model.add(Dropout(dropout))
    # Shared classification head: one softmax unit per vocabulary entry.
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.summary()
    return model

In [8]:
def generate_notes(model, vocab, start_pattern, n_notes=50):
    """Generate notes by repeatedly predicting the next note of a sliding window.

    Args:
        model: Object with a Keras-style ``predict(x, verbose=0)`` method that
            returns class scores for an input of shape ``(1, window, 1)``.
        vocab: Ordered collection of notes; index ``i`` of the prediction maps
            to ``vocab[i]``.
        start_pattern: Seed window of normalized note codes (any array-like
            whose number of entries along the first axis is the window size).
        n_notes: Number of notes to generate.

    Returns:
        A list with the ``n_notes`` generated notes.
    """
    print('generating notes')
    int_to_note = dict(enumerate(vocab))
    # Derive the vocabulary size from the argument instead of relying on a
    # notebook-level `n_vocab` global that may be stale or undefined.
    n_vocab = len(vocab)

    pattern = start_pattern
    prediction_output = []
    for _ in range(n_notes):
        prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
        prediction = model.predict(prediction_input, verbose=0)
        # Greedy decoding: always take the most probable class.
        index = numpy.argmax(prediction)
        prediction_output.append(int_to_note[index])
        # Slide the window: append the new note with the same scaling as the
        # training inputs (numpy.append flattens the pattern to 1-D), then
        # drop the oldest entry.
        pattern = numpy.append(pattern, index / float(n_vocab))[1:]

    return prediction_output

In [9]:
def generate_random_notes(vocab, number):
    """Return ``number`` notes drawn uniformly at random (with replacement) from ``vocab``."""
    return [random.choice(vocab) for _ in range(number)]

In [10]:
def pitch_to_midi_number(sequence):
    """Convert note names to MIDI numbers.

    An entry containing ``'.'`` is treated as a chord: it is split on the dots
    and becomes a list with one MIDI number per part. Any other entry becomes
    a single MIDI number.

    Args:
        sequence: Iterable of note strings.

    Returns:
        A list whose items are MIDI numbers or, for chords, lists of MIDI
        numbers.
    """
    def to_midi(name):
        # `pitch` is presumably music21's pitch module, brought in by the
        # star import -- confirm if the imports change.
        return pitch.Pitch(name).midi

    converted = []
    for entry in sequence:
        if '.' in entry:
            converted.append([to_midi(part) for part in entry.split('.')])
        else:
            converted.append(to_midi(entry))
    return converted

def create_MIDI(prediction_output, output_name):
    """Write ``prediction_output`` to ``<output_name>.mid``, one entry per beat.

    Args:
        prediction_output: Sequence of note strings, converted with
            ``pitch_to_midi_number``; chord entries sound all their notes on
            the same beat.
        output_name: Output path without the ``.mid`` extension.
    """
    degrees = pitch_to_midi_number(prediction_output)  # MIDI note numbers

    track = 0
    channel = 0
    time = 0        # start offset, in beats
    duration = 1    # in beats
    tempo = 120     # in BPM
    volume = 100    # 0-127, as per the MIDI standard

    # One track, defaults to format 1 (tempo track is created automatically)
    midi_file = MIDIFile(1)
    midi_file.addTempo(track, time, tempo)

    for beat, entry in enumerate(degrees):
        # Treat a single note as a one-note chord so both cases share one path.
        chord = entry if type(entry) is list else [entry]
        for midi_number in chord:
            midi_file.addNote(track, channel, midi_number, time + beat, duration, volume)

    with open(output_name+'.mid', "wb") as output_file:
        midi_file.writeFile(output_file)

---

# First shot: learn recurrent patterns and fine-tune the model
## 30 seconds of music


In [20]:
# Folder holding the pickled clips.
directory = r'data/clips/'
number_of_clips = 2

notes = get_clips(directory, number_of_clips)

# Every loaded clip concatenated into one flat list.
all_dataset = sum(notes, [])

# All loaded clips except the last one.
train_set = notes[:number_of_clips - 1]

# Toy corpus: the first 240 notes of the first clip, repeated 200 times.
small_pattern = train_set[0][:240] * 200
vocab = sorted(set(small_pattern))
n_vocab = len(vocab)

notes_2004_186
notes_2006_111


In [15]:
# Show the working directory: the relative 'data/...' and 'weights/...' paths
# used in this notebook are resolved against it.
os.getcwd()

'/Users/jean-baptistegourlet/Documents/code/DeepLearning/Music-Generation-Project'

### Sequence length tuning

In [None]:
sequence_length_to_test = [5, 10, 20, 50, 100, 200, 500]

# Maps each tested sequence length to the Keras history dict of its run.
dict_history = {}
for s in sequence_length_to_test:
    # The training windows depend on the sequence length, so they are rebuilt
    # on every iteration.
    network_input_train, network_output_train = create_set(
        small_pattern, vocab, n_vocab, sequence_length=s
    )
    print('creating model for training, sequence_length = '+str(s))
    model = create_network(network_input_train, n_vocab)
    model_trained, history = train(
        model,
        network_input_train,
        network_output_train,
        epochs=100,
        batch_size=128,
        validation_split=0.2,
    )
    dict_history[s] = history


### Depth tuning

In [None]:
depth_to_test = [1,2,3]

# The training windows do not depend on the depth: build them once instead of
# recomputing identical arrays on every iteration.
network_input_train, network_output_train = create_set(small_pattern,vocab,n_vocab)

# Maps each tested depth to the Keras history dict of its run.
dict_history = {}
for d in depth_to_test:
    print('creating model for training, depth_to_test = '+str(d))
    model = create_network(network_input_train, n_vocab, depth=d)
    model_trained, history = train(model, network_input_train, network_output_train, validation_split=0.2, epochs=30, batch_size=128)
    dict_history[d] = history


### Dropout tuning

In [None]:
dropout_to_test = [0,0.1,0.2,0.3,0.5,0.8]

# The training windows do not depend on the dropout rate: build them once
# instead of recomputing identical arrays on every iteration.
network_input_train, network_output_train = create_set(small_pattern,vocab,n_vocab)

# Maps each tested dropout rate to the Keras history dict of its run.
dict_history = {}
for d in dropout_to_test:
    print('creating model for training, dropout_to_test = '+str(d))
    model = create_network(network_input_train, n_vocab, dropout=d)
    model_trained, history = train(model, network_input_train, network_output_train, validation_split=0.2, epochs=30, batch_size=128)
    dict_history[d] = history


### Batch size tuning

In [None]:
batch_size_to_test = [32,128,252,512]

# The training windows do not depend on the batch size: build them once
# instead of recomputing identical arrays on every iteration.
network_input_train, network_output_train = create_set(small_pattern,vocab,n_vocab)

# Maps each tested batch size to the Keras history dict of its run.
dict_history = {}
for b in batch_size_to_test:
    print('creating model for training, batch size = '+str(b))
    model = create_network(network_input_train, n_vocab)
    model_trained, history = train(model, network_input_train, network_output_train, validation_split=0.2, epochs=30, batch_size=b)
    dict_history[b] = history


### Generation

In [37]:
# 300 random notes from the vocabulary; only the input windows are kept
# (element 0 of the tuple returned by create_set), the targets are discarded.
test_set = generate_random_notes(vocab, 300)
network_input_test = create_set(test_set, vocab, n_vocab)[0]

In [25]:
# Train the default (3-layer) network on the repeated small pattern.
network_input_train, network_output_train = create_set(small_pattern, vocab, n_vocab)
model = create_network(network_input_train, n_vocab)

model_trained, history = train(
    model,
    network_input_train,
    network_output_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 50, 256)           264192    
_________________________________________________________________
lstm_7 (LSTM)                (None, 50, 512)           1574912   
_________________________________________________________________
dropout_4 (Dropout)          (None, 50, 512)           0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 256)               787456    
_________________________________________________________________
dropout_5 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 120)               30840     
_________________________________________________________________
activation_2 (Activation)    (None, 120)              

In [41]:
# Reload the checkpoint written during training and continue the first random
# test window.
model = load_model("weights/best_model.h5")
generated_notes = generate_notes(model, vocab, network_input_test[0])

generating notes


In [42]:
# Export the first 50 random seed notes so they can be compared with the generated output.
create_MIDI(prediction_output=test_set[:50], output_name='input_for_simple_generation')

In [43]:
# Export the notes produced by the model trained on the small pattern.
create_MIDI(prediction_output=generated_notes, output_name='generated_music_with_simple_generation')




# Feed the network with more data, generate complex music

In [13]:
# The trailing separator is required: get_clips builds each file path from
# this directory string and the file name.
directory = r'data/results2/'
number_of_clips = 2

notes = get_clips(directory, number_of_clips)

# Every loaded clip concatenated into one flat list.
all_dataset = sum(notes, [])

# Each clip repeated 100 times and concatenated into a single flat sequence,
# so training windows can span the boundary between two clips.
train_set = []
for clip in notes:
    train_set.extend(clip * 100)

vocab = sorted(set(all_dataset))
n_vocab = len(vocab)

# Random notes from the vocabulary, used later as the generation seed.
test_set = generate_random_notes(vocab,300)


notes_2002_0
notes_2002_1


In [17]:
# Train the default (3-layer) network on the larger, repeated dataset.
network_input_train, network_output_train = create_set(train_set, vocab, n_vocab)
model = create_network(network_input_train, n_vocab)

model_trained, history = train(
    model,
    network_input_train,
    network_output_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 50, 256)           264192    
_________________________________________________________________
lstm_4 (LSTM)                (None, 50, 512)           1574912   
_________________________________________________________________
dropout_2 (Dropout)          (None, 50, 512)           0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 256)               787456    
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1714)              440498    
_________________________________________________________________
activation_1 (Activation)    (None, 1714)             

KeyboardInterrupt: ignored

In [22]:
# Reload the checkpoint written during training.
model = load_model("weights/best_model.h5")

# Build windows of 50 random notes and let the model continue the first one.
network_input_test = create_set(test_set, vocab, n_vocab, sequence_length=50)[0]
generated_notes = generate_notes(model, vocab, start_pattern=network_input_test[0])

generating notes


In [20]:
# Export the first 50 random seed notes so they can be compared with the generated output.
create_MIDI(prediction_output=test_set[:50], output_name='input_for_complex_generation')

In [23]:
# Export the notes produced by the model trained on the larger dataset.
create_MIDI(prediction_output=generated_notes, output_name='generated_music_with_complex_generation')