## Feature extraction and creation of the training and test sets

In [184]:
# conda install -c iainsgillis music21
from music21 import *
import glob
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import keras as keras
from keras.utils.np_utils import to_categorical
import random

def read_midi_dataset(file):
    """Parse every MIDI file matching a glob pattern into a list of note tokens.

    For each file, the first part produced by
    ``instrument.partitionByInstrument`` is walked recursively.  When no
    instrument partition is available (the call returns ``None``), the whole
    parsed score is walked instead.  A ``note.Note`` is stored as
    ``str(element.pitch)``; a ``chord.Chord`` is stored as its normal-order
    integers joined with ``'.'``.  Any other element is skipped.

    Parameters
    ----------
    file : str
        Glob pattern selecting the MIDI files to read.

    Returns
    -------
    numpy.ndarray
        1-D object array with one entry per file, in sorted path order.  Each
        entry is the list of string tokens extracted from that file.
    """
    notes = list()

    # Sorted so that the song order does not depend on the file system.
    for midi in sorted(glob.glob(file)):
        mu = converter.parse(midi)
        s2 = instrument.partitionByInstrument(mu)
        if s2 is None:
            # No instrument partition: fall back to the whole score.
            notes_to_parse = mu.recurse()
        else:
            notes_to_parse = s2.parts[0].recurse()

        notes_song = list()
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes_song.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes_song.append('.'.join(str(n) for n in element.normalOrder))
        notes.append(notes_song)

    # Songs have different lengths, so build an explicit 1-D object array;
    # np.array(notes) would try (and on recent NumPy fail) to make it rectangular.
    songs = np.empty(len(notes), dtype=object)
    for position, notes_song in enumerate(notes):
        songs[position] = notes_song
    return songs



# Glob pattern selecting the MIDI files to load.
# NOTE(review): absolute, machine-specific path — adjust it to the local dataset location.
file = "/home/cj/Bureau/Master2/Q2/deep_learning/dataset2/*.mid"
# One list of note/chord tokens per MIDI file matched by the pattern.
notes_np = read_midi_dataset(file)

# Flatten the per-song token lists into a single list covering the whole dataset.
notes_list = [note for notes_song in notes_np for note in notes_song]

def data_exploration(data, printt=False, show=False):
    """Report basic statistics about a flat list of note tokens.

    Parameters
    ----------
    data : list
        Flat list of note/chord tokens.
    printt : bool, optional
        When true, print the total number of tokens and the number of
        distinct tokens.
    show : bool, optional
        When true, draw a bar chart of how often each distinct token occurs.

    Returns
    -------
    None
    """
    # Count the tokens of the argument itself (not of a notebook global), so
    # the statistics always describe the data that was passed in.
    freqs_notes = Counter(data)

    if printt:
        print("The number of notes in the dataset is {}.".format(len(data)))
        print("The number of different notes in the dataset is {}.".format(len(freqs_notes)))

    if show:  # histogram of the notes
        plt.bar(list(freqs_notes.keys()), list(freqs_notes.values()), color='g')
        plt.show()

# Called with the defaults (printt=False, show=False), so nothing is printed or plotted here.
data_exploration(notes_list)

def select_notes(data_np, data_list, frequency, printt=False):
    """Keep only the tokens that occur at least ``frequency`` times.

    Parameters
    ----------
    data_np : iterable of iterables
        Per-song sequences of tokens to filter.
    data_list : iterable
        Flat collection of tokens used to count occurrences.
    frequency : int
        Minimum number of occurrences (inclusive) for a token to be kept.
    printt : bool, optional
        When true, print how many distinct tokens reach the threshold.

    Returns
    -------
    tuple
        ``(frequent_notes, new_data)``: the list of retained tokens in order
        of first appearance in ``data_list``, and one filtered list per song
        of ``data_np`` (the order of the tokens inside a song is preserved).
    """
    freqs_notes = Counter(data_list)
    frequent_notes = [token for token, count in freqs_notes.items() if count >= frequency]

    if printt:
        print("The number of different notes that appear at least {} time is {}.".format(frequency,
                                                                                     len(frequent_notes)))

    # Set membership keeps the per-token test constant-time; testing against
    # the list made the filtering quadratic in the vocabulary size.
    frequent_lookup = set(frequent_notes)
    new_data = [
        [token for token in notes_song if token in frequent_lookup]
        for notes_song in data_np
    ]
    return frequent_notes, new_data

# Keep only the notes that occur at least 20 times; `pitchname` is the list of
# retained notes and `new_data` the per-song sequences restricted to them.
pitchname, new_data = select_notes(notes_np, notes_list, frequency=20)
# Vocabulary size = number of retained notes. `frequent_notes` only exists
# inside select_notes, so the returned list must be measured here.
size_vocab = len(pitchname)

def create_dataset(data, time_step): #time_step = window
    """Slice every song into fixed-length input windows and their next token.

    For each song, every run of ``time_step`` consecutive tokens becomes one
    input sample, and the token immediately following that run becomes its
    target.  Songs with no more than ``time_step`` tokens contribute nothing.

    Returns a pair of NumPy arrays ``(inputs, targets)``.
    """
    sequences, targets = [], []
    for song in data:
        last_start = len(song) - time_step
        for start in range(last_start):
            stop = start + time_step
            sequences.append(song[start:stop])
            targets.append(song[stop])

    return np.array(sequences), np.array(targets)

# Number of consecutive notes in each input window.
window_size = 20
# Input windows and the note that follows each of them, built from the filtered songs.
X_notes, y_notes = create_dataset(new_data, window_size)

def reshape(X_train, X_test, y_train, y_test, num_classes=None):
    """Shape the inputs for the LSTM and one-hot encode the integer targets.

    Parameters
    ----------
    X_train, X_test : numpy.ndarray
        2-D arrays ``(samples, steps)``; a trailing axis of size 1 is added.
    y_train, y_test : array-like of int
        Integer class labels.
    num_classes : int, optional
        Width of the one-hot encoding.  When omitted it is derived from the
        largest label found in ``y_train`` and ``y_test`` together, so that
        both encoded arrays always have the same number of columns.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` after reshaping / encoding.
    """
    if num_classes is None:
        # Encoding each split on its own could give different widths when the
        # largest label is missing from one of them.
        labels = np.concatenate([np.ravel(y_train), np.ravel(y_test)])
        if labels.size:
            num_classes = int(labels.max()) + 1

    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    y_train = keras.utils.np_utils.to_categorical(y_train, num_classes=num_classes)
    y_test = keras.utils.np_utils.to_categorical(y_test, num_classes=num_classes)
    return X_train, X_test, y_train, y_test

def create_numerical_dataset(X, y, vocab=None):
    """Replace every note token of ``X`` and ``y`` by an integer index.

    A single note-to-integer mapping is used for both the inputs and the
    targets, so that an index means the same note on both sides of the model.

    Parameters
    ----------
    X : array-like
        Input windows of note tokens, one window per row.
    y : array-like
        Target note token of each window.
    vocab : sequence, optional
        Ordered vocabulary; the index of a note is its position in ``vocab``.
        When omitted, the sorted set of all tokens found in ``X`` and ``y`` is
        used, which gives the same mapping on every run.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_dataset, y_dataset)`` holding the integer-encoded windows and
        targets.

    Raises
    ------
    KeyError
        If a token of ``X`` or ``y`` is not part of ``vocab``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if vocab is None:
        vocab = sorted(set(X.ravel()) | set(y.ravel()))
    note_to_int = {token: index for index, token in enumerate(vocab)}

    X_dataset = [[note_to_int[token] for token in window] for window in X]
    y_dataset = [note_to_int[target] for target in y]

    return np.array(X_dataset), np.array(y_dataset)

# TODO: add a normalization function for the encoded inputs.

# Encode with the order of `pitchname`, the same list that generate_music
# enumerates to turn a predicted index back into a note.
X, y = create_numerical_dataset(X_notes, y_notes, vocab=pitchname)

# Hold out 20% of the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Add a trailing feature axis to the inputs and one-hot encode the targets.
X_train, X_test, y_train, y_test = reshape(X_train, X_test, y_train, y_test)

## TODO: normalize the X dataset

## Model : LSTM

In [155]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Activation
from keras.callbacks import ModelCheckpoint

def lstm_model(window_size, dropout_rate, size_vocab): # input_shape=(n_steps, n_features)
    """Build and compile the stacked-LSTM next-note classifier.

    The network takes inputs of shape ``(window_size, 1)`` and stacks three
    LSTM layers (128, 256 and 128 units) with dropout after the first two,
    followed by a 128-unit dense layer, dropout, and a ``size_vocab``-unit
    dense layer with a softmax activation.  It is compiled with categorical
    cross-entropy and the rmsprop optimizer.

    Returns the compiled model.
    """
    stack = [
        LSTM(128, input_shape=(window_size, 1), return_sequences=True),
        Dropout(dropout_rate),
        LSTM(256, return_sequences=True),
        Dropout(dropout_rate),
        LSTM(128),
        Dense(128),
        Dropout(dropout_rate),
        Dense(size_vocab),
        Activation('softmax'),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)
    network.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    return network

# Build the network for windows of `window_size` notes, with a dropout rate of 0.3
# and one output unit per vocabulary entry.
lstm = lstm_model(window_size, dropout_rate=0.3, size_vocab=size_vocab)

## Train the model

In [157]:
def fit_model(model, X_train, y_train, batch_size, epochs, callbacks=False):
    """Train ``model`` on the given data and return it.

    Parameters
    ----------
    model
        Object exposing a Keras-style ``fit`` method.
    X_train, y_train
        Training inputs and targets, passed to ``model.fit`` unchanged.
    batch_size : int
        Number of samples per gradient update.
    epochs : int
        Number of passes over the training data.
    callbacks : list or bool, optional
        List of callbacks forwarded to ``model.fit`` (for example a
        ``ModelCheckpoint``).  ``False``/``None``/an empty list mean no
        callbacks; a bare ``True`` is accepted for backward compatibility and
        also means no callbacks.

    Returns
    -------
    The same ``model`` object, after training.
    """
    fit_kwargs = {"epochs": epochs, "batch_size": batch_size}
    # Only forward real callback collections; booleans carry no callbacks.
    if callbacks and callbacks is not True:
        fit_kwargs["callbacks"] = list(callbacks)

    model.fit(X_train, y_train, **fit_kwargs)

    return model
    
# Train for 50 epochs with batches of 128 samples; fit_model returns the model it was given.
lstm = fit_model(lstm, X_train, y_train, batch_size=128, epochs=50, callbacks=False)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Save the model

In [161]:
# Save the trained model to the file "model_lstm.h5".
lstm.save("model_lstm.h5")
print("Saved model to disk")

Saved model to disk


## Generating Music

In [231]:
def generate_music(model, nb_steps, pitchname, input_sequence):
    """Generate ``nb_steps`` note tokens by repeatedly querying ``model``.

    At every step the current window is reshaped to ``(1, len(window), 1)``
    and passed to ``model.predict``.  The index with the highest score is
    translated to a token through its position in ``pitchname``, then appended
    to the window while the oldest entry is dropped.

    Returns the list of generated tokens, in generation order.
    """
    index_to_note = dict(enumerate(pitchname))
    generated = []
    window = input_sequence

    for _ in range(nb_steps):
        batch = np.reshape(window, (1, len(window), 1))
        #batch = batch / float(n_vocab) # normalization
        scores = model.predict(batch, verbose=0)
        best_index = np.argmax(scores)  # most probable class
        generated.append(index_to_note[best_index])
        # Slide the window: push the new index at the end, drop the first entry.
        window = np.append(window, best_index)[1:]

    return generated

# Pick a random test window as the seed. np.random.randint excludes its upper
# bound, so len(X_test) (not len(X_test)-1) makes every index selectable and
# also works when the test set holds a single sample.
ind = np.random.randint(0, len(X_test))
input_sequence = X_test[ind]

# Generate 500 tokens from the seed window with the trained model.
music_generated = generate_music(lstm, 500, pitchname, input_sequence)

In [246]:
def from_notes_to_MIDI(music_generated):
    """Turn generated tokens into a list of music21 note and chord objects.

    A token that contains ``'.'`` or consists only of digits is treated as a
    chord: it is split on ``'.'``, every piece is converted to ``int`` and
    wrapped in a ``note.Note`` with a piano instrument, and the notes are
    grouped in a ``chord.Chord``.  Any other token is passed to ``note.Note``
    as is.  The object built from the token at position ``k`` gets offset
    ``k``, so consecutive objects do not stack on top of each other.

    Returns the list of created objects, in token order.
    """
    output_notes = []

    for offset, pattern in enumerate(music_generated):
        is_chord = ('.' in pattern) or pattern.isdigit()

        if not is_chord:
            # Single note: the token itself is handed to music21.
            single = note.Note(pattern)
            single.offset = offset
            single.storedInstrument = instrument.Piano()
            output_notes.append(single)
            continue

        # Chord: one Note per integer found in the token.
        members = []
        for pitch_token in pattern.split('.'):
            member = note.Note(int(pitch_token))
            member.storedInstrument = instrument.Piano()
            members.append(member)

        built_chord = chord.Chord(members)
        built_chord.offset = offset
        output_notes.append(built_chord)

    return output_notes

# Convert the generated tokens into music21 note/chord objects.
MIDI_music_generated = from_notes_to_MIDI(music_generated)

# Wrap the objects in a stream and write it out as the MIDI file 'music_gen.mid'.
midi_stream = stream.Stream(MIDI_music_generated)
midi_stream.write('midi', fp='music_gen.mid')

'music_gen.mid'