## Feature extraction and creation of the dataset

In [7]:
from music21 import *
import glob
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import keras as keras
from keras.utils.np_utils import to_categorical
import random


"""
This function extract the features from the MIDI files.

Input : Directory containing the midi files
outputs : numpy ndarray containing numpy arrays of the concatenated elements of the MIDI files.
          Elements are feature extracted from the MIDI files.
"""
def read_midi_dataset(file): 
    notes = list()
    for midi in glob.glob(file):
        notes_to_parse = None
        mu = converter.parse(midi)
        s2 = instrument.partitionByInstrument(mu)
        notes_to_parse = s2.parts[0].recurse() # parts[0] means we only takes into account piano
        notes_song = list()
        for element in notes_to_parse:
            
            if isinstance(element, note.Note): # isinstance check if element is a note
                notes_song.append(str(element.pitch))

            elif isinstance(element, chord.Chord): # check if it is a chord
                notes_song.append('.'.join(str(n) for n in element.normalOrder))   
            
            elif isinstance(element, note.Rest):
                notes.append(str(element.name)  + " " + str(element.quarterLength))
            
        notes.append(notes_song)

    return np.array(notes)

file = "/home/cj/Bureau/Master2/Q2/deep_learning/project/20_songs/*.mid"
data_elements = read_midi_dataset(file)


"""
This function transforms a numpy ndarray containaing arrays of elements of MIDI files into one list of
these elements. Example : [[a,b][c,d]] => [a,b,c,d]
"""
def from_ndarrays_to_list(data):
    return [element for elements_ in data for element in elements_] 


"""
This function shows an histogram of the notes and prints the total number of notes as well as the number
of unique notes.

Input : numpy ndarray containing numpy arrays of the concatenated elements of the MIDI files.
Output : No output. 
"""

def data_exploration(data, printt=False, show=False):
    elements_list = from_ndarrays_to_list(data)
    unique_elements = list(set(elements_list))
    frequence_of_elements = dict(Counter(elements_list))
    
    if printt is True:
        print("The number of notes in the dataset is {}.".format(len(elements_list)))
        print("The number of different notes in the dataset is {}.".format(len(unique_elements)))
     
    if show is True : # histogram of the notes
        plt.bar(list(frequence_of_elements.keys()), frequence_of_elements.values(), color='g')
        plt.show()
        
data_exploration(data_elements, printt=False, show=False)


"""
This function deletes from the dataset elements that do not appear more than a particular frequency.
It is a filter.
Input : numpy ndarray containing numpy arrays of the concatenated elements of the MIDI files.
Output : List of list. Each list is a concatenation of all the elements of a MIDI file.
"""
def select_notes(data, frequency, printt=False):
    elements_list = from_ndarrays_to_list(data)
    frequence_of_notes = dict(Counter(elements_list))
    # unique_elements is the sorted set of unique elements of the set of MIDI files. The elements selected depends
    # on a particular frequency. Therefore, it is the total vacabulary of the dataset.
    unique_elements = sorted([elements_list for elements_list,
                              count in frequence_of_notes.items() if count>=frequency])

    if printt is True :
        print("The number of different notes that appear at least {} time is {}.".format(frequency,
                                                                                     len(unique_elements)))
    new_data = list()
    for elements_ in data:
        temp = list()
        for element in elements_:
            if element in unique_elements:
                temp.append(element)
        new_data.append(temp)
        
    return new_data

freq = 0
data_filtered = select_notes(data_elements, frequency=freq)


"""
This function creates the X and y matrices needed by the model.
We use a sliding window mechanism in order to create this dataset.
[a,b,c,d,e,f,g] becomes x1=[a,b,c], y1=[d] then x2=[b,c,d], y2=[e] etc.

Input : List of list. Each list is a concatenation of all the elements of a MIDI file.
Output : matrix X and vector y.
"""
def create_dataset(data, window): #time_step = window
    x = list()
    y = list()
    for elements_ in data:
        for i in range(len(elements_)-window):
            x.append(elements_[i:i + window])
            y.append(elements_[i + window])
    
    return np.array(x), np.array(y)

window_size = 100
X, y = create_dataset(data_filtered, window_size)


"""
This function makes the different matrices usable by an LSTM unit.
input : For X matrices : [nb_samples, window_size]
        For y matrices : [nb_samples, ]
output : For X matrices : [nb_samples, window_size, 1] # 1 because there is only one feature (element)
         For y matrices : [nb_samples, vocabulary_size] # One-hot encoding
"""
def reshape(X_train, X_test, y_train, y_test, size_vocab):
    y_train = keras.utils.np_utils.to_categorical(y_train, num_classes = size_vocab)
    y_test = keras.utils.np_utils.to_categorical(y_test, num_classes = size_vocab)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))/float(size_vocab) # Normalization
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))/float(size_vocab) # Normalization
    
    return X_train, X_test, y_train, y_test


"""
Deep Neural network works better with numerical dataset. Each element is going to be replaced by a number.
Input : matrix X and vector y non usable by a Deep Neural network.
Output : X_train, y_train, X_test, y_test
"""
def dataset_for_NN(X, y, data, split_ratio):

    unique_data = list(sorted(set(from_ndarrays_to_list(data)))) 
    dict_vocabulary = dict((element, nb) for nb, element in enumerate(unique_data)) # from element to integer
    size_vocab = len(unique_data)

    X_dataset = list()
    y_dataset = list()
    
    for i in range(len(X)):
        temp_X = []
        for element in X[i]:
            temp_X.append(dict_vocabulary[element])
        X_dataset.append(temp_X)
        y_dataset.append(dict_vocabulary[y[i]])
    
    X_train, X_test, y_train, y_test = train_test_split(np.array(X_dataset), np.array(y_dataset),
                                                        test_size=split_ratio, random_state=0)
    
    X_train, X_test, y_train, y_test = reshape(X_train, X_test, y_train, y_test, size_vocab)
    
    return X_train, X_test, y_train, y_test, dict_vocabulary, size_vocab

split_ratio = 0.2
X_train, X_test, y_train, y_test, dict_vocabulary, size_vocab = dataset_for_NN(X, y, data_filtered, split_ratio)

## determine
- Loss function
- F-measure => https://stats.stackexchange.com/questions/49226/how-to-interpret-f-measure-values , https://en.wikipedia.org/wiki/F1_score
- How to retreive these values ? Is F-measure really important ? Yes it is and we can retreive it !
    https://machinelearningmastery.com/how-to-calculate-precision-recall-f1-and-more-for-deep-learning-models/

## Structure of the Models tested

In [125]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Activation
from keras.callbacks import ModelCheckpoint
from keras.layers import BatchNormalization as BatchNorm
from keras.layers import Bidirectional, Flatten
from keras_self_attention import SeqSelfAttention
import pandas as pd
# pip install keras-self-attention

optimizer = 'rmsprop' # TO MOVE

def lstm_model_1(window_size, dropout_rate, size_vocab, size_lstm):
    model = Sequential()
    model.add(LSTM(size_lstm, input_shape=(window_size, 1), return_sequences=True)) # 512
    model.add(Dropout(dropout_rate))
    model.add(LSTM(size_lstm, return_sequences=True)) # 512
    model.add(Dropout(dropout_rate))
    model.add(LSTM(size_lstm)) # 512
    model.add(Dense(256))
    model.add(Dropout(dropout_rate))
    model.add(Dense(size_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

def lstm_model_2(window_size, dropout_rate, size_vocab, size_lstm):
    model = Sequential()
    model.add(LSTM(size_lstm, input_shape=(window_size, 1), recurrent_dropout=0.3, return_sequences=True)) # 512
    model.add(LSTM(size_lstm, return_sequences=True, recurrent_dropout=0.3)) # 512
    model.add(LSTM(size_lstm)) # 512
    model.add(BatchNorm())
    model.add(Dropout(0.3))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNorm())
    model.add(Dropout(0.3))
    model.add(Dense(size_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

def lstm_attention_model(window_size, dropout_rate, size_vocab, size_lstm):
    model = Sequential()
    model.add(Bidirectional(LSTM(size_lstm, input_shape=(window_size, 1),return_sequences=True))) # 512
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(dropout_rate))
    
    model.add(Bidirectional(LSTM(size_lstm, return_sequences=True))) # 512
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dropout(dropout_rate))
    
    model.add(Bidirectional(LSTM(size_lstm, return_sequences=True))) # 512
    model.add(SeqSelfAttention(attention_activation='sigmoid'))
    model.add(Dense(256))
    model.add(Dropout(dropout_rate))
    
    model.add(Flatten()) #Supposedly needed to fix stuff before dense layer
    model.add(Dense(size_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

def save_on_disk(model, history, name):
    name_model = name+".h5"
    model.save(name_model)
    print(name_model + " model saved on disk")
    
    history_ = pd.DataFrame.from_dict(history.history, orient='index')
    name_hisory = name+"_history.csv"
    history_.to_csv(name_hisory)


# An history object is the output of the fit(), it keeps tracks of the value of
# [loss, val_loss, accuracy, val_accuray] for each epoch during the training of the model.
# Very important to plot the learning curve for the training (loss) and testing set (val_loss).
def fit_model(model, X_train, y_train, X_test, y_test, batch_size, epochs, name, callbacks=False):
    if callbacks is False:
        history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs,
                  batch_size=batch_size, verbose=1)
    else:
        filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"    
        checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=0, save_best_only=True, mode='min')    
        callbacks_list = [checkpoint]
        history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs,
                  batch_size=batch_size, callbacks=callbacks_list, verbose=1)
    
    # Save results on disk
    save_on_disk(model, history, name)
    
    return model

In [130]:
def to_train(which, window_size, dropout_rate, size_vocab, X_train, y_train, X_test, y_test,
             size_lstm, batch_size, epochs, callbacks):
    
    if which == 1:
        name = 'lstm_model_1'
        model = lstm_model_1(window_size, dropout_rate, size_vocab, size_lstm)
        model = fit_model(model, X_train, y_train, X_test, y_test, batch_size, epochs, name, callbacks)
        
    elif which == 2:
        name = 'lstm_model_2'
        model = lstm_model_2(window_size, dropout_rate, size_vocab, size_lstm)
        model = fit_model(model, X_train, y_train, X_test, y_test, batch_size, epochs, name, callbacks)
    
    elif which == 3:
        name = 'lstm_att_model'
        model = lstm_attention_model(window_size, dropout_rate, size_vocab, size_lstm)
        model = fit_model(model, X_train, y_train, X_test, y_test, batch_size, epochs, name, callbacks)
    
    else:
        return print("No corresponding model")

    return model

optimizer = 'rmsprop' # ['rmsprop', 'sgd', etc] # sensé etre le 3 ieme, on va pas le tester.
dropout_rate = 0.3 # [0, 0.2, 0.4, 0.8] # second to test   
batch_size = 5 # [128, 256, 512, 1024] # Fourth 
epochs = 3 
which = 1
size_lstm = 64 # [64, 128, 256, 512] # First to test
model = to_train(which, window_size, dropout_rate, size_vocab, X_train, y_train, X_test, y_test,
                 size_lstm, batch_size, epochs, callbacks=False)

# control the model 
https://machinelearningmastery.com/how-to-control-neural-network-model-capacity-with-nodes-and-layers/

## If a model needs to be loaded

In [23]:
from keras.models import load_model
file_model = '/home/cj/Bureau/Master2/Q2/deep_learning/project/models/model_final_lstm_dunjon_small.h5'
lstm = load_model(file_model)

## If you have weights of the model

In [52]:
file_weigts = "/home/cj/Bureau/Master2/Q2/deep_learning/project/weights_sigur.hdf5"  
lstm.load_weights(file_weigts)
lstm.compile(loss='categorical_crossentropy', optimizer='rmsprop')

## Generating Music

In [78]:
def generate_music(model, nb_steps, dict_vocabulary, input_sequence, size_vocab):
    
    prediction_output = []
    for note in range(nb_steps):
        prediction_input = np.reshape(input_sequence, (1, len(input_sequence), 1))
        prediction_input = prediction_input/float(size_vocab) # normalization
        pred = model.predict(prediction_input, verbose=0)
        indice = np.argmax(pred) # takes the biggest probability
        note_generated = dict_vocabulary[indice]
        prediction_output.append(note_generated)
        # The note generated is put at the end of the input sequence
        input_sequence = np.append(input_sequence, indice)
        # The first note is removed from the input sequence
        input_sequence = input_sequence[1:len(input_sequence)]

    return prediction_output

unique_data = list(sorted(set(from_ndarrays_to_list(data_filtered)))) 
from_ind_to_element = dict((nb, element) for nb, element in enumerate(unique_data)) # from element to integer

ind = np.random.randint(0, len(X_test)-1)
input_sequence = X_test[ind]
nb_steps_gen = 100

music_generated = generate_music(lstm_2, nb_steps_gen, from_ind_to_element, input_sequence, size_vocab)

In [79]:
def from_notes_to_MIDI(music_generated, name, offset):
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in music_generated:
        
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                
                cn=int(current_note)
                new_note = note.Note(cn)
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
                
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
            
        elif('rest' in pattern):
            new_rest = note.Rest(pattern)
            new_rest.offset = offset
            new_rest.storedInstrument = instrument.Piano() #???
            output_notes.append(new_rest)
            
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5
    
    midi_stream = stream.Stream(output_notes)
    name_song = name+'.mid'
    midi_stream.write('midi', fp=name_song)
    print(name_song+" downloaded succesfully !")

name_song = 'lol_melody'
offset = 0.5
from_notes_to_MIDI(music_generated, name_song, offset)

lol_melody.mid downloaded succesfully !


## Hyper-parameters to optimize
- Frequency threshold (done with data exploration)
- time-step (window size)
- dropout_rate
- architecture (more complicated)
- Number of batch
- Number of epochs (PAY attention to the learning rate!)
- size of the layers

## Todo
- Work on hyper parameters
- Add rythm and pause to the feature extraction
- change function from element_to_midi accordingly
- structure of the report
- upoad library

In [72]:
from sklearn.metrics import f1_score

yhat_probs = lstm_1.predict(X_test, verbose=0)
yhat_classes = lstm_1.predict_classes(X_test, verbose=0)
y_pred = keras.utils.np_utils.to_categorical(yhat_classes, num_classes = size_vocab)
f1 = f1_score(y_test, y_pred, average='micro')
print('F1 score: %f' % f1)

F1 score: 0.004484
