In [1]:
import numpy as np
import sklearn as skl
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense
import tensorboard
from keras.optimizers import SGD
import keras.backend as K

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
def read_csv(url):
    """Load a header-less CSV file into a DataFrame.

    Args:
        url (string): location of the file; it is passed unchanged to
            ``pandas.read_csv``.
    Returns:
        df: the loaded DataFrame. The file is read with ``header=None``,
            so the first row is treated as data and the columns are
            labelled with integers starting at 0.
    """
    return pd.read_csv(url, header=None)

In [3]:
# One-hot target vector (length 25) for every genre label.
# The genres are enumerated in the order below; the hot slot starts at the
# last position (index 24) for the first genre and moves one step towards
# index 0 for each following genre.
music_class_binary = {
    genre: [1 if slot == 24 - rank else 0 for slot in range(25)]
    for rank, genre in enumerate((
        'BIG_BAND',
        'BLUES_CONTEMPORARY',
        'COUNTRY_TRADITIONAL',
        'DANCE',
        'ELECTRONICA',
        'EXPERIMENTAL',
        'FOLK_INTERNATIONAL',
        'GOSPEL',
        'GRUNGE_EMO',
        'HIP_HOP_RAP',
        'JAZZ_CLASSIC',
        'METAL_ALTERNATIVE',
        'METAL_DEATH',
        'METAL_HEAVY',
        'POP_CONTEMPORARY',
        'POP_INDIE',
        'POP_LATIN',
        'PUNK',
        'REGGAE',
        'RNB_SOUL',
        'ROCK_ALTERNATIVE',
        'ROCK_COLLEGE',
        'ROCK_CONTEMPORARY',
        'ROCK_HARD',
        'ROCK_NEO_PSYCHEDELIA',
    ))
}

In [4]:
# Integer class id (1..25) for every genre label, assigned in listing order.
music_class = {
    genre: class_id
    for class_id, genre in enumerate((
        'BIG_BAND',
        'BLUES_CONTEMPORARY',
        'COUNTRY_TRADITIONAL',
        'DANCE',
        'ELECTRONICA',
        'EXPERIMENTAL',
        'FOLK_INTERNATIONAL',
        'GOSPEL',
        'GRUNGE_EMO',
        'HIP_HOP_RAP',
        'JAZZ_CLASSIC',
        'METAL_ALTERNATIVE',
        'METAL_DEATH',
        'METAL_HEAVY',
        'POP_CONTEMPORARY',
        'POP_INDIE',
        'POP_LATIN',
        'PUNK',
        'REGGAE',
        'RNB_SOUL',
        'ROCK_ALTERNATIVE',
        'ROCK_COLLEGE',
        'ROCK_CONTEMPORARY',
        'ROCK_HARD',
        'ROCK_NEO_PSYCHEDELIA',
    ), start=1)
}

In [5]:
# Custom Keras metric: F1 score
def f1(y_true, y_pred):
    """F1 score computed from rounded predictions over the given tensors.

    Keras evaluates metrics batch by batch, so the value reported for an
    epoch is an average of per-batch scores, not the F1 of the whole set.

    Both inputs are clipped to [0, 1] and rounded before counting, and all
    entries of the tensors are pooled together into a single count:

    * true positives      = entries where ``y_true * y_pred`` rounds to 1
    * possible positives  = entries where ``y_true`` rounds to 1
    * predicted positives = entries where ``y_pred`` rounds to 1

    ``K.epsilon()`` is added to every denominator to avoid division by zero.
    """
    eps = K.epsilon()

    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_hits = K.sum(K.round(K.clip(y_pred, 0, 1)))
    actual_hits = K.sum(K.round(K.clip(y_true, 0, 1)))

    # precision: share of the selected items that are relevant
    p = hits / (predicted_hits + eps)
    # recall: share of the relevant items that were selected
    r = hits / (actual_hits + eps)

    return 2*((p*r)/(p+r+eps))

In [None]:
def _report_training_metric(history, metric, title, label, nb_iteration):
    """Plot one metric's train/validation curves, then print its training values.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict must
            contain the keys ``metric`` and ``'val_' + metric``.
        metric (string): key of the metric in ``history.history``.
        title (string): title of the figure.
        label (string): y-axis label, also printed as heading of the table.
        nb_iteration (int): maximum number of rows (epochs) printed.
    """
    training_values = pd.DataFrame(history.history[metric])
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()
    print(label)
    print(training_values.head(nb_iteration))


def CreateMultiPerceptron(nb_layers,nb_perceptron,nb_iteration,learning_rate,nb_essai,path,random_state=None):
    """Train and evaluate a multi-layer perceptron on a genre dataset.

    The CSV at ``path`` is loaded with ``read_csv``. Its last column holds
    the genre label (a key of ``music_class_binary``); columns 0 and 1 and
    the label column are dropped and every remaining column is used as a
    feature (presumably columns 0 and 1 are identifiers - TODO confirm).

    The data is split 80/20 into train/test, and the train part is split
    80/20 again into train/validation. Features are min-max scaled to
    [-1, 1] using statistics of the training split only.

    Args:
        nb_layers (int): number of hidden layers. At least one hidden layer
            is always created, even when the value is below 1.
        nb_perceptron (int): number of units in every hidden layer.
        nb_iteration (int): number of training epochs.
        learning_rate (float): learning rate of the SGD optimizer.
        nb_essai: identifier of the run; used in the TensorBoard log
            directory ``./logs/<nb_essai>/nn_64``.
        path (string): location of the CSV file.
        random_state: optional seed forwarded to both ``train_test_split``
            calls so the splits can be reproduced. It does not seed the
            network's weight initialisation. Defaults to ``None``
            (non-deterministic splits).

    Returns:
        None. Accuracy and F1 curves are plotted, their per-epoch training
        values are printed, and the result of ``model.evaluate`` on the test
        split is printed after ``---TEST---``.

    Raises:
        KeyError: if a label in the file is not a key of
            ``music_class_binary``.
    """
    # 1. Load the data and separate labels from features.
    dataset = read_csv(path)
    print(dataset.shape[1])

    # read_csv labels the columns 0..n-1, so the label column is n-1.
    label_column = dataset.shape[1] - 1
    y = np.array([music_class_binary[label] for label in dataset[label_column]])
    X = dataset.drop([0, 1, label_column], axis=1).to_numpy()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=random_state)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.20, random_state=random_state)

    # Scale the features to [-1, 1] to speed up training. The scaler is
    # fitted on the training split only so that no information from the
    # validation/test splits leaks into it.
    scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
    X_train = scaling.transform(X_train)
    X_val = scaling.transform(X_val)
    X_test = scaling.transform(X_test)

    # 2. Build the model.
    model = Sequential()
    # First hidden layer; it also fixes the input dimension.
    model.add(Dense(units=nb_perceptron, activation='relu', input_dim=X_train.shape[1]))

    # Remaining hidden layers, all with the same width.
    for _ in range(nb_layers-1):
        model.add(Dense(units=nb_perceptron, activation='relu'))

    # Output layer: one softmax unit per class of the one-hot targets
    # (25 for the vectors in music_class_binary).
    model.add(Dense(units=y.shape[1], activation='softmax'))

    # Compile with the SGD optimizer configured from learning_rate. Passing
    # the string 'adam' here would make Keras build a default Adam optimizer
    # and silently ignore learning_rate.
    sgd = SGD(lr=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy',f1])

    # 3. Train, logging to TensorBoard so progress can be visualised.
    log_dir  = './logs/'+str(nb_essai)+'/nn_64'
    tb_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
              epochs=nb_iteration, batch_size=100, callbacks=[tb_callback])

    # Training & validation accuracy per epoch.
    _report_training_metric(history, 'accuracy', 'Model accuracy', 'Accuracy', nb_iteration)

    # Training & validation F1 score per epoch.
    _report_training_metric(history, 'f1', 'Model F1 score', 'F1 score', nb_iteration)

    # 4. Evaluate the model on the held-out test split.
    score = model.evaluate(X_test, y_test)
    print("---TEST---")
    print(score)