# This notebook reproduces the FMA results on the small dataset.

In [13]:
import time
import os

import IPython.display as ipd
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd
import keras
from keras.layers import Activation, Dense, Conv1D, Conv2D, MaxPooling1D, Flatten, Reshape

from sklearn.utils import shuffle
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, LabelBinarizer, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.multiclass import OneVsRestClassifier

import utils 


# Loading in the data.
Download csv files from https://github.com/mdeff/fma/blob/master/utils.py

In [4]:
AUDIO_DIR = os.environ.get('AUDIO_DIR')

# loading in the data
tracks = utils.load('data/fma_metadata/tracks.csv')
features = utils.load('data/fma_metadata/features.csv')
echonest = utils.load('data/fma_metadata/echonest.csv')

# SUBSET

In [5]:
subset = tracks.index[tracks['set', 'subset'] <= 'small']

assert subset.isin(tracks.index).all()
assert subset.isin(features.index).all()

features_all = features.join(echonest, how='inner').sort_index(axis=1)
print('Not enough Echonest features: {}'.format(features_all.shape))

tracks = tracks.loc[subset]
features_all = features.loc[subset]

tracks.shape, features_all.shape

Not enough Echonest features: (13129, 767)


((8000, 52), (8000, 518))

In [8]:
train = tracks.index[tracks['set', 'split'] == 'training']
val = tracks.index[tracks['set', 'split'] == 'validation']
test = tracks.index[tracks['set', 'split'] == 'test']

print('{} training examples, {} validation examples, {} testing examples'.format(*map(len, [train, val, test])))

genres = list(LabelEncoder().fit(tracks['track', 'genre_top']).classes_)
print('Top genres ({}): {}'.format(len(genres), genres))
genres = list(MultiLabelBinarizer().fit(tracks['track', 'genres_all']).classes_)
print('All genres ({}): {}'.format(len(genres), genres))
     


6400 training examples, 800 validation examples, 800 testing examples
Top genres (8): ['Electronic', 'Experimental', 'Folk', 'Hip-Hop', 'Instrumental', 'International', 'Pop', 'Rock']
All genres (114): [1, 2, 6, 10, 12, 15, 16, 17, 18, 21, 22, 25, 26, 27, 30, 31, 32, 33, 36, 38, 41, 42, 45, 46, 47, 49, 53, 58, 64, 66, 70, 71, 76, 77, 79, 81, 83, 85, 86, 88, 89, 90, 92, 94, 98, 100, 101, 102, 103, 107, 109, 111, 113, 117, 118, 125, 130, 167, 171, 172, 174, 177, 180, 181, 182, 183, 184, 185, 186, 214, 224, 232, 236, 240, 247, 250, 267, 286, 296, 297, 314, 337, 359, 360, 361, 362, 400, 401, 404, 439, 440, 456, 468, 491, 495, 502, 504, 514, 524, 538, 539, 542, 580, 602, 619, 695, 741, 763, 808, 811, 1032, 1060, 1193, 1235]


# Data Pre Processing

In [15]:
def pre_process(tracks, features, columns, multi_label=False, verbose=False):
    if not multi_label:
        # Assign an integer value to each genre.
        enc = LabelEncoder()
        labels = tracks['track', 'genre_top']
        #y = enc.fit_transform(tracks['track', 'genre_top'])
    else:
        # Create an indicator matrix.
        enc = MultiLabelBinarizer()
        labels = tracks['track', 'genres_all']
        #labels = tracks['track', 'genres']

    # Split in training, validation and testing sets.
    y_train = enc.fit_transform(labels[train])
    y_val = enc.transform(labels[val])
    y_test = enc.transform(labels[test])
    X_train = features.loc[train, columns].to_numpy()
    X_val = features.loc[val, columns].to_numpy()
    X_test = features.loc[test, columns].to_numpy()
    
    X_train, y_train = shuffle(X_train, y_train, random_state=42)
    
    # Standardize features by removing the mean and scaling to unit variance.
    scaler = StandardScaler(copy=False)
    scaler.fit_transform(X_train)
    scaler.transform(X_val)
    scaler.transform(X_test)
    
    return y_train, y_val, y_test, X_train, X_val, X_test

In [16]:
def test_classifiers_features(classifiers, feature_sets, multi_label=False):
    columns = list(classifiers.keys()).insert(0, 'dim')
    scores = pd.DataFrame(columns=columns, index=feature_sets.keys())
    times = pd.DataFrame(columns=classifiers.keys(), index=feature_sets.keys())
    for fset_name, fset in tqdm_notebook(feature_sets.items(), desc='features'):
        y_train, y_val, y_test, X_train, X_val, X_test = pre_process(tracks, features_all, fset, multi_label)
        scores.loc[fset_name, 'dim'] = X_train.shape[1]
        for clf_name, clf in classifiers.items():
            t = time.process_time()
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)
            scores.loc[fset_name, clf_name] = score
            times.loc[fset_name, clf_name] = time.process_time() - t
    return scores, times

def format_scores(scores):
    def highlight(s):
        is_max = s == max(s[1:])
        return ['background-color: yellow' if v else '' for v in is_max]
    scores = scores.style.apply(highlight, axis=1)
    return scores.format('{:.2%}', subset=pd.IndexSlice[:, scores.columns[1]:])
     

classifiers = {
    'LR': LogisticRegression(),
    'kNN': KNeighborsClassifier(n_neighbors=200),
    'SVCrbf': SVC(kernel='rbf'),
    'SVCpoly1': SVC(kernel='poly', degree=1),
    'linSVC1': SVC(kernel="linear"),
    'linSVC2': LinearSVC(),
    #GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    'DT': DecisionTreeClassifier(max_depth=5),
    'RF': RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    'AdaBoost': AdaBoostClassifier(n_estimators=10),
    'MLP1': MLPClassifier(hidden_layer_sizes=(100,), max_iter=2000),
    'MLP2': MLPClassifier(hidden_layer_sizes=(200, 50), max_iter=2000),
    'NB': GaussianNB(),
    'QDA': QuadraticDiscriminantAnalysis(),
}

feature_sets = {
#    'echonest_audio': ('echonest', 'audio_features'),
#    'echonest_social': ('echonest', 'social_features'),
#    'echonest_temporal': ('echonest', 'temporal_features'),
#    'echonest_audio/social': ('echonest', ('audio_features', 'social_features')),
#    'echonest_all': ('echonest', ('audio_features', 'social_features', 'temporal_features')),
}
for name in features.columns.levels[0]:
    feature_sets[name] = name
feature_sets.update({
    'mfcc/contrast': ['mfcc', 'spectral_contrast'],
    'mfcc/contrast/chroma': ['mfcc', 'spectral_contrast', 'chroma_cens'],
    'mfcc/contrast/centroid': ['mfcc', 'spectral_contrast', 'spectral_centroid'],
    'mfcc/contrast/chroma/centroid': ['mfcc', 'spectral_contrast', 'chroma_cens', 'spectral_centroid'],
    'mfcc/contrast/chroma/centroid/tonnetz': ['mfcc', 'spectral_contrast', 'chroma_cens', 'spectral_centroid', 'tonnetz'],
    'mfcc/contrast/chroma/centroid/zcr': ['mfcc', 'spectral_contrast', 'chroma_cens', 'spectral_centroid', 'zcr'],
    'all_non-echonest': list(features.columns.levels[0])
})

scores, times = test_classifiers_features(classifiers, feature_sets)

ipd.display(format_scores(scores))
ipd.display(times.style.format('{:.4f}'))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for fset_name, fset in tqdm_notebook(feature_sets.items(), desc='features'):


features:   0%|          | 0/18 [00:00<?, ?it/s]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Unnamed: 0,dim,LR,kNN,SVCrbf,SVCpoly1,linSVC1,linSVC2,DT,RF,AdaBoost,MLP1,MLP2,NB,QDA
chroma_cens,84.0,25.00%,22.88%,32.00%,26.25%,26.25%,25.25%,18.25%,23.12%,18.62%,24.12%,26.88%,18.62%,23.75%
chroma_cqt,84.0,27.38%,22.25%,29.25%,26.88%,28.62%,27.12%,22.38%,21.25%,22.50%,28.62%,24.62%,17.25%,13.88%
chroma_stft,84.0,33.25%,30.88%,37.62%,32.75%,32.75%,34.75%,26.50%,28.25%,27.00%,30.38%,30.38%,16.00%,17.00%
mfcc,140.0,42.12%,36.88%,46.38%,42.50%,41.62%,43.00%,29.25%,35.50%,29.25%,38.75%,40.50%,36.00%,39.50%
rmse,7.0,21.12%,21.00%,23.12%,21.75%,21.75%,23.38%,25.37%,22.25%,22.00%,22.88%,20.12%,19.50%,17.75%
spectral_bandwidth,7.0,31.87%,30.50%,31.37%,31.75%,32.00%,31.87%,28.50%,29.25%,26.25%,32.38%,27.12%,29.00%,28.00%
spectral_centroid,7.0,30.88%,30.63%,33.12%,32.25%,32.25%,31.50%,29.75%,31.62%,36.88%,31.25%,28.00%,25.75%,26.00%
spectral_contrast,49.0,36.12%,34.75%,40.00%,37.00%,37.38%,33.75%,26.25%,30.00%,30.25%,32.75%,31.75%,35.12%,33.88%
spectral_rolloff,7.0,28.38%,30.50%,31.50%,31.37%,31.87%,29.88%,28.62%,31.50%,29.12%,31.25%,29.38%,24.62%,23.88%
tonnetz,42.0,26.75%,21.75%,27.50%,27.00%,26.25%,27.62%,21.12%,22.75%,24.50%,25.75%,21.25%,22.88%,22.50%


Unnamed: 0,LR,kNN,SVCrbf,SVCpoly1,linSVC1,linSVC2,DT,RF,AdaBoost,MLP1,MLP2,NB,QDA
chroma_cens,6.3806,1.0747,6.2298,3.2916,9.4807,7.8964,2.1111,0.2795,0.5312,271.3821,96.9282,0.0447,1.1799
chroma_cqt,2.5646,0.9295,6.3453,3.2667,8.2892,7.5314,1.9684,0.2885,0.6878,200.6154,176.8117,0.0415,1.1215
chroma_stft,2.4814,0.8821,6.1962,3.2542,7.2614,7.5693,1.8801,0.296,0.7461,178.149,200.6519,0.0543,1.1091
mfcc,3.167,1.0377,5.3186,2.5163,13.9285,10.089,2.678,0.0335,0.8845,116.7093,62.1316,0.0628,2.4017
rmse,1.6697,0.6343,5.1493,2.368,2.299,1.8396,0.0199,0.0314,0.064,5.9567,169.559,0.0218,0.0633
spectral_bandwidth,1.3631,0.6891,5.1101,2.3544,2.3263,1.7747,0.0198,0.0316,0.0647,5.7295,128.8405,0.0226,0.063
spectral_centroid,1.6784,0.6434,4.9752,2.2501,2.2308,1.7982,0.0196,0.0315,0.064,9.322,173.414,0.0187,0.063
spectral_contrast,2.2421,0.871,5.1687,2.3902,4.6408,3.9578,1.3619,0.3407,1.0609,155.6729,113.2466,0.0348,0.5299
spectral_rolloff,1.6475,0.6898,5.0198,2.2998,2.2713,1.813,0.0162,0.0298,0.0555,8.3912,225.7938,0.0219,0.0629
tonnetz,2.1544,0.8195,5.7472,2.6937,5.2748,4.1111,0.904,0.2349,0.9743,97.3214,149.2235,0.025,0.3775
