In [1]:
import time
import os

import IPython.display as ipd
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd
import keras
from keras.layers import Activation, Dense, Conv1D, Conv2D, MaxPooling1D, Flatten, Reshape

from sklearn.utils import shuffle
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
#from sklearn.gaussian_process import GaussianProcessClassifier
#from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.multiclass import OneVsRestClassifier

import utils

Using TensorFlow backend.


In [2]:
AUDIO_DIR = os.environ.get('AUDIO_DIR')

tracks = utils.load('tracks.csv')
features = utils.load('features.csv')
echonest = utils.load('echonest.csv')

np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

tracks.shape, features.shape, echonest.shape

((106574, 52), (106574, 518), (13129, 249))

In [3]:
subset = tracks.index[tracks['set', 'subset'] <= 'small']

assert subset.isin(tracks.index).all()
assert subset.isin(features.index).all()

features_all = features.join(echonest, how='inner').sort_index(axis=1)
print('Not enough Echonest features: {}'.format(features_all.shape))

tracks = tracks.loc[subset]
features_all = features.loc[subset]

tracks.shape, features_all.shape

Not enough Echonest features: (13129, 767)


((8000, 52), (8000, 518))

In [4]:
train = tracks.index[tracks['set', 'split'] == 'training']
val = tracks.index[tracks['set', 'split'] == 'validation']
test = tracks.index[tracks['set', 'split'] == 'test']

print('{} training examples, {} validation examples, {} testing examples'.format(*map(len, [train, val, test])))

genres = list(MultiLabelBinarizer().fit(tracks['track', 'genre_top']).classes_)
#genres = list(tracks['track', 'genre_top'].unique())
print('Top genres ({}): {}'.format(len(genres), genres))
genres = list(MultiLabelBinarizer().fit(tracks['track', 'genres_all']).classes_)
print('All genres ({}): {}'.format(len(genres), genres))

6400 training examples, 800 validation examples, 800 testing examples
Top genres (22): ['-', 'E', 'F', 'H', 'I', 'P', 'R', 'a', 'c', 'e', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'x']
All genres (114): [1, 2, 6, 10, 12, 15, 16, 17, 18, 21, 22, 25, 26, 27, 30, 31, 32, 33, 36, 38, 41, 42, 45, 46, 47, 49, 53, 58, 64, 66, 70, 71, 76, 77, 79, 81, 83, 85, 86, 88, 89, 90, 92, 94, 98, 100, 101, 102, 103, 107, 109, 111, 113, 117, 118, 125, 130, 167, 171, 172, 174, 177, 180, 181, 182, 183, 184, 185, 186, 214, 224, 232, 236, 240, 247, 250, 267, 286, 296, 297, 314, 337, 359, 360, 361, 362, 400, 401, 404, 439, 440, 456, 468, 491, 495, 502, 504, 514, 524, 538, 539, 542, 580, 602, 619, 695, 741, 763, 808, 811, 1032, 1060, 1193, 1235]


In [5]:
labels_onehot = MultiLabelBinarizer().fit_transform(tracks['track', 'genre_top'])
labels_onehot = pd.DataFrame(labels_onehot, index=tracks.index)

In [6]:
# Just be sure that everything is fine. Multiprocessing is tricky to debug.
utils.FfmpegLoader().load(utils.get_audio_path(AUDIO_DIR, 2))
SampleLoader = utils.build_sample_loader(AUDIO_DIR, labels_onehot, utils.FfmpegLoader())
SampleLoader(train, batch_size=2).__next__()[0].shape

(2, 1321967)

In [7]:
# Keras parameters.
#NB_WORKER = len(os.sched_getaffinity(0))  # number of usables CPUs
NB_WORKER = 3 # len(os.sched_getaffinity(0))  # number of usables CPUs
params = {'pickle_safe': False, 'nb_worker': NB_WORKER, 'max_q_size': 10}

In [8]:
#train = train[:10]

In [9]:
loader = utils.FfmpegLoader(sampling_rate=2000)
SampleLoader = utils.build_sample_loader(AUDIO_DIR, labels_onehot, loader)
print('Dimensionality: {}'.format(loader.shape))

keras.backend.clear_session()

model = keras.models.Sequential()
model.add(Dense(output_dim=1000, input_shape=loader.shape))
model.add(Activation("relu"))
model.add(Dense(output_dim=100))
model.add(Activation("relu"))
model.add(Dense(output_dim=labels_onehot.shape[1]))
model.add(Activation("softmax"))

optimizer = keras.optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)
model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model.fit_generator(SampleLoader(train, batch_size=64), train.size, nb_epoch=1, **params)
loss = model.evaluate_generator(SampleLoader(val, batch_size=64), val.size, **params)
loss = model.evaluate_generator(SampleLoader(test, batch_size=64), test.size, **params)
#Y = model.predict_generator(SampleLoader(test, batch_size=64), test.size, **params);

loss

Dimensionality: (59953,)




Epoch 1/1
  12/6400 [..............................] - ETA: 11:06:53 - loss: 108.0836 - acc: 0.0286133297
  18/6400 [..............................] - ETA: 9:44:59 - loss: 105.8451 - acc: 0.0191 17637
99134
  21/6400 [..............................] - ETA: 9:27:48 - loss: 104.7078 - acc: 0.016455783
17635
  24/6400 [..............................] - ETA: 9:03:22 - loss: 104.5789 - acc: 0.014317636
  30/6400 [..............................] - ETA: 8:57:57 - loss: 104.9188 - acc: 0.011554576
  36/6400 [..............................] - ETA: 8:50:48 - loss: 104.8516 - acc: 0.009554578
  43/6400 [..............................] - ETA: 8:57:54 - loss: 104.8965 - acc: 0.008017634
  60/6400 [..............................] - ETA: 8:20:13 - loss: 104.7306 - acc: 0.005798569
  75/6400 [..............................] - ETA: 8:04:03 - loss: 103.9523 - acc: 0.004698565
  77/6400 [..............................] - ETA: 8:03:43 - loss: 103.9309 - acc: 0.004598567
  87/6400 [........................

ERROR: Internal Python error in the inspect module.
Below is the traceback from this internal error.


KeyboardInterrupt


In [10]:
loader = utils.FfmpegLoader(sampling_rate=16000)
#loader = utils.LibrosaLoader(sampling_rate=16000)
SampleLoader = utils.build_sample_loader(AUDIO_DIR, labels_onehot, loader)

keras.backend.clear_session()

model = keras.models.Sequential()
model.add(Reshape((-1, 1), input_shape=loader.shape))
print(model.output_shape)

model.add(Conv1D(128, 512, subsample_length=512))
print(model.output_shape)
model.add(Activation("relu"))

model.add(Conv1D(32, 8))
print(model.output_shape)
model.add(Activation("relu"))
model.add(MaxPooling1D(4))

model.add(Conv1D(32, 8))
print(model.output_shape)
model.add(Activation("relu"))
model.add(MaxPooling1D(4))

print(model.output_shape)
#model.add(Dropout(0.25))
model.add(Flatten())
print(model.output_shape)
model.add(Dense(100))
model.add(Activation("relu"))
print(model.output_shape)
model.add(Dense(labels_onehot.shape[1]))
model.add(Activation("softmax"))
print(model.output_shape)

optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True)
#optimizer = keras.optimizers.Adam()#lr=1e-5)#, momentum=0.9, nesterov=True)
model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model.fit_generator(SampleLoader(train, batch_size=10), train.size, nb_epoch=1, **params)
loss = model.evaluate_generator(SampleLoader(val, batch_size=10), val.size, **params)
loss = model.evaluate_generator(SampleLoader(test, batch_size=10), test.size, **params)

loss

(None, 479625, 1)




(None, 936, 128)
(None, 929, 32)
(None, 225, 32)
(None, 56, 32)
(None, 1792)
(None, 100)
(None, 22)




Epoch 1/1
17637
  93/6400 [..............................] - ETA: 1:04:03 - loss: 101.5773 - acc: 0.0000e+00133297
  99/6400 [..............................] - ETA: 1:03:51 - loss: 101.1683 - acc: 0.0000e+0017635
 115/6400 [..............................] - ETA: 1:03:30 - loss: 100.5058 - acc: 0.0000e+0017631
 206/6400 [..............................] - ETA: 1:01:52 - loss: 102.6623 - acc: 0.0000e+0098569
 252/6400 [>.............................] - ETA: 1:01:09 - loss: 102.0040 - acc: 0.0000e+0099134
 277/6400 [>.............................] - ETA: 1:00:51 - loss: 102.1196 - acc: 0.0000e+0055783
 339/6400 [>.............................] - ETA: 1:00:07 - loss: 102.5611 - acc: 0.0000e+0054576
 362/6400 [>.............................] - ETA: 59:50 - loss: 102.5499 - acc: 0.0000e+0017636
 400/6400 [>.............................] - ETA: 59:23 - loss: 102.4947 - acc: 0.0000e+0054578
 465/6400 [=>............................] - ETA: 58:44 - loss: 102.1157 - acc: 0.0000e+00108925
 473/640



[102.7609229850769, 0.0]