In [16]:
import time
import os
import ast

import IPython.display as ipd
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd

import keras
from keras.layers import Activation, Dense, Conv1D, Conv2D, MaxPooling1D, Flatten, Reshape

from sklearn.utils import shuffle
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, StandardScaler, LabelBinarizer
from sklearn.linear_model import LogisticRegression

from sklearn import linear_model

import utils

### Constants

In [17]:
AUDIO_DIR = "..\\fma_small"
META_DIR = "..\\fma_metadata"
SUBSET = 'small'

### Load data

In [18]:
# Load metadata to memory
tracks   = utils.load(META_DIR + '\\tracks.csv')
features = utils.load(META_DIR + '\\features.csv')
echonest = utils.load(META_DIR + '\\echonest.csv')
#genres = utils.load(META_DIR + 'genres.csv')

np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

In [19]:
# Choose Subset
subset = tracks.index[tracks['set', 'subset'] <= 'small']

assert subset.isin(tracks.index).all()
assert subset.isin(features.index).all()

features_all = features.join(echonest, how='inner').sort_index(axis=1)
print('Not enough Echonest features: {}'.format(features_all.shape))

tracks = tracks.loc[subset]
features_all = features.loc[subset]

tracks.shape, features_all.shape

Not enough Echonest features: (13129, 767)


((8000, 52), (8000, 518))

In [20]:
# Splitting into Train, Validation, Test

train = tracks.index[tracks['set', 'split'] == 'training']
val   = tracks.index[tracks['set', 'split'] == 'validation']
test  = tracks.index[tracks['set', 'split'] == 'test']

In [21]:
print('{} training examples\n{} validation examples\n{} testing examples'.format(*map(len, [train, val, test])))

genres = list(MultiLabelBinarizer().fit(tracks['track', 'genre_top']).classes_)
print('Top genres ({}): {}'.format(len(genres), genres))
#genres = list(MultiLabelBinarizer().fit(tracks['track', 'genres_all']).classes_)
#print('All genres ({}): {}'.format(len(genres), genres))


6400 training examples
800 validation examples
800 testing examples
Top genres (22): ['-', 'E', 'F', 'H', 'I', 'P', 'R', 'a', 'c', 'e', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'x']


### Deep Learning Model No Audio

In [22]:
features_all.shape

(8000, 518)

In [23]:
def only_feature_nn(generes, features = 518):
    model = keras.models.Sequential()
    model.add(Dense(units=1000, input_shape=(features,)))
    model.add(Activation("relu"))
    model.add(Dense(units=200))
    model.add(Activation("relu"))
    model.add(Dense(units=generes))
    model.add(Activation("softmax"))
    
    optimizer = keras.optimizers.Adam()
    model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model
    

In [24]:
#labels_onehot = MultiLabelBinarizer().fit_transform(tracks['track', 'genre_top'])

In [25]:
lb = LabelBinarizer()
one_hot = lb.fit_transform(X=tracks['track', 'genre_top'])

In [11]:
print(features_all.values.shape)
print(one_hot.shape)

(8000, 518)
(8000, 8)


In [15]:
X = features_all.values
Y = one_hot

In [12]:
params = {
    
}
keras.backend.clear_session()
model = only_feature_nn(one_hot.shape[1])

history = model.fit(x =, y =  , batch_size=64, epochs=20, **params, verbose = 2)

Epoch 1/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 2/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 3/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 4/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 5/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 6/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 7/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 8/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 9/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 10/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 11/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 12/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 13/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 14/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 15/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 16/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 17/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 18/20
 - 1s - loss: 14.1033 - acc: 0.1250
Epoch 19/20
 - 2s - loss: 14.1033 - acc: 0.1250
Epoch 20/20
 - 1s - loss: 14.1033 - acc: 0.1250


In [45]:
logreg = linear_model.LogisticRegression(verbose=2, max_iter=10)


In [37]:
logreg.fit(X, tracks['track', 'genre_top'])

[LibLinear]



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=2, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=2, warm_start=False)

In [44]:
 np.mean(tracks['track', 'genre_top'] == logreg.predict(X))

0.207875

array([0, 0, 0, 1, 0, 0, 0, 0])

### Deep Learning Model Using Audio

In [None]:
labels_onehot = MultiLabelBinarizer().fit_transform(tracks['track', 'genre_top'])
labels_onehot = pd.DataFrame(labels_onehot, index=tracks.index)

In [None]:
# Sanitation Test, Just making sure it works
utils.FfmpegLoader().load(utils.get_audio_path(AUDIO_DIR, 2))
SampleLoader = utils.build_sample_loader(AUDIO_DIR, labels_onehot, utils.FfmpegLoader())
SampleLoader(train, batch_size=2).__next__()[0].shape

In [None]:
params = {
    
}

In [None]:
def basic_fully_connected(loader, labels_onehot):
    model = keras.models.Sequential()
    model.add(Dense(units=1000, input_shape=loader.shape))
    model.add(Activation("relu"))
    model.add(Dense(units=100))
    model.add(Activation("relu"))
    model.add(Dense(units=labels_onehot.shape[1]))
    model.add(Activation("softmax"))
    
    optimizer = keras.optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model


In [None]:
loader = utils.FfmpegLoader(sampling_rate=2000)
SampleLoader = utils.build_sample_loader(AUDIO_DIR, labels_onehot, loader)
print('Dimensionality: {}'.format(loader.shape))

In [None]:
keras.backend.clear_session()

model = basic_fully_connected(loader, labels_onehot)
model.fit_generator(SampleLoader(train, batch_size=64), train.size/100, epochs=2, **params)

In [None]:
loss = model.evaluate_generator(SampleLoader(val, batch_size=64), val.size, **params)
loss = model.evaluate_generator(SampleLoader(test, batch_size=64), test.size, **params)