In [1]:
import os
import os.path as osp
import sys
from itertools import combinations
sys.path.append(osp.abspath('..'))

import numpy as np
import scipy
from tqdm import tqdm_notebook
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import scale
from sklearn.svm import LinearSVC, SVC


import config
from datasets.gtzan import GTZAN_MFCC

%load_ext autoreload
%autoreload 2

In [17]:
# Reproducibility: seed NumPy's legacy global RNG once, before any data is drawn.
SEED = 1234
np.random.seed(SEED)

# Values forwarded as `min_segments` to the train / test dataset constructors.
# NOTE(review): -1 presumably means "no minimum" -- confirm in GTZAN_MFCC.
MIN_SEGMENTS = TEST_SEGMENTS = -1

In [18]:
# Both splits are requested with the same test/val proportions, so share them.
split_kwargs = dict(test_size=0.1, val_size=0.4)

# The data used for fitting below is the dataset's 'val' phase (randomized);
# the held-out data is its 'test' phase.
train_set = GTZAN_MFCC(phase='val', min_segments=MIN_SEGMENTS,
                       randomized=True, **split_kwargs)
test_set = GTZAN_MFCC(phase='test', min_segments=TEST_SEGMENTS, **split_kwargs)

# Shape of the first training sample; its last two axes are kept as C and F.
_, C, F = train_set[0][0].shape

# Split sizes first, then the shape of the first sample of each split.
for tag, split in (('Train:', train_set), ('Test:', test_set)):
    print(tag, len(split))
for split in (train_set, test_set):
    print('Shape:', split[0][0].shape)

Train: 400
Test: 100
Shape: (1, 42, 1997)
Shape: (1, 42, 1997)


In [20]:
def concatenate(dataset):
    """Flatten a dataset of 3-D feature arrays into one 2-D sample matrix.

    Every item of ``dataset`` is an ``(x, y)`` pair in which ``x`` has shape
    ``(N, C, F)``. Each of the ``N * F`` positions along the first and last
    axes becomes one row holding the ``C`` values found at that position, and
    every such row is labelled with the item's ``y``.

    Args:
        dataset: Sized iterable (``len()`` is used for the progress bar) that
            yields ``(x, y)`` pairs as described above. Items may have
            different ``N`` and ``F`` but must share the same ``C``.

    Returns:
        Tuple ``(X, Y)``: ``X`` has shape ``(total_rows, C)`` and ``Y`` has
        shape ``(total_rows,)``, where ``total_rows`` is the sum of ``N * F``
        over all items. Rows keep the dataset's iteration order.

    Raises:
        ValueError: If ``dataset`` yields no items (there is nothing to stack)
            or the items disagree on ``C``.
    """
    X = []
    Y = []
    for x, y in tqdm_notebook(dataset, total=len(dataset)):
        N, C, F = x.shape
        # (N, C, F) -> (N, F, C) -> (N * F, C): one row per (segment, position).
        X.append(x.transpose(0, 2, 1).reshape(N * F, C))
        Y.append(np.repeat(np.asarray(y), N * F))
    X = np.vstack(X)
    # np.concatenate always yields a flat label vector. np.array(Y).flatten()
    # only did so when every item contributed the same number of rows; with
    # unequal counts it produced a ragged array instead of one label per row.
    Y = np.concatenate(Y)
    return X, Y

# Flatten both splits into frame-level matrices: train is processed first, then test.
(x_train, y_train), (x_test, y_test) = map(concatenate, (train_set, test_set))
print(x_train.shape, x_test.shape, sep='\n')

HBox(children=(IntProgress(value=0, max=400), HTML(value='')))

HBox(children=(IntProgress(value=0), HTML(value='')))

(798800, 42)
(199700, 42)


In [21]:
# Estimator family under evaluation; CLF_CLASS and `rs` are read again when the
# final classifier is built.
CLF_CLASS = LinearSVC

# Search grid: C sweeps the powers of ten from 10**-3 up to 10**2.
# random_state is a single-valued grid entry, so it is carried along inside
# rs.best_params_. Knobs explored earlier and currently disabled: 'gamma'
# (10**-4 .. 10**-1), 'solver': ['sag'], 'max_iter': [1000], and
# LogisticRegression() as the estimator.
params = dict(
    C=[10**exponent for exponent in range(-3, 3)],
    random_state=[1234],
)

# 3-fold cross-validated grid search over all frame-level samples, using every core.
rs = GridSearchCV(estimator=CLF_CLASS(), param_grid=params,
                  cv=3, n_jobs=-1, verbose=2)
rs.fit(x_train, y_train)
print(rs.best_score_, rs.best_params_, sep='\n')

Fitting 3 folds for each of 6 candidates, totalling 18 fits
[CV] C=0.001, random_state=1234 ......................................
[CV] C=0.001, random_state=1234 ......................................
[CV] C=0.001, random_state=1234 ......................................
[CV] C=0.01, random_state=1234 .......................................
[CV] C=0.01, random_state=1234 .......................................
[CV] C=0.01, random_state=1234 .......................................
[CV] C=0.1, random_state=1234 ........................................
[CV] C=0.1, random_state=1234 ........................................
[CV] C=0.1, random_state=1234 ........................................
[CV] C=1, random_state=1234 ..........................................
[CV] C=1, random_state=1234 ..........................................
[CV] C=1, random_state=1234 ..........................................
[CV] C=10, random_state=1234 .........................................
[CV] C=10, random

KeyboardInterrupt: 

In [None]:
%%time
if not hasattr(CLF_CLASS, 'predict_proba'):
    clf = CalibratedClassifierCV(CLF_CLASS(**rs.best_params_))
else:
    clf = CLF_CLASS(**rs.best_params_, probability=True)
clf.fit(x_train, y_train)

In [None]:
def predict_tracks(model, features, frame_labels, n_tracks):
    """Turn frame-level probabilities into one predicted label per track.

    Assumes every track contributes the same number of consecutive rows to
    ``features`` / ``frame_labels``, in track order. The per-frame class
    probabilities of a track are summed and the class with the largest total
    is chosen.

    Args:
        model: Fitted classifier exposing ``predict_proba`` and ``classes_``.
        features: 2-D array of frame-level samples, one row per frame.
        frame_labels: 1-D array with one label per row of ``features``.
        n_tracks: Number of tracks the rows belong to.

    Returns:
        Tuple ``(track_labels, track_predictions)``, each of length ``n_tracks``.

    Raises:
        ValueError: If the rows cannot be split evenly across ``n_tracks``.
    """
    n_frames = len(frame_labels)
    if n_tracks <= 0 or n_frames == 0 or n_frames % n_tracks != 0:
        raise ValueError(
            'Cannot split %d frames evenly across %d tracks' % (n_frames, n_tracks))
    # Derive the per-track frame count from the data instead of a hard-coded
    # constant or a config value (which may be the -1 placeholder; a step of
    # -1 would reverse the labels rather than pick one per track).
    frames_per_track = n_frames // n_tracks
    track_labels = frame_labels[::frames_per_track]

    frame_proba = model.predict_proba(features)
    # -1 lets NumPy infer the number of classes from the probability matrix.
    track_proba = frame_proba.reshape((n_tracks, frames_per_track, -1)).sum(axis=1)
    # argmax yields a column index; map it back to the actual class label.
    track_predictions = model.classes_[np.argmax(track_proba, axis=1)]
    return track_labels, track_predictions


# MIN_SEGMENTS / TEST_SEGMENTS are deliberately left untouched here so that
# re-running the dataset cell keeps producing the same splits.
y_train_, y_train_pred = predict_tracks(clf, x_train, y_train, len(train_set))
print('Train score:', accuracy_score(y_train_, y_train_pred))

y_test_, y_test_pred = predict_tracks(clf, x_test, y_test, len(test_set))
print('Test score:', accuracy_score(y_test_, y_test_pred))