In [1]:
from glob import glob
import numpy as np
data_path = "/Users/soltan/Programs/kaggle/raw_data/train/train/*"

import librosa
def extract_features(file_name):
    X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

    # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)

    # Generates a Short-time Fourier transform (STFT) to use in the chroma_stft
    stft = np.abs(librosa.stft(X))

    # Computes a chromagram from a waveform or power spectrogram.
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)

    # Computes a mel-scaled spectrogram.
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)

    # Computes spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)

    # Computes the tonal centroid features (tonnetz)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),sr=sample_rate).T,axis=0)

    return np.concatenate([mfccs, chroma, mel, contrast, tonnetz])


In [2]:
from tqdm import tqdm
data_dir = np.array(glob(data_path))
features, labels = [], []
for file in tqdm(data_dir):
    file_name = file.split("/")[-1]
    file_name = file.split(".")[0]
    name, label = file_name.split("-")[0], file_name.split("-")[1]
    features.append(extract_features(file))
    labels.append(label)


100%|██████████| 9000/9000 [18:55<00:00,  7.92it/s]


In [7]:
from sklearn.model_selection import train_test_split
inputs_train, inputs_test, targets_train, targets_test = train_test_split(features, labels, test_size=0.2)


#
# from sklearn.neighbors import KNeighborsClassifier
#
# model = KNeighborsClassifier(n_neighbors=17)
#model.fit(inputs_train, targets_train)

In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
grid_params = {
    'n_neighbors': [3, 5, 7, 9, 11, 15],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

model = GridSearchCV(KNeighborsClassifier(), grid_params, n_jobs=10, verbose=1000)
model.fit(inputs_train, targets_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done   1 tasks      | elapsed:    1.6s
[Parallel(n_jobs=10)]: Done   2 tasks      | elapsed:    1.8s
[Parallel(n_jobs=10)]: Done   3 tasks      | elapsed:    2.3s
[Parallel(n_jobs=10)]: Done   4 tasks      | elapsed:    2.7s
[Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:    3.1s
[Parallel(n_jobs=10)]: Done   6 tasks      | elapsed:    3.5s
[Parallel(n_jobs=10)]: Done   7 tasks      | elapsed:    3.8s
[Parallel(n_jobs=10)]: Done   8 tasks      | elapsed:    4.2s
[Parallel(n_jobs=10)]: Done   9 tasks      | elapsed:    4.6s
[Parallel(n_jobs=10)]: Done  10 tasks      | elapsed:    5.0s
[Parallel(n_jobs=10)]: Done  11 tasks      | elapsed:    5.6s
[Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:    5.8s
[Parallel(n_jobs=10)]: Done  13 tasks      | elapsed:    6.3s
[Parallel(n_jobs=10)]: Done  14 tasks      | elapsed:  

GridSearchCV(cv=None, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=10,
             param_grid={'metric': ['euclidean', 'manhattan'],
                         'n_neighbors': [3, 5, 7, 9, 11, 15],
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=1000)

In [9]:
from sklearn import metrics
predictions = model.predict(inputs_test)
print("Accuracy:",metrics.accuracy_score(targets_test, predictions))


Accuracy: 0.6272222222222222
