In [1]:
from glob import glob
import numpy as np
data_path = "/Users/soltan/Programs/kaggle/raw_data/train/train/*"

import librosa
def extract_features(file_name):
    X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

    # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)

    # Generates a Short-time Fourier transform (STFT) to use in the chroma_stft
    stft = np.abs(librosa.stft(X))

    # Computes a chromagram from a waveform or power spectrogram.
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)

    # Computes a mel-scaled spectrogram.
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)

    # Computes spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)

    # Computes the tonal centroid features (tonnetz)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),sr=sample_rate).T,axis=0)

    return np.concatenate([mfccs, chroma, mel, contrast, tonnetz])

In [2]:
from tqdm import tqdm
data_dir = np.array(glob(data_path))
features, labels = [], []
for file in tqdm(data_dir):
    file_name = file.split("/")[-1]
    file_name = file.split(".")[0]
    name, label = file_name.split("-")[0], file_name.split("-")[1]
    features.append(extract_features(file))
    labels.append(label)



100%|██████████| 9000/9000 [18:03<00:00,  8.31it/s]


In [3]:
from sklearn.model_selection import train_test_split
inputs_train, inputs_test, targets_train, targets_test = train_test_split(features, labels, test_size=0.2)

In [4]:
from sklearn import svm

clf = svm.SVC()
clf.fit(inputs_train, targets_train)


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [5]:
from sklearn import metrics
predictions = clf.predict(inputs_test)
print("Accuracy:",metrics.accuracy_score(targets_test, predictions))


Accuracy: 0.645


In [13]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf', 'poly', 'sigmoid']}
grid = GridSearchCV(svm.SVC(),param_grid,refit=True,verbose=1000, n_jobs=16)
grid.fit(inputs_train,targets_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   1 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done   2 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done   3 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done   4 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done   5 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done   6 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done   7 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done   8 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done   9 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done  10 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done  11 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done  12 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done  13 tasks      | elapsed:   24.8s
[Parallel(n_jobs=16)]: Done  14 tasks      | elapsed:  

KeyboardInterrupt: 

In [12]:
grid_predictions = grid.predict(inputs_test)
print("Grid Accuracy:",metrics.accuracy_score(targets_test, grid_predictions))

NotFittedError: This GridSearchCV instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [8]:
test_path = "/Users/soltan/Programs/kaggle/raw_data/test/test/*"
from tqdm import tqdm
test_dir = np.array(glob(test_path))

fout = open("../submission.txt", "w")

for file in tqdm(test_dir):
    name = file.split("/")[-1]
    ft = extract_features(file)
    pred = grid.predict([ft])[0]
    fout.write("{},{}\n".format(name, pred))

fout.close()


100%|██████████| 3000/3000 [05:32<00:00,  9.03it/s]
