# Keyword Spotting baseline classifier

In [6]:
import numpy as np
# Seed NumPy's global random number generator up front so that later code
# drawing from it gives repeatable results within this kernel.
# NOTE(review): the seed is set before `utils` is imported; keep this order in
# case that import draws random numbers — confirm against utils.
np.random.seed(1234)
# Project-local helper used in the data loading section below.
from utils import load_dataset
import sklearn
# Record the scikit-learn version next to the results; the `=` specifier makes
# the f-string print both the expression and its value.
print(f'{sklearn.__version__ = }')

sklearn.__version__ = '1.2.0'


## Data loading

In [7]:
# Location of the preprocessed dataset handed to load_dataset.
data_dir = '/mnt/speechdataset/processed_data'

# The 35 keyword classes. The order is kept exactly as originally written,
# in case load_dataset derives label indices from list position
# (NOTE(review): confirm against utils.load_dataset).
keywords = [
    'bed', 'down', 'forward', 'house', 'nine', 'one', 'six',
    'tree', 'visual', 'bird', 'eight', 'four', 'learn', 'no',
    'right', 'stop', 'two', 'wow', 'cat', 'five', 'go',
    'left', 'off', 'seven', 'up', 'yes', 'backward', 'dog',
    'follow', 'happy', 'marvin', 'on', 'sheila', 'three', 'zero',
]

# Feature arrays come back laid out as Time x Features x Number.
X_train, Y_train, X_test, Y_test = load_dataset(
    data_dir,
    keywords,
    nfilt=20,
)

In [8]:
# Quick sanity check: show the shapes of the training features and labels.
print(*(array.shape for array in (X_train, Y_train)))

(99, 39, 89954) (89954,)


## PCA + SVM as a baseline

In [15]:
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
import time

# Baseline experiment: project every flattened sample onto the leading PCA
# components (10 % of the Time * Features dimensions), then train a
# one-vs-rest linear SVM on the projections and report test accuracy.
kept_dimensions = int(0.1 * (X_train.shape[0] * X_train.shape[1]))
pca = PCA(kept_dimensions)
# Need Number x (Time * Features) shape for PCA: move the sample axis to the
# front, then flatten the remaining two axes. The reshaped array is passed
# inline so the large temporary copy is not kept alive in the kernel.
Z_train = pca.fit_transform(
    X_train.transpose(2, 0, 1).reshape(X_train.shape[2], X_train.shape[0] * X_train.shape[1])
)
print(f'Variance explained with {kept_dimensions} dimensions: {100 * sum(pca.explained_variance_ratio_):2.2f} %')
# The test set is only transformed with the projection fitted on the training set.
Z_test = pca.transform(
    X_test.transpose(2, 0, 1).reshape(X_test.shape[2], X_test.shape[0] * X_test.shape[1])
)
# SVM for multiclass One-vs-Rest
# random_state is fixed explicitly (same value as the NumPy seed set at the
# top of the notebook): the per-class SVMs are fitted in separate worker
# processes (n_jobs=15), where the global NumPy seed of this kernel is not
# guaranteed to apply, so without it the result changes from run to run.
svm = LinearSVC(random_state=1234)
baseline_classifier = OneVsRestClassifier(svm, verbose=5, n_jobs=15)
baseline_classifier.fit(Z_train, Y_train)
prediction = baseline_classifier.predict(Z_test)
# Measure performance: count matching labels in one vectorised comparison
# instead of a Python-level loop over every prediction.
# Assumes Y_test holds one label per test sample, like Y_train — ravel()
# guards against a trailing singleton axis.
count = int(np.count_nonzero(prediction == np.asarray(Y_test).ravel()))
acc = 100 * count / len(Y_test)
# NOTE(review): presumably this pause lets the parallel workers' log output
# flush before the summary lines are printed — confirm before removing.
time.sleep(3)
print(f'PCA + SVM accuracy:\t {acc:2.2f} %')
# Chance level for a uniform guess over all keyword classes.
print(f'Random label accuracy:\t {100 / len(keywords):2.2f} %')

Variance explained with 386 dimensions: 94.81 %


[Parallel(n_jobs=15)]: Using backend LokyBackend with 15 concurrent workers.
[Parallel(n_jobs=15)]: Done  14 out of  35 | elapsed:  5.5min remaining:  8.3min
[Parallel(n_jobs=15)]: Done  22 out of  35 | elapsed: 10.6min remaining:  6.3min
[Parallel(n_jobs=15)]: Done  30 out of  35 | elapsed: 11.3min remaining:  1.9min
[Parallel(n_jobs=15)]: Done  35 out of  35 | elapsed: 13.4min finished


PCA + SVM accuracy:	 16.05 %
Random label accuracy:	 2.86 %






In [14]:
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
import time

# Same baseline experiment with a much smaller projection: keep only 1 % of
# the Time * Features dimensions before training the one-vs-rest linear SVM.
kept_dimensions = int(0.01 * (X_train.shape[0] * X_train.shape[1]))
pca = PCA(kept_dimensions)
# Need Number x (Time * Features) shape for PCA: move the sample axis to the
# front, then flatten the remaining two axes. The reshaped array is passed
# inline so the large temporary copy is not kept alive in the kernel.
Z_train = pca.fit_transform(
    X_train.transpose(2, 0, 1).reshape(X_train.shape[2], X_train.shape[0] * X_train.shape[1])
)
print(f'Variance explained with {kept_dimensions} dimensions: {100 * sum(pca.explained_variance_ratio_):2.2f} %')
# The test set is only transformed with the projection fitted on the training set.
Z_test = pca.transform(
    X_test.transpose(2, 0, 1).reshape(X_test.shape[2], X_test.shape[0] * X_test.shape[1])
)
# SVM for multiclass One-vs-Rest
# random_state is fixed explicitly (same value as the NumPy seed set at the
# top of the notebook): the per-class SVMs are fitted in separate worker
# processes (n_jobs=15), where the global NumPy seed of this kernel is not
# guaranteed to apply, so without it the result changes from run to run.
svm = LinearSVC(random_state=1234)
baseline_classifier = OneVsRestClassifier(svm, verbose=5, n_jobs=15)
baseline_classifier.fit(Z_train, Y_train)
prediction = baseline_classifier.predict(Z_test)
# Measure performance: count matching labels in one vectorised comparison
# instead of a Python-level loop over every prediction.
# Assumes Y_test holds one label per test sample, like Y_train — ravel()
# guards against a trailing singleton axis.
count = int(np.count_nonzero(prediction == np.asarray(Y_test).ravel()))
acc = 100 * count / len(Y_test)
# NOTE(review): presumably this pause lets the parallel workers' log output
# flush before the summary lines are printed — confirm before removing.
time.sleep(3)
print(f'PCA + SVM accuracy:\t {acc:2.2f} %')
# Chance level for a uniform guess over all keyword classes.
print(f'Random label accuracy:\t {100 / len(keywords):2.2f} %')

Variance explained with 38 dimensions: 59.97 %


[Parallel(n_jobs=15)]: Using backend LokyBackend with 15 concurrent workers.
[Parallel(n_jobs=15)]: Done  14 out of  35 | elapsed:  1.1min remaining:  1.7min
[Parallel(n_jobs=15)]: Done  22 out of  35 | elapsed:  1.5min remaining:   54.8s
[Parallel(n_jobs=15)]: Done  30 out of  35 | elapsed:  2.1min remaining:   21.4s
[Parallel(n_jobs=15)]: Done  35 out of  35 | elapsed:  2.2min finished


PCA + SVM accuracy:	 11.50 %
Random label accuracy:	 2.86 %
