In [4]:
import tensorflow as tf
import numpy as np
import matplotlib.mlab
import scipy.io.wavfile
import scipy
import os
import time
from scipy import signal
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import sparse
sns.set()

In [5]:
def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    freqs, times, spec = signal.spectrogram(audio,
                                    fs=sample_rate,
                                    window='hann',
                                    nperseg=nperseg,
                                    noverlap=noverlap,
                                    detrend=False)
    return freqs, times, np.log(spec.T.astype(np.float32) + eps)

def pad_audio(samples, L=16000):
    if len(samples) >= L: return samples
    else: return np.pad(samples, pad_width=(L - len(samples), 0), mode='constant', constant_values=(0, 0))

def chop_audio(samples, L=16000, num=20):
    for i in range(num):
        beg = np.random.randint(0, len(samples) - L)
        yield samples[beg: beg + L]

In [6]:
folders = [i for i in os.listdir(os.getcwd())if i.find('.md') < 0 and i.find('.txt') < 0 and i.find('ipynb') < 0 and i.find('py')  < 0 and i.find('LICENSE') < 0 and i.find('_background_noise_') < 0]

In [7]:
new_sample_rate = 8000
Y = []
X = []
for i in folders:
    print(i)
    for k in os.listdir(os.getcwd()+'/'+i):
        sample_rate, samples = scipy.io.wavfile.read(os.path.join(os.getcwd(), i, k))
        samples = pad_audio(samples)
        if len(samples) > 16000:
            n_samples = chop_audio(samples)
        else: n_samples = [samples]
        for samples in n_samples:
            resampled = signal.resample(samples, int(new_sample_rate / sample_rate * samples.shape[0]))
            _, _, specgram = log_specgram(resampled, sample_rate=new_sample_rate)
            Y.append(i)
            X.append(scipy.misc.imresize(specgram,[45, 40]).flatten())

yes
marvin
off
bed
house
up
six
go
four
nine
left
no
three
wow
sheila
right
on
five
seven
zero
stop
one
down
bird
tree
eight
dog
two
cat


In [8]:
X = np.array(X)
print(X.shape)
len(Y)

(62979, 1800)


62979

In [9]:
import xgboost as xgb
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder
labels = np.unique(Y)
target = LabelEncoder().fit_transform(Y)
train_X, test_X, train_Y, test_Y = train_test_split(X, target, test_size = 0.2)



In [None]:
train_d = xgb.DMatrix(train_X, train_Y)
test_d = xgb.DMatrix(test_X, test_Y)
params_xgd = {
    'min_child_weight': 10.0,
    'max_depth': 7,
    'objective': 'multi:softprob',
    'max_delta_step': 1.8,
    'num_class': len(labels),
    'colsample_bytree': 0.4,
    'subsample': 0.8,
    'learning_rate': 0.1,
    'gamma': 0.65,
    'silent':False,
    'eval_metric': 'mlogloss',
    'tree_method':'gpu_hist'
}
model = xgb.train(params_xgd, train_d, 10000, evals=[(test_d, 'validation')], 
                  early_stopping_rounds=100, verbose_eval=5)

[0]	validation-mlogloss:3.2758
Will train until validation-mlogloss hasn't improved in 100 rounds.
[5]	validation-mlogloss:2.85859
[10]	validation-mlogloss:2.50898
[15]	validation-mlogloss:2.23018
[20]	validation-mlogloss:2.02854
[25]	validation-mlogloss:1.87272
[30]	validation-mlogloss:1.75218
[35]	validation-mlogloss:1.65266
[40]	validation-mlogloss:1.57137
[45]	validation-mlogloss:1.50541
[50]	validation-mlogloss:1.44853
[55]	validation-mlogloss:1.39895
[60]	validation-mlogloss:1.35449
[65]	validation-mlogloss:1.31792
[70]	validation-mlogloss:1.28513
[75]	validation-mlogloss:1.25347
[80]	validation-mlogloss:1.22769
[85]	validation-mlogloss:1.20271
[90]	validation-mlogloss:1.17994
[110]	validation-mlogloss:1.1072
[115]	validation-mlogloss:1.09192
[120]	validation-mlogloss:1.07794
[125]	validation-mlogloss:1.06526
[130]	validation-mlogloss:1.05304
[135]	validation-mlogloss:1.04167
[140]	validation-mlogloss:1.03059
[145]	validation-mlogloss:1.02016
[150]	validation-mlogloss:1.01023
[15

[1185]	validation-mlogloss:0.71189
[1190]	validation-mlogloss:0.711869
[1195]	validation-mlogloss:0.711838
[1200]	validation-mlogloss:0.711769
[1205]	validation-mlogloss:0.711646
[1210]	validation-mlogloss:0.711689
[1215]	validation-mlogloss:0.711609
[1220]	validation-mlogloss:0.711547
[1225]	validation-mlogloss:0.711521
[1230]	validation-mlogloss:0.711589
[1235]	validation-mlogloss:0.711663
[1240]	validation-mlogloss:0.711674
[1245]	validation-mlogloss:0.71156
[1250]	validation-mlogloss:0.711497
[1255]	validation-mlogloss:0.711361
[1260]	validation-mlogloss:0.711408
[1265]	validation-mlogloss:0.711342
[1270]	validation-mlogloss:0.711376
[1275]	validation-mlogloss:0.711298
[1280]	validation-mlogloss:0.711231
[1285]	validation-mlogloss:0.711108
[1290]	validation-mlogloss:0.711031
[1295]	validation-mlogloss:0.711023
[1300]	validation-mlogloss:0.710938
[1305]	validation-mlogloss:0.710947
[1310]	validation-mlogloss:0.710972
[1315]	validation-mlogloss:0.710945
[1320]	validation-mlogloss:0.7

In [16]:
predicted = np.argmax(model.predict(xgb.DMatrix(test_X),ntree_limit=model.best_ntree_limit),axis=1)
print('accuracy validation set: ', np.mean(predicted == test_Y))

# print scores
print(metrics.classification_report(test_Y, predicted, target_names = labels))

accuracy validation set:  0.801047951731
             precision    recall  f1-score   support

        bed       0.71      0.78      0.74       330
       bird       0.85      0.83      0.84       348
        cat       0.81      0.85      0.83       362
        dog       0.76      0.70      0.73       351
       down       0.77      0.74      0.75       442
      eight       0.83      0.86      0.85       485
       five       0.80      0.77      0.78       478
       four       0.83      0.88      0.86       460
         go       0.68      0.69      0.69       460
      house       0.90      0.82      0.86       368
       left       0.81      0.78      0.79       495
     marvin       0.87      0.80      0.84       363
       nine       0.81      0.80      0.80       455
         no       0.76      0.74      0.75       468
        off       0.77      0.78      0.78       464
         on       0.78      0.74      0.76       447
        one       0.76      0.82      0.79       418
    