In [1]:
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
%matplotlib inline
# Use the ggplot look for every figure drawn in this notebook.
plt.style.use('ggplot')

# Typography and text-size overrides, applied as a single batch.
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 12,
    'axes.labelsize': 11,
    'axes.labelweight': 'bold',
    'axes.titlesize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 11,
    'figure.titlesize': 13,
})

In [2]:
# For each entry directly under 'dataset', report whether it is a directory.
[os.path.isdir(os.path.join('dataset', entry)) for entry in os.listdir('dataset')]

[True, False, True, True, True, True, True]

In [3]:
# List the files stored under 'dataset_tmp/right'.
# NOTE(review): the other cells read from 'dataset', not 'dataset_tmp' — confirm this path is intended.
os.listdir('dataset_tmp/right')

['0a7c2a8d_nohash_1.wav',
 '0ac15fe9_nohash_0.wav',
 '0a7c2a8d_nohash_0.wav',
 '0a9f9af7_nohash_0.wav',
 '0ab3b47d_nohash_0.wav']

In [23]:
# Map each class directory name to its integer class index
# (the position of the 1 in that class's one-hot label).
class_labels = dict(zip(
    ('down', 'left', 'off', 'on', 'right', 'up'),
    range(6),
))

# Number of target classes, one per entry in the mapping above.
num_classes = len(class_labels)

def extract_feature(fn, target_length=16000):
    """Load an audio file and return its waveform at a fixed length.

    The file is read with ``librosa.load(fn, sr=None)``. A waveform shorter
    than ``target_length`` is zero-padded at the end; a longer one is
    truncated, so the result always has exactly ``target_length`` samples.

    Parameters
    ----------
    fn : str
        Path of the audio file to load.
    target_length : int, optional
        Number of samples in the returned array (default 16000).

    Returns
    -------
    numpy.ndarray
        The waveform, padded or cut to ``target_length`` samples.
    """
    X, _ = librosa.load(fn, sr=None)

    # np.pad rejects negative pad widths, so a clip longer than the target is
    # cut down first instead of raising ValueError.
    X = X[:target_length]

    return np.pad(X, (0, target_length - len(X)), mode='constant', constant_values=0.0)
    
def parse_audio_files(data_dir, sample_size=10, file_ext='*.wav'):
    """Build feature and one-hot label arrays from a directory of audio files.

    Every sub-directory of ``data_dir`` whose name is a key of the
    module-level ``class_labels`` is treated as one class. Up to
    ``sample_size`` files matching ``file_ext`` are read from it with
    ``extract_feature``. Entries that are not directories, and directories
    with no entry in ``class_labels``, are skipped.

    Parameters
    ----------
    data_dir : str
        Directory containing one sub-directory per class.
    sample_size : int, optional
        Maximum number of files taken from each class directory (default 10).
    file_ext : str, optional
        Glob pattern selecting the files inside each class directory.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)``: ``features`` holds one float64 row per file,
        ``labels`` holds the matching one-hot rows of length ``num_classes``.
        When no file is read, ``features`` has shape ``(0, 16000)`` and
        ``labels`` is empty.
    """
    feature_rows, label_rows = [], []

    for sub_dir in os.listdir(data_dir):

        if not os.path.isdir(os.path.join(data_dir, sub_dir)):
            continue

        # A directory without a class label is skipped instead of aborting
        # the whole scan with a KeyError.
        if sub_dir not in class_labels:
            print("Skipping %s (no class label)" % sub_dir)
            continue

        # Built-in `int` replaces the `np.int` alias, which NumPy 1.24 removed.
        one_hot = np.zeros(num_classes, dtype=int)
        one_hot[class_labels[sub_dir]] = 1
        print("Working on %s" % sub_dir)

        # Sorted so that the same files are selected on every run; glob order
        # is otherwise filesystem-dependent.
        file_names = sorted(glob.glob(os.path.join(data_dir, sub_dir, file_ext)))

        for fn in file_names[:max(sample_size, 0)]:
            # Rows are collected in a list and stacked once at the end;
            # stacking inside the loop re-copies the whole matrix per file.
            feature_rows.append(extract_feature(fn))
            label_rows.append(one_hot)

    if not feature_rows:
        return np.empty((0, 16000)), np.array(label_rows)

    return np.array(feature_rows, dtype=np.float64), np.array(label_rows)

In [18]:
# Root directory that parse_audio_files scans for per-class sub-directories.
data_dir = 'dataset'

# Read at most 500 recordings from each class directory.
features, labels = parse_audio_files(data_dir, sample_size=500)

Working on right
Working on left
Working on up
Working on down
Working on on
Working on off


In [24]:
# Sanity check: print the feature and label array shapes, one per line.
print(features.shape, labels.shape, sep='\n')

(3000, 16000)
(3000, 6)


In [40]:
# np.savetxt("nn_conv1d_features.csv", features, delimiter=",")
# np.savetxt("nn_conv1d_labels.csv", labels, delimiter=",")

In [25]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for testing. `stratify` keeps the class
# proportions equal in both splits, and the fixed `random_state` makes the
# split identical on every run so later results are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, stratify=labels, train_size=.9, test_size=0.1,
    random_state=42)

# X_train, X_val, y_train, y_val = train_test_split(
#     X_not_test, y_not_rest, stratify=y_not_rest, train_size=.95)

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(2700, 16000)
(2700, 6)
(300, 16000)
(300, 6)


In [42]:
## Imports
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
from keras import backend as K
from keras.utils import print_summary, plot_model

In [43]:
# Training parameters
batch_size = 100
num_classes = 6
epochs = 40
signal_length = 16000

X_train = X_train.reshape(X_train.shape[0], signal_length, 1)
X_test = X_test.reshape(X_test.shape[0], signal_length, 1)
# X_val = X_val.reshape(X_val.shape[0], signal_length, 1)
input_shape = (signal_length, 1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(2700, 16000, 1)
(2700, 6)
(300, 16000, 1)
(300, 6)


In [50]:
from keras import regularizers
from keras.layers import BatchNormalization

def build_model():
    """Create and compile the 1-D convolutional classifier.

    Reads the module-level ``input_shape`` and ``num_classes``.

    Returns
    -------
    A compiled ``Sequential`` model: a strided Conv1D stage with dropout, a
    second Conv1D stage with max pooling and dropout, a 256-unit dense layer
    with dropout, and a softmax output over ``num_classes``. It is compiled
    with categorical cross-entropy loss, the Adam optimizer and an accuracy
    metric.
    """
    # Kernel and stride of the first convolution, written as fractions of 16000.
    first_kernel = int(16000 * 0.01)
    first_stride = int(16000 * 0.005)

    layers = [
        Conv1D(320, kernel_size=first_kernel, strides=first_stride,
               activation='relu', input_shape=input_shape),
        # MaxPooling1D(),
        Dropout(0.5),

        Conv1D(640, kernel_size=4, strides=1, activation='relu'),
        MaxPooling1D(),
        Dropout(0.5),

        Flatten(),

        Dense(256, activation='relu'),
        Dropout(0.5),

        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(optimizer='adam',
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['accuracy'])

    return model

In [54]:
# Create a new compiled model; re-running this cell replaces the previous `model`.
model = build_model()

In [52]:
# Train the network on the training split.
# NOTE(review): the held-out test split is passed as validation data, so the
# per-epoch validation metrics are computed on the test set — confirm that is
# intended, or carve out a separate validation split.
model.fit(x=X_train, y=y_train,
          validation_data=(X_test, y_test),
          batch_size=batch_size,
          epochs=epochs,
          verbose=1)

Train on 2700 samples, validate on 300 samples
Epoch 1/40

KeyboardInterrupt: 

In [49]:
# Report loss and accuracy on the training split, then on the test split.
for report_format, inputs, targets in (
        ('Train loss: %f, train accuracy: %f', X_train, y_train),
        ('Test loss: %f, test accuracy: %f', X_test, y_test)):
    score = model.evaluate(inputs, targets, verbose=1)
    print()
    print(report_format % (score[0], score[1]))

SyntaxError: invalid syntax (<ipython-input-49-e6c87dafdf4a>, line 7)

In [53]:
import gc