In [57]:
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from scipy.signal import decimate
from sklearn.model_selection import train_test_split

In [58]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPool1D, GlobalAvgPool1D, Dropout, BatchNormalization, Dense
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping
from keras.utils import np_utils
from keras.regularizers import l2

In [59]:
# Sample rate that load_wav_file asserts every recording was stored with.
SAMPLE_RATE = 44100

In [61]:
def clean_filename(fname, string):
    """Return the bare file name of a dataset path, restoring a missing prefix.

    Parameters
    ----------
    fname : str
        Path using '/' separators, e.g. ``'set_a/artifact__2010.wav'``.
    string : str
        Prefix prepended when the file name itself starts with ``'__'``.

    Returns
    -------
    str
        The last path component, prefixed with ``string`` if it began
        with ``'__'``.
    """
    # Take the LAST component rather than index 1: identical for the usual
    # 'dir/file' form, but no IndexError for a bare file name and no
    # directory name returned for nested paths.
    file_name = fname.rsplit('/', 1)[-1]
    if file_name.startswith('__'):
        file_name = string + file_name
    return file_name

In [62]:
def load_wav_file(name, path):
    """Read the WAV file at ``path + name`` and return its sample array.

    The two arguments are concatenated as-is, so ``path`` must already end
    with a separator. The file's sample rate is asserted to equal
    SAMPLE_RATE.
    """
    rate, samples = wavfile.read(path + name)
    assert rate == SAMPLE_RATE
    return samples

In [64]:
def repeat_to_length(arr, length):
    """Tile a 1D array end-to-end until it is exactly ``length`` long.

    Parameters
    ----------
    arr : array-like, 1D, non-empty
        Signal to repeat. If it is longer than ``length`` it is truncated.
    length : int
        Number of samples in the result.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(length,)``.

    Raises
    ------
    ValueError
        If ``arr`` is empty or not one-dimensional.
    """
    arr = np.asarray(arr)
    if arr.ndim != 1:
        raise ValueError('repeat_to_length expects a 1D array, got ndim=%d' % arr.ndim)
    # An empty input can never fill the output; the previous loop-based
    # version spun forever in that case.
    if arr.size == 0:
        raise ValueError('cannot repeat an empty array')
    # np.resize fills the new shape with repeated copies of the input.
    return np.resize(arr, length).astype('float32')

In [66]:
# Build the working frame: one row per recording listed in set_a.csv.
df = pd.read_csv('set_a.csv')

# Reduce each path to its file name; names starting with '__' get a prefix.
df['fname'] = df['fname'].apply(lambda raw: clean_filename(raw, string='Aunlabelledtest'))
df['label'] = df['label'].fillna('unclassified')

# Read the raw samples, then loop-pad every clip to the longest one.
df['time_series'] = df['fname'].apply(lambda name: load_wav_file(name, path='set_a/'))
df['len_series'] = df['time_series'].apply(len)
MAX_LEN = int(df['len_series'].max())
df['time_series'] = df['time_series'].apply(lambda sig: repeat_to_length(sig, length=MAX_LEN))
df.head()

Unnamed: 0,dataset,fname,label,sublabel,time_series,len_series
0,a,artifact__201012172012.wav,artifact,,"[1.0, -3.0, -1.0, -7.0, -9.0, -2.0, -6.0, -5.0...",396900
1,a,artifact__201105040918.wav,artifact,,"[-2.0, 3.0, -4.0, 4.0, -3.0, 2.0, -1.0, 0.0, 0...",396900
2,a,artifact__201105041959.wav,artifact,,"[6.0, -4.0, -9.0, -1.0, -4.0, 1.0, -5.0, 2.0, ...",396900
3,a,artifact__201105051017.wav,artifact,,"[-85.0, -198.0, -214.0, -173.0, -177.0, -206.0...",396900
4,a,artifact__201105060108.wav,artifact,,"[53.0, -35.0, 47.0, 170.0, 340.0, 436.0, 535.0...",396900


In [67]:
# Stack the equal-length clips into one 2D array, one clip per row.
x_data = np.stack(df['time_series'].tolist())

In [68]:
# Hand-entered integer class (0, 1 or 2) for each recording.
# NOTE(review): presumably one entry per row of df, in row order -- the
# mapping from df['label'] to these integers is not shown; confirm.
new_labels =[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1,
             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 
             2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
             2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 
             1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 0, 2, 2, 1, 1, 1, 1, 1, 
             0, 1, 0, 1, 1, 1, 2, 1, 0, 1, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 
             1, 0, 0, 0, 0, 0, 0, 1, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
new_labels = np.array(new_labels, dtype='int')
# One-hot encode the labels to match the softmax / categorical_crossentropy model.
y_data = np_utils.to_categorical(new_labels)

In [69]:
# Hold out 25% of the clips. The split is seeded so a Restart & Run All
# reproduces the same partition, and stratified on the integer labels so
# all three classes keep their proportions in both halves.
x_train, x_test, y_train, y_test, train_filenames, test_filenames = train_test_split(
    x_data, y_data, df['fname'].values,
    test_size=0.25,
    random_state=42,
    stratify=new_labels,
)

In [70]:
# Downsample along the time axis in three stages (8, then 8, then 4)
# instead of one large factor; the training set first, then the test set.
for factor in (8, 8, 4):
    x_train = decimate(x_train, factor, axis=1, zero_phase=True)
for factor in (8, 8, 4):
    x_test = decimate(x_test, factor, axis=1, zero_phase=True)

  b = a[a_slice]
  return y[sl]


In [71]:
# Scale every clip by its own standard deviation (per row, no mean removal).
x_train = x_train / x_train.std(axis=1, keepdims=True)
x_test = x_test / x_test.std(axis=1, keepdims=True)

In [72]:
# Append a trailing axis of size 1 so the arrays are 3D for Conv1D.
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)

In [73]:
# 1D CNN over the decimated signal: four conv -> max-pool -> batch-norm
# stages (the first three L2-regularised), two further conv stages with
# increasing dropout, then global average pooling and a 3-way softmax.
model = Sequential([
    # Stage 1
    Conv1D(filters=4, kernel_size=9, activation='relu',
           input_shape=x_train.shape[1:],
           kernel_regularizer=l2(0.025)),
    MaxPool1D(strides=4),
    BatchNormalization(),
    # Stage 2
    Conv1D(filters=4, kernel_size=9, activation='relu',
           kernel_regularizer=l2(0.05)),
    MaxPool1D(strides=4),
    BatchNormalization(),
    # Stage 3
    Conv1D(filters=8, kernel_size=9, activation='relu',
           kernel_regularizer=l2(0.1)),
    MaxPool1D(strides=4),
    BatchNormalization(),
    # Stage 4 (no weight penalty)
    Conv1D(filters=16, kernel_size=9, activation='relu'),
    MaxPool1D(strides=4),
    BatchNormalization(),
    Dropout(0.25),
    # Wider conv stages without pooling
    Conv1D(filters=64, kernel_size=4, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Conv1D(filters=32, kernel_size=1, activation='relu'),
    BatchNormalization(),
    Dropout(0.75),
    # Collapse the time axis and classify
    GlobalAvgPool1D(),
    Dense(3, activation='softmax'),
])

In [74]:
def batch_generator(x_train, y_train, batch_size):
    """Yield endless random batches whose signals are circularly shifted in time.

    Parameters
    ----------
    x_train : numpy.ndarray
        Signals indexed as (sample, time, channel).
    y_train : numpy.ndarray
        Targets whose first axis lines up with ``x_train``.
    batch_size : int
        Number of samples drawn (with replacement) for every batch.

    Yields
    ------
    (x_batch, y_batch) : tuple of numpy.ndarray
        Freshly allocated arrays with the dtypes of the inputs. Each row of
        ``x_batch`` is a row of ``x_train`` rolled along the time axis by an
        independent random offset; ``y_batch`` holds the matching targets.
    """
    # The earlier float32 np.empty buffers were rebound before ever being
    # read, so they are dropped: batches always carried the input dtype.
    sample_ids = range(x_train.shape[0])

    while True:
        picked = np.random.choice(sample_ids, batch_size)
        # Fancy indexing copies, so the rolls below never modify x_train.
        x_batch = x_train[picked]
        y_batch = y_train[picked]

        for row in range(batch_size):
            shift = np.random.randint(x_batch.shape[1])
            x_batch[row] = np.roll(x_batch[row], shift, axis=0)

        yield x_batch, y_batch

In [75]:
# Write weights only, and only when val_loss improves on the best seen so far.
weight_saver = ModelCheckpoint(
    'set_a_weights.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

In [76]:
# One source of truth for the learning-rate schedule. LearningRateScheduler
# sets the optimizer's rate at the start of every epoch, including the
# first, so the 1e-4 previously passed to Adam was never used; the schedule
# has always started at 1e-3 and decayed by 0.8 per epoch.
INITIAL_LR = 1e-3
LR_DECAY = 0.8

model.compile(optimizer=Adam(INITIAL_LR), loss='categorical_crossentropy', metrics=['accuracy'])
annealer = LearningRateScheduler(lambda epoch: INITIAL_LR * LR_DECAY ** epoch)

In [77]:
# Train on randomly shifted batches of 8 clips: 1000 steps per epoch, 25 epochs.
# NOTE(review): the held-out split doubles as validation data, and
# weight_saver selects checkpoints on its val_loss.
train_batches = batch_generator(x_train, y_train, 8)
hist = model.fit_generator(
    train_batches,
    steps_per_epoch=1000,
    epochs=25,
    validation_data=(x_test, y_test),
    callbacks=[weight_saver, annealer],
    verbose=2,
)

Epoch 1/25
 - 43s - loss: 1.8950 - acc: 0.4661 - val_loss: 1.1237 - val_acc: 0.7955
Epoch 2/25
 - 12s - loss: 1.2013 - acc: 0.6255 - val_loss: 0.7994 - val_acc: 0.9318
Epoch 3/25
 - 11s - loss: 0.9721 - acc: 0.6761 - val_loss: 0.6064 - val_acc: 0.9318
Epoch 4/25
 - 11s - loss: 0.8263 - acc: 0.7157 - val_loss: 0.4632 - val_acc: 0.9545
Epoch 5/25
 - 11s - loss: 0.7595 - acc: 0.7369 - val_loss: 0.4191 - val_acc: 0.9318
Epoch 6/25
 - 11s - loss: 0.6988 - acc: 0.7532 - val_loss: 0.4330 - val_acc: 0.9091
Epoch 7/25
 - 11s - loss: 0.6608 - acc: 0.7720 - val_loss: 0.4120 - val_acc: 0.9318
Epoch 8/25
 - 11s - loss: 0.6304 - acc: 0.7906 - val_loss: 0.3837 - val_acc: 0.9091
Epoch 9/25
 - 11s - loss: 0.5920 - acc: 0.8091 - val_loss: 0.3494 - val_acc: 0.9318
Epoch 10/25
 - 11s - loss: 0.5727 - acc: 0.8157 - val_loss: 0.3584 - val_acc: 0.9318
Epoch 11/25
 - 12s - loss: 0.5440 - acc: 0.8275 - val_loss: 0.3165 - val_acc: 0.9545
Epoch 12/25
 - 12s - loss: 0.5555 - acc: 0.8183 - val_loss: 0.3153 - val_a

In [78]:
# Softmax outputs for the held-out clips, reduced to the most likely class index.
y_hat = model.predict(x_test)
y_pred = y_hat.argmax(axis=1)
y_pred

array([0, 2, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 2, 1, 0, 2, 2,
       2, 0, 2, 0, 0, 1, 0, 2, 1, 1, 1, 2, 1, 0, 1, 0, 2, 0, 0, 1, 0, 0],
      dtype=int64)

In [79]:
# Load the new recording through the shared helper: it performs the same
# SAMPLE_RATE assertion, avoids rebinding IPython's `_` output variable,
# and makes the duplicate SAMPLE_RATE definition unnecessary.
b = load_wav_file('microphone-results.wav', path='')

In [80]:
# Clip to the training clip length. MAX_LEN replaces the hard-coded sample
# count so this stays correct if the training data changes.
b = b[:MAX_LEN]

In [81]:
# Loop-pad to the training length. Padding to len(b) was a no-op, so a
# recording shorter than MAX_LEN never reached the length the training
# clips were padded to.
length = MAX_LEN
value = repeat_to_length(b, length)
# Add a leading batch axis: one clip, shape (1, length).
value = np.array([value])
value.shape

(1, 396900)

In [82]:
# Same three-stage downsampling (8, 8, 4) that was applied to the training data.
for factor in (8, 8, 4):
    value = decimate(value, factor, axis=1, zero_phase=True)

  b = a[a_slice]
  return y[sl]


In [83]:
# Display the array shape after the three decimation passes.
value.shape

(1, 1551)

In [84]:
# Per-clip scaling by standard deviation, then a trailing axis of size 1,
# mirroring the preprocessing of x_train / x_test.
value = value / value.std(axis=1, keepdims=True)
value = np.expand_dims(value, axis=2)

In [85]:
# Classify the new recording and show the winning class index.
y_new = model.predict(value)
new_pred = y_new.argmax(axis=1)
new_pred

array([0], dtype=int64)