In [2]:
%load_ext autoreload
%autoreload 2
#tensorboard doesn't work with keras model callbacks! needs tensorflow.keras
from preprocess import *
from tensorflow import keras
from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import (Dense, Dropout, Flatten, Convolution2D, GlobalAveragePooling2D, BatchNormalization, Flatten,
                          GlobalMaxPool2D, MaxPool2D, concatenate, Activation, Input)
from tensorflow.keras import losses, models, optimizers
from tensorflow.keras.activations import relu, softmax
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard
import time  
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [3]:
# get_labels returns a 3-tuple: the label names, their integer indices and
# their one-hot encodings (visible in this cell's displayed output).
labels = get_labels('./data')  
num_classes = len(labels[0])  # one class per label name; read later when building the model
labels  # bare last expression so the notebook displays the tuple

(['shh', 'silence', 'ttt'], array([0, 1, 2]), array([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], dtype=float32))

In [3]:
# Length of the longest wav data; all data will be padded to this length.
# The value is kept in a notebook-level variable because prep_data() passes it
# to wav2mfcc() when featurizing a single file for prediction.
max_sample_length = max_sample_len()
print(max_sample_length)

16384


In [4]:
# Save data to array file first.
# n_mfcc is also reused by prep_data() so single-file prediction is featurized
# with the same setting.
# NOTE(review): presumably the number of MFCC coefficients per frame (it matches
# the size-40 axis of X_train printed below) - confirm in preprocess.
n_mfcc = 40
save_data_to_array(n_mfcc=n_mfcc)

Saving vectors of label - 'shh': 100%|██████████| 260/260 [00:01<00:00, 244.78it/s]
Saving vectors of label - 'silence': 100%|██████████| 214/214 [00:00<00:00, 214.73it/s]
Saving vectors of label - 'ttt': 100%|██████████| 229/229 [00:00<00:00, 236.96it/s]


In [5]:
# Load the train set and the test set.
# NOTE(review): how the split is drawn (ratio, shuffling, seed) is decided
# inside get_train_test() - check preprocess if reproducibility matters.
X_train, X_test, y_train, y_test = get_train_test()

In [6]:
# One shape per line: train features, train labels, test features, test labels.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

(421, 40, 33)
(421,)
(282, 40, 33)
(282,)


In [7]:
# Append a trailing channel axis of size 1: the model's Input layer is declared
# as (height, width, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# One-hot encode the integer labels. num_classes is passed explicitly because
# to_categorical otherwise infers the width from max(y) + 1, so a split that
# happens to miss the highest class index would get a narrower matrix than the
# model's softmax output and training/validation would fail on a shape mismatch.
y_train_hot = to_categorical(y_train, num_classes=num_classes)
y_test_hot = to_categorical(y_test, num_classes=num_classes)

In [8]:
# Shapes after adding the channel axis and one-hot encoding, one per line.
print(
    X_train.shape,
    y_train.shape,
    X_test.shape,
    y_test.shape,
    y_train_hot.shape,  # 2nd dim is the number of labels
    sep="\n",
)

(421, 40, 33, 1)
(421,)
(282, 40, 33, 1)
(282,)
(421, 3)


In [9]:
#kaggle standardizes data, TODO try normalizing like in the 1dconv
# Per-feature statistics are computed over the training samples only (axis 0)
# and then applied to both splits, so nothing is fitted on the test set.
#
# WARNING: this cell overwrites X_train / X_test in place. Running it a second
# time recomputes mean/std from already-standardized data, which silently
# replaces the statistics that prep_data() and the saved stats file depend on.
# Re-run the notebook from the data-loading cell instead of re-running this cell.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

# A feature that is constant across the whole training set has std == 0, and
# dividing by it would fill that position with NaN/inf. Use 1.0 there so the
# feature is only centred. np.std returned a fresh array, so editing it in
# place touches nothing else and keeps its dtype.
std[std == 0] = 1.0

X_train = (X_train - mean)/std
X_test = (X_test - mean)/std

In [10]:
#modified kaggle tut, https://www.kaggle.com/fizzbuzz/beginner-s-guide-to-audio-data
def get_2d_conv_model(training_shape, n_classes=None, learning_rate=0.001):
    """Build and compile a small 2D-convolutional classifier.

    Architecture: one 16-filter (4x10) "same"-padded convolution, batch
    normalization, ReLU and default max pooling, then Flatten, a 64-unit dense
    layer with batch normalization and ReLU, and a softmax output layer.

    Args:
        training_shape: Shape sequence such as ``X_train.shape`` or
            ``model.input_shape``. Only elements 1 and 2 are read (the two
            spatial sizes of the input); a single trailing channel is assumed.
        n_classes: Number of output units. Defaults to the notebook-level
            ``num_classes`` when omitted, which is the original behavior.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A Keras model compiled with Adam, categorical cross-entropy loss and
        the ``'acc'`` metric.
    """
    if n_classes is None:
        # Keep existing single-argument callers working unchanged.
        n_classes = num_classes

    inp = Input(shape=(training_shape[1], training_shape[2], 1))
    x = Convolution2D(16, (4,10), padding="same")(inp)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D()(x)

    x = Flatten()(x)
    x = Dense(64)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    out = Dense(n_classes, activation=softmax)(x)

    model = models.Model(inputs=inp, outputs=out)
    opt = optimizers.Adam(learning_rate)

    model.compile(optimizer=opt, loss=losses.categorical_crossentropy, metrics=['acc'])
    return model

# Next: build the model, then train it.

In [11]:
NAME = "mfccmodel{}".format(int(time.time()))
tensorboard_cb = TensorBoard(log_dir="logs/{}".format(NAME), histogram_freq=1)
%load_ext tensorboard
tensorboard_cb

<tensorflow.python.keras.callbacks.TensorBoard at 0x7ff4d4c7e390>

In [15]:
# Build a fresh model sized from the training data, then train it.
# The test split doubles as validation data, and the TensorBoard callback
# records the run under logs/<NAME>.
model = get_2d_conv_model(X_train.shape)
model.fit(
    X_train,
    y_train_hot,
    batch_size=100,
    epochs=8,
    verbose=True,
    validation_data=(X_test, y_test_hot),
    callbacks=[tensorboard_cb],
)

Train on 421 samples, validate on 282 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x7ff4b0616320>

In [16]:
#save the model and stats if you want to use it from the udp listener
import os  # cell-local import: only needed to make sure the stats folder exists

save_model(model, NAME)

# np.save does not create missing parent directories, so create stats/ first;
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs("stats", exist_ok=True)

# The statistics are stored in the order [std, mean]; a consumer of this file
# must unpack them in that same order.
np.save("stats/{}.npy".format(NAME), [std, mean])
NAME

'mfccmodel1584667400'

In [19]:
# Launch TensorBoard inside the notebook, reading every run stored under logs/.
# NOTE(review): --bind_all makes the TensorBoard server listen on all network
# interfaces, not just localhost; drop the flag unless remote access is intended.
%tensorboard --logdir logs/ --bind_all

In [17]:
def prep_data(filepath, shape):
    """Turn one wav file into a standardized single-sample model input.

    The file is featurized by ``wav2mfcc`` using the notebook-level ``n_mfcc``
    and ``max_sample_length``, reshaped to ``(1, shape[1], shape[2], 1)`` and
    standardized with the notebook-level ``mean`` and ``std``.

    Args:
        filepath: Path of the wav file to featurize.
        shape: Shape sequence (e.g. ``model.input_shape``); only elements 1
            and 2 are read.

    Returns:
        The standardized array of shape ``(1, shape[1], shape[2], 1)``.
    """
    mfcc = wav2mfcc(filepath, n_mfcc=n_mfcc, max_sample_length=max_sample_length)
    batch_of_one = mfcc.reshape(1, shape[1], shape[2], 1)
    return (batch_of_one - mean)/std

# Predicts one sample
def predict(filepath, model):
    """Return the label name the model scores highest for one wav file.

    Args:
        filepath: Path of the wav file to classify.
        model: Trained model; its ``input_shape`` decides how the sample is
            reshaped before prediction.

    Returns:
        The entry of ``get_labels()[0]`` at the index of the highest score.
    """
    features = prep_data(filepath, model.input_shape)
    label_names = get_labels()[0]
    winner = np.argmax(model.predict(features))
    return label_names[winner]

# Smoke test: classify one known 'shh' recording with the freshly trained model.
print(predict(filepath='./data/shh/W9M3W3YR15.wav', model=model))

shh
