In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import librosa
import os
import glob
import numpy as np
import sklearn
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import np_utils
from keras import optimizers

Using TensorFlow backend.


In [5]:
# import gunshot/glassbreak data and visualize first image
gunshot_sound_data = []
glassbreak_sound_data = []
sound_labels = []
sampling_rate_per_second = 22050

gunshot_sound_dir = "/home/alexm/Datasets/gunshot_data/gunshot/"

for file in os.listdir(gunshot_sound_dir):
    if file.endswith(".wav"):
        y, sr = librosa.load(gunshot_sound_dir + file)
        print(y)
        gunshot_sound_data.append(y)
        sound_labels.append(1)

glassbreak_sound_dir = "/home/alexm/Datasets/gunshot_data/glassbreak/"        

for file in os.listdir(glassbreak_sound_dir):
    if file.endswith(".wav"):
        y, sr = librosa.load(glassbreak_sound_dir + file)
        glassbreak_sound_data.append(y)
        sound_labels.append(0)

[0. 0. 0. ... 0. 0. 0.]
[-1.7242247e-04 -2.2673715e-04 -1.7907241e-04 ...  1.9452986e-05
  2.3644574e-05  0.0000000e+00]
[ 0.00639941 -0.00803491  0.01059294 ...  0.00013107  0.00021444
  0.        ]
[0.00255712 0.00383692 0.00292432 ... 0.         0.         0.        ]
[-4.2938183e-05 -1.1663888e-04 -1.3880288e-04 ... -4.7966841e-04
 -4.5381891e-04 -4.8970233e-04]
[-1.1114657e-05 -1.6231717e-05 -1.5209395e-05 ... -7.1362874e-06
 -8.5958254e-06  0.0000000e+00]
[-2.3058593e-02 -6.5194353e-02  8.5631050e-02 ... -1.5320746e-05
  5.8599303e-06  0.0000000e+00]
[-0.0688955  -0.06834549 -0.0140107  ...  0.          0.
  0.        ]
[-2.9782531e-01 -4.8585540e-01 -4.0610278e-01 ... -1.2424431e-05
  1.6850068e-05  0.0000000e+00]
[-0.73689854 -1.0800318  -0.9519966  ... -1.0097102  -0.9775502
 -1.0539925 ]
[-0.00033946 -0.0001705   0.00014908 ...  0.00087216  0.0006108
  0.        ]
[0.0026418  0.01129374 0.0194991  ... 0.00040548 0.00025034 0.        ]
[-9.9921290e-06 -6.8405566e-06 -7.1044036

KeyboardInterrupt: 

In [3]:
# combine, reshape, & split data
combined_sound_data = gunshot_sound_data + glassbreak_sound_data
combined_sound_data_reshaped = np.zeros((126, sampling_rate_per_second))

for i in range(len(combined_sound_data)):
    if len(combined_sound_data[i]) < sampling_rate_per_second:
        number_of_missing_hertz = sampling_rate_per_second - len(combined_sound_data[i])
        padded_sound_data = combined_sound_data[i].tolist() + [0 for i in range(number_of_missing_hertz)]
        combined_sound_data_reshaped[i] = padded_sound_data
    else:
        combined_sound_data_reshaped[i] = combined_sound_data[i]      

X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(combined_sound_data_reshaped, sound_labels)

In [4]:
# reshape data
X_train = X_train.reshape(-1, sampling_rate_per_second, 1)
X_test = X_test.reshape(-1, sampling_rate_per_second, 1)

# make output a 2 dim vector indicating class
Y_train = np_utils.to_categorical(y_train, 2)
Y_test = np_utils.to_categorical(y_test, 2)

In [5]:
# create convolution neural network
model = Sequential()

# Kernels here are the filters. They need to scale with the sound data.
model.add(Conv1D(32, kernel_size=9,
                 activation='relu',
                 input_shape=(22050, 1)))

# Downsampling sound data.
model.add(MaxPooling1D(pool_size=4))

# Dropouts prevents overfitting.
model.add(Dropout(0.25))

# Another layer.
model.add(Conv1D(64, kernel_size=9,
                 activation='relu'))
model.add(MaxPooling1D(pool_size=4))
model.add(Dropout(0.25))

# Flatten transforms the convolution data into data a dense layer can receive.
model.add(Flatten())

# Dense means making a feed-forward NN from a 500 node graph.
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [6]:
# compile model
sgd=optimizers.SGD(lr=.01) 
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 22042, 32)         320       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 5510, 32)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5510, 32)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 5502, 64)          18496     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 1375, 64)          0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1375, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 88000)             0         
__________

In [7]:
model.fit(X_train, Y_train,
          batch_size=500, epochs=10,verbose=1,
          validation_data=(X_test, Y_test))

Instructions for updating:
Use tf.cast instead.
Train on 94 samples, validate on 32 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fac8009d630>