In [4]:
# File Directory 
import glob
import os
from os.path import isdir, join
from pathlib import Path

# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
import librosa

# Dimension Reduction
from sklearn.decomposition import PCA

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import librosa.display

# Data Pre-processing
import pandas as pd
from sklearn.model_selection import KFold

# Deep Learning
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Input, layers, optimizers, backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import cv2

# Configuration
#py.init_notebook_mode(connected=True)

%matplotlib inline

In [None]:
samples=[]
labels = []
sample_slice_iteration = 0

gunshot_sound_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\gunshot\\"

for file in os.listdir(gunshot_sound_dir):
    if file.endswith(".wav"):
        sample, sample_rate = librosa.load(gunshot_sound_dir + file)
        if (sample.size <= 44100):
            sample_slice = np.zeros(44100)
            sample_slice[0:sample.size] = sample
            label = 1
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
            
        for i in range(0, sample.size - 44100, 44100):
            sample_slice = sample[i : i + 44100]
            label = 1
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
        
glassbreak_sound_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\glassbreak\\"

print("...Switching to glassbreak sounds...")

for file in os.listdir(glassbreak_sound_dir):
    if file.endswith(".wav"):
        sample, sample_rate = librosa.load(glassbreak_sound_dir + file)
        if (sample.size <= 44100):
            sample_slice = np.zeros(44100)
            sample_slice[0:sample.size] = sample
            label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.5:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
            
        for i in range(0, sample.size - 44100, 44100):
            sample_slice = sample[i : i + 44100]
            label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.5:
                label = 0

            samples.append(sample_slice)
            labels.append(label)

In [None]:
np.save("samples.npy", samples)
np.save("labels.npy", labels)

In [None]:
mypath = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\gunshot_detection\\Train\\"
dr = os.listdir(mypath)
for i in range(len(dr)):
    dr[i] = int(dr[i][:-4])
dr = np.sort(dr)
files = np.zeros(len(dr)).astype('str')
for i in range(len(dr)):
    files[i] = str(dr[i]) + '.wav'
files

In [None]:
#read in the csv file of descriptors for all other urban sounds
sound_types = pd.read_csv("C:\\Users\\hosle\\Documents\\_REU2019\\_project\\gunshot_detection\\train.csv")
print(sound_types.loc[0,'Class'])

j=0
count = 0
#read in all of the wav files similar to above
urban_sound_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\gunshot_detection\\Train\\"

for file in files:
    if file.endswith(".wav"):
        count += 1
        sample, sample_rate = librosa.load(urban_sound_dir + file)
        if (count % 100 == 0):
            print (count)
        if (sample.size <= 44100):
            sample_slice = np.zeros(44100)
            sample_slice[0:sample.size] = sample
            if(sound_types.loc[j, 'Class'] == "gun_shot"):
                label = 1
            else:
                label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)


        for i in range(0, sample.size - 44100, 44100):
            sample_slice = sample[i : i + 44100]
            if(sound_types.loc[j, 'Class'] == "gun_shot"):
                label = 1
            else:
                label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
        j +=1


## Fireworks

In [None]:
samples = list(np.load("samples.npy"))
labels = list(np.load("labels.npy"))

In [None]:
sample_slice_iteration = 0

sound_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\gunshot_detection\\fireworks\\"

for file in os.listdir(sound_dir):
    if file.endswith(".wav"):
        sample, sample_rate = librosa.load(sound_dir + file)
        if (sample.size <= 44100):
            sample_slice = np.zeros(44100)
            sample_slice[0:sample.size] = sample
            label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
            
        for i in range(0, sample.size - 44100, 44100):
            sample_slice = sample[i : i + 44100]
            label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)

In [None]:
print(len(samples))
i=450
samp=samples[i]
sr=2050
print(np.max(abs(samp)))
print(labels[i])
ipd.Audio(samp, rate=sr)

In [None]:
sum(labels)

In [None]:
np.save("samples.npy", samples)
np.save("labels.npy", labels)

In [None]:
samples = np.load("samples.npy")
labels = np.load("labels.npy")

## Augment data

In [None]:
def time_shift(wav):
    start_ = int(np.random.uniform(-4800,4800))
    if start_ >= 0:
        wav_time_shift = np.r_[wav[start_:], np.random.uniform(-0.001,0.001, start_)]
    else:
        wav_time_shift = np.r_[np.random.uniform(-0.001,0.001, -start_), wav[:start_]]
    return wav_time_shift

def speed_change(wav):
    speed_rate = np.random.uniform(0.7,1.3)
    wav_speed_tune = cv2.resize(wav, (1, int(len(wav) * speed_rate))).squeeze()
    #print('speed rate: %.3f' % speed_rate, '(lower is faster)')
    if len(wav_speed_tune) < len(wav):
        pad_len = len(wav) - len(wav_speed_tune)
        wav_speed_tune = np.r_[np.random.uniform(-0.001,0.001,int(pad_len/2)),
                               wav_speed_tune,
                               np.random.uniform(-0.001,0.001,int(np.ceil(pad_len/2)))]
    else: 
        cut_len = len(wav_speed_tune) - len(wav)
        wav_speed_tune = wav_speed_tune[int(cut_len/2):int(cut_len/2)+len(wav)]
    return wav_speed_tune

In [None]:
samples.shape[0]

In [None]:
aug_labels = np.zeros((labels.shape[0]*3,))
aug_labels[1]

In [None]:
aug_samples = np.zeros((samples.shape[0]*3, samples.shape[1]))
aug_labels = np.zeros((labels.shape[0]*3,))
j = 0
for i in range (0, len(aug_samples), 3):
    aug_samples[i,:] = samples[j,:]
    aug_samples[i+1,:] = time_shift(samples[j,:])
    aug_samples[i+2,:] = speed_change(samples[j,:])
    
    aug_labels[i] = labels[j]
    aug_labels[i+1] = labels[j]
    aug_labels[i+2] = labels[j]
    j += 1

In [None]:
np.save("aug_samples.npy", aug_samples)
np.save("aug_labels.npy", aug_labels)

## Model

In [5]:
from sklearn.preprocessing import LabelBinarizer
samples = np.load("samples.npy")
labels = np.load("labels.npy")

In [6]:
l = []
for i in range(len(labels)):
    if labels[i] == 1:
        l.append('gunshot')
    else:
        l.append('other')

In [7]:
lb = LabelBinarizer()
labels = lb.fit_transform(l)

In [8]:
labels = np.hstack((labels, 1 - labels))
labels

array([[0, 1],
       [0, 1],
       [0, 1],
       ...,
       [1, 0],
       [1, 0],
       [1, 0]])

In [9]:
kf = KFold(n_splits=3, shuffle=True)
'''samples = aug_samples #np.array(samples)
labels = aug_labels #np.array(labels)'''

#labels = keras.utils.to_categorical(labels, 2)
for train_index, test_index in kf.split(samples):
    print("TRAIN:", train_index, "TEST:", test_index)
    train_wav, test_wav = samples[train_index], samples[test_index]
    train_label, test_label = labels[train_index], labels[test_index]

TRAIN: [   0    1    2 ... 6374 6375 6377] TEST: [   5    6    8 ... 6366 6373 6376]
TRAIN: [   1    4    5 ... 6375 6376 6377] TEST: [   0    2    3 ... 6370 6371 6372]
TRAIN: [   0    2    3 ... 6372 6373 6376] TEST: [   1    4   10 ... 6374 6375 6377]


In [10]:
def show(data):
    plt.figure(figsize=(12, 4))
    librosa.display.waveplot(data, sr = 16000)

In [11]:
# Parameters
lr = 0.001
generations = 20000
num_gens_to_wait = 250
batch_size = 32
drop_out_rate = 0.2
input_shape = (44100,1)

In [12]:
#For Conv1D add Channel
train_wav = np.array(train_wav)
test_wav = np.array(test_wav)
train_wav = train_wav.reshape(-1,44100,1)
test_wav = test_wav.reshape(-1,44100,1)
#train_label = keras.utils.to_categorical(train_label, 2)
#test_label = keras.utils.to_categorical(test_label, 2)

### ROC AUC metric used.

In [13]:
def auc(y_true, y_pred):
    auc = tf.metrics.auc(y_true, y_pred)[1]
    K.get_session().run(tf.local_variables_initializer())
    return auc

In [14]:
input_tensor = Input(shape=input_shape)
nclass = 2

x = layers.Convolution1D(16, 9, activation="relu", padding="same")(input_tensor)
x = layers.Convolution1D(16, 9, activation="relu", padding="same")(x)
x = layers.MaxPool1D(16)(x)
x = layers.Dropout(rate=0.1)(x)

x = layers.Convolution1D(32, 3, activation="relu", padding="same")(x)
x = layers.Convolution1D(32, 3, activation="relu", padding="same")(x)
x = layers.MaxPool1D(8)(x)
x = layers.Dropout(rate=0.1)(x)

x = layers.Convolution1D(128, 3, activation="relu", padding="same")(x)
x = layers.Convolution1D(128, 3, activation="relu", padding="same")(x)
x = layers.GlobalMaxPool1D()(x)
x = layers.Dropout(rate=0.2)(x)

x = layers.Dense(64, activation="relu")(x)
x = layers.Dense(1028, activation="relu")(x)
output_tensor = layers.Dense(nclass, activation="softmax")(x)

model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)
opt = optimizers.Adam(0.001, 0.001 / 100)

model.compile(optimizer=opt, loss=keras.losses.binary_crossentropy, metrics=[auc, 'acc'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [15]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 44100, 1)          0         
_________________________________________________________________
conv1d (Conv1D)              (None, 44100, 16)         160       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 44100, 16)         2320      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 2756, 16)          0         
_________________________________________________________________
dropout (Dropout)            (None, 2756, 16)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 2756, 32)          1568      
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 2756, 32)          3104      
__________

In [16]:
model_filename = '1Dcnngunglass.pkl' 
callbacks = [
    EarlyStopping(monitor='val_acc',
                  patience=10,
                  verbose=1,
                  mode='max'),
    
    ModelCheckpoint(model_filename, monitor='val_acc',
                    verbose=1,
                    save_best_only=True,
                    mode='max'),
]

In [17]:
model.fit(train_wav, train_label, 
          validation_data=[test_wav, test_label],
          batch_size=batch_size,
          callbacks = callbacks,
          epochs=100,
          verbose=1)

Train on 4252 samples, validate on 2126 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 00001: val_acc improved from -inf to 0.96566, saving model to 1Dcnngunglass.pkl
Epoch 2/100
Epoch 00002: val_acc did not improve from 0.96566
Epoch 3/100
Epoch 00003: val_acc improved from 0.96566 to 0.96849, saving model to 1Dcnngunglass.pkl
Epoch 4/100
Epoch 00004: val_acc did not improve from 0.96849
Epoch 5/100
Epoch 00005: val_acc improved from 0.96849 to 0.97742, saving model to 1Dcnngunglass.pkl
Epoch 6/100
Epoch 00006: val_acc improved from 0.97742 to 0.98354, saving model to 1Dcnngunglass.pkl
Epoch 7/100
Epoch 00007: val_acc did not improve from 0.98354
Epoch 8/100
Epoch 00008: val_acc did not improve from 0.98354
Epoch 9/100
Epoch 00009: val_acc improved from 0.98354 to 0.98495, saving model to 1Dcnngunglass.pkl
Epoch 10/100
Epoch 00010: val_acc did not improve from 0.98495
Epoch 11/100
Epoch 00011: val_acc did not improve from 0.98495
Epoch 12/100
Epoch 00012: va

Epoch 25/100
Epoch 00025: val_acc did not improve from 0.98918
Epoch 26/100
Epoch 00026: val_acc did not improve from 0.98918
Epoch 27/100
Epoch 00027: val_acc did not improve from 0.98918
Epoch 28/100
Epoch 00028: val_acc did not improve from 0.98918
Epoch 29/100
Epoch 00029: val_acc did not improve from 0.98918
Epoch 30/100
Epoch 00030: val_acc did not improve from 0.98918
Epoch 31/100
Epoch 00031: val_acc did not improve from 0.98918
Epoch 32/100
Epoch 00032: val_acc did not improve from 0.98918
Epoch 00032: early stopping


<tensorflow.python.keras.callbacks.History at 0x2ba42b467f0>

### Validation

In [18]:
model.load_weights("1Dcnngunglass.pkl")

In [19]:
model.save("model.h5")

In [None]:
mypath = "C:\\Users\\hosle\\Downloads\\Metal Bang-SoundBible.com-672025076.wav"
#mypath = "C:\\Users\\hosle\\Documents\\_REU2019\\extra\\320134.wav"
#mypath = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\gunshot_detection\\fireworks\\56608__syna-max__fireworks-well-i-guess-you-missed-it.wav"

fire, sr = librosa.load(mypath)

In [None]:
show(fire)

In [None]:
validation = []
if (fire.size <= 44100):
    sample_slice = np.zeros(44100)
    sample_slice[0:fire.size] = fire
    validation.append(sample_slice)

for i in range(0, fire.size - 44100, 44100):
    sample_slice = fire[i : i + 44100]
    validation.append(sample_slice)
    
validation = np.array(validation)
validation_1 = validation.reshape(-1,44100,1)

In [None]:
val_pred = model.predict(validation_1)

In [None]:
val_pred1 = lb.inverse_transform(val_pred[:, 0])
print(len(val_pred1))

In [None]:
np.argwhere(val_pred1 == 'gunshot')

In [None]:
#[0, 1] = gunshot
i = 0
print(val_pred[i, :])
print(val_pred1[i])
show(validation [i])
ipd.Audio(validation [i], rate=22050)

## Tflite converter

In [20]:
model_name = "gunshot_sound_model.h5"

In [22]:
converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file(model_name, allow_custom_ops = True)# custom_objects={'auc': auc})
print("\n made the converter using from_keras_model \n")

TypeError: from_keras_model_file() got an unexpected keyword argument 'allow_custom_ops'