In [31]:
# File Directory 
import glob
import os
from os.path import isdir, join
from pathlib import Path

# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
import librosa

# Dimension Reduction
from sklearn.decomposition import PCA

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import librosa.display
'''import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls'''

# Data Pre-processing
import pandas as pd
from sklearn.model_selection import KFold
import soundfile

# Deep Learning
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras import Input, layers
from tensorflow.keras import backend as K
import cv2

# Configuration
#py.init_notebook_mode(connected=True)

from keras.activations import relu, softmax
from keras.layers import (Convolution1D, Dense, Dropout, GlobalAveragePooling1D, 
                          GlobalMaxPool1D, Input, MaxPool1D, concatenate)
from keras import losses, models, optimizers
from keras.callbacks import (EarlyStopping, LearningRateScheduler,
                             ModelCheckpoint, TensorBoard, ReduceLROnPlateau)

%matplotlib inline

In [4]:
samples=[]
labels = []
sample_slice_iteration = 0

gunshot_sound_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\gunshot\\"

for file in os.listdir(gunshot_sound_dir):
    if file.endswith(".wav"):
        sample, sample_rate = librosa.load(gunshot_sound_dir + file)
        if (sample.size <= 44100):
            sample_slice = np.zeros(44100)
            sample_slice[0:sample.size] = sample
            label = 1
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
            
        for i in range(0, sample.size - 44100, 44100):
            sample_slice = sample[i : i + 44100]
            label = 1
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
        
glassbreak_sound_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\glassbreak\\"

print("...Switching to glassbreak sounds...")

for file in os.listdir(glassbreak_sound_dir):
    if file.endswith(".wav"):
        sample, sample_rate = librosa.load(glassbreak_sound_dir + file)
        if (sample.size <= 44100):
            sample_slice = np.zeros(44100)
            sample_slice[0:sample.size] = sample
            label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.5:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
            
        for i in range(0, sample.size - 44100, 44100):
            sample_slice = sample[i : i + 44100]
            label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.5:
                label = 0

            samples.append(sample_slice)
            labels.append(label)

...Switching to glassbreak sounds...


In [5]:
np.save("samples.npy", samples)
np.save("labels.npy", labels)

In [6]:
mypath = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\gunshot_detection\\Train\\"
dr = os.listdir(mypath)
for i in range(len(dr)):
    dr[i] = int(dr[i][:-4])
dr = np.sort(dr)
files = np.zeros(len(dr)).astype('str')
for i in range(len(dr)):
    files[i] = str(dr[i]) + '.wav'
files

array(['0.wav', '1.wav', '2.wav', ..., '8727.wav', '8728.wav', '8729.wav'],
      dtype='<U32')

In [7]:
#read in the csv file of descriptors for all other urban sounds
sound_types = pd.read_csv("C:\\Users\\hosle\\Documents\\_REU2019\\_project\\gunshot_detection\\train.csv")
print(sound_types.loc[0,'Class'])

j=0
count = 0
#read in all of the wav files similar to above
urban_sound_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\gunshot_detection\\Train\\"

for file in files:
    if file.endswith(".wav"):
        count += 1
        sample, sample_rate = librosa.load(urban_sound_dir + file)
        if (count % 100 == 0):
            print (count)
        if (sample.size <= 44100):
            sample_slice = np.zeros(44100)
            sample_slice[0:sample.size] = sample
            if(sound_types.loc[j, 'Class'] == "gun_shot"):
                label = 1
            else:
                label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)


        for i in range(0, sample.size - 44100, 44100):
            sample_slice = sample[i : i + 44100]
            if(sound_types.loc[j, 'Class'] == "gun_shot"):
                label = 1
            else:
                label = 0
            sample_slice_iteration += 1
            if np.max(abs(sample_slice)) < 0.25:
                label = 0

            samples.append(sample_slice)
            labels.append(label)
        j +=1


siren
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400


In [9]:
print(len(samples))
i=450
samp=samples[i]
sr=2050
print(np.max(abs(samp)))
print(labels[i])
ipd.Audio(samp, rate=sr)

6112
0.2860078
0


In [10]:
sum(labels)

552

In [11]:
np.save("samples.npy", samples)
np.save("labels.npy", labels)

In [2]:
samples = np.load("samples.npy")
labels = np.load("labels.npy")

## Augment data

In [34]:
def time_shift(wav):
    start_ = int(np.random.uniform(-4800,4800))
    if start_ >= 0:
        wav_time_shift = np.r_[wav[start_:], np.random.uniform(-0.001,0.001, start_)]
    else:
        wav_time_shift = np.r_[np.random.uniform(-0.001,0.001, -start_), wav[:start_]]
    return wav_time_shift

def speed_change(wav):
    speed_rate = np.random.uniform(0.7,1.3)
    wav_speed_tune = cv2.resize(wav, (1, int(len(wav) * speed_rate))).squeeze()
    #print('speed rate: %.3f' % speed_rate, '(lower is faster)')
    if len(wav_speed_tune) < len(wav):
        pad_len = len(wav) - len(wav_speed_tune)
        wav_speed_tune = np.r_[np.random.uniform(-0.001,0.001,int(pad_len/2)),
                               wav_speed_tune,
                               np.random.uniform(-0.001,0.001,int(np.ceil(pad_len/2)))]
    else: 
        cut_len = len(wav_speed_tune) - len(wav)
        wav_speed_tune = wav_speed_tune[int(cut_len/2):int(cut_len/2)+len(wav)]
    return wav_speed_tune

In [46]:
samples.shape[0]

6112

In [56]:
aug_labels = np.zeros((labels.shape[0]*3,))
aug_labels[1]

0.0

In [58]:
aug_samples = np.zeros((samples.shape[0]*3, samples.shape[1]))
aug_labels = np.zeros((labels.shape[0]*3,))
j = 0
for i in range (0, len(aug_samples), 3):
    aug_samples[i,:] = samples[j,:]
    aug_samples[i+1,:] = samples[j,:]
    aug_samples[i+2,:] = samples[j,:]
    
    aug_labels[i] = labels[j]
    aug_labels[i+1] = labels[j]
    aug_labels[i+2] = labels[j]
    j += 1

In [59]:
np.save("aug_samples.npy", aug_samples)
np.save("aug_labels.npy", aug_labels)

In [60]:
kf = KFold(n_splits=3, shuffle=True)
samples = aug_samples #np.array(samples)
labels = aug_labels #np.array(labels)
for train_index, test_index in kf.split(samples):
    print("TRAIN:", train_index, "TEST:", test_index)
    train_wav, test_wav = samples[train_index], samples[test_index]
    train_label, test_label = labels[train_index], labels[test_index]

TRAIN: [    0     2     4 ... 18332 18333 18335] TEST: [    1     3     5 ... 18329 18331 18334]
TRAIN: [    0     1     3 ... 18331 18334 18335] TEST: [    2     4     8 ... 18330 18332 18333]
TRAIN: [    1     2     3 ... 18332 18333 18334] TEST: [    0     6     7 ... 18324 18325 18335]


In [61]:
# Parameters
lr = 0.001
generations = 20000
num_gens_to_wait = 250
batch_size = 32
drop_out_rate = 0.2
input_shape = (44100,1)

In [62]:
#For Conv1D add Channel
train_wav = np.array(train_wav)
test_wav = np.array(test_wav)
train_wav = train_wav.reshape(-1,44100,1)
test_wav = test_wav.reshape(-1,44100,1)
train_label = keras.utils.to_categorical(train_label, 2)
test_label = keras.utils.to_categorical(test_label, 2)

In [63]:
print(train_wav.shape)

(12224, 44100, 1)


In [64]:
input_tensor = Input(shape=input_shape)
nclass = 2

x = Convolution1D(16, 9, activation=relu, padding="same")(input_tensor)
x = Convolution1D(16, 9, activation=relu, padding="same")(x)
x = MaxPool1D(16)(x)
x = Dropout(rate=0.1)(x)

x = Convolution1D(32, 3, activation=relu, padding="same")(x)
x = Convolution1D(32, 3, activation=relu, padding="same")(x)
x = MaxPool1D(4)(x)
x = Dropout(rate=0.1)(x)

x = Convolution1D(32, 3, activation=relu, padding="same")(x)
x = Convolution1D(32, 3, activation=relu, padding="same")(x)
x = MaxPool1D(4)(x)
x = Dropout(rate=0.1)(x)

x = Convolution1D(256, 3, activation=relu, padding="same")(x)
x = Convolution1D(256, 3, activation=relu, padding="same")(x)
x = GlobalMaxPool1D()(x)
x = Dropout(rate=0.2)(x)

x = Dense(64, activation=relu)(x)
x = Dense(1028, activation=relu)(x)
output_tensor = Dense(nclass, activation=softmax)(x)

model = models.Model(inputs=input_tensor, outputs=output_tensor)
opt = optimizers.Adam(0.001, 0.001 / 100)

model.compile(optimizer=opt, loss=losses.binary_crossentropy, metrics=['acc'])

In [65]:
model_filename = '1Dcnngunglass.pkl' 
callbacks = [
    EarlyStopping(monitor='val_acc',
                  patience=10,
                  verbose=1,
                  mode='auto'),
    
    ModelCheckpoint(model_filename, monitor='val_acc',
                    verbose=1,
                    save_best_only=True,
                    mode='auto'),
]

In [67]:
model.fit(train_wav, train_label, 
          validation_data=[test_wav, test_label],
          batch_size=batch_size,
          callbacks = callbacks,
          epochs=100,
          verbose=1)

Train on 12224 samples, validate on 6112 samples
Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.97971, saving model to 1Dcnngunglass.pkl
Epoch 2/100

Epoch 00002: val_acc improved from 0.97971 to 0.98560, saving model to 1Dcnngunglass.pkl
Epoch 3/100
  384/12224 [..............................] - ETA: 29s - loss: 0.0637 - acc: 0.9766

KeyboardInterrupt: 

In [66]:
Y_test_pred = model.predict(test_wav)
y_predicted_classes_test = Y_test_pred.argmax(axis=-1)
y_actual_classes_test= test_label.argmax(axis=-1)
wrong_examples = np.nonzero(y_predicted_classes_test != y_actual_classes_test)

In [67]:
print(wrong_examples)
y_test = test_label

(array([   1,    3,    9,   12,   13,   16,   17,   39,   49,   59,   65,
         89,   92,   93,   95,   99,  101,  103,  105,  106,  107,  116,
        132,  144,  145,  146,  163,  192,  277,  278,  281,  397,  411,
        442,  448,  497,  528,  564,  571,  575,  577,  651,  657,  682,
        700,  728,  755,  762,  814,  849,  866,  870,  923,  954,  986,
        994, 1000, 1075, 1084, 1096, 1230, 1236, 1254, 1268, 1299, 1308,
       1317, 1399, 1438, 1457, 1466, 1490, 1535, 1537, 1541, 1559, 1566,
       1578, 1587, 1594, 1628, 1712, 1729, 1735, 1794, 1797, 1905, 1972,
       2012, 2023, 2036, 2074, 2079, 2097, 2117, 2161, 2182, 2186, 2211],
      dtype=int64),)


In [83]:
i=2012
samp=np.reshape(test_wav[i],44100,)
sr=22050
print(y_test[i],Y_test_pred[i])
ipd.Audio(samp, rate=sr)

[0. 1.] [0.99788946 0.0021105 ]


In [19]:
i=5
samp=np.reshape(test_wav[i],44100,)
sr=22050
print(y_test[i],Y_test_pred[i])
ipd.Audio(samp, rate=sr)

[0. 0. 1.] [1.2893060e-02 4.8187034e-05 9.8705882e-01]


In [20]:
i=19
samp=np.reshape(test_wav[i],44100,)
sr=22050
print(y_test[i],Y_test_pred[i])
ipd.Audio(samp, rate=sr)

[0. 0. 1.] [2.8285123e-03 3.3512665e-04 9.9683630e-01]


In [21]:
i=41
samp=np.reshape(test_wav[i],44100,)
sr=22050
print(y_test[i],Y_test_pred[i])
ipd.Audio(samp, rate=sr)

[0. 0. 1.] [4.8850230e-03 1.1059678e-04 9.9500436e-01]


In [27]:
i=504
samp=np.reshape(test_wav[i],44100,)
sr=22050
print(y_test[i],Y_test_pred[i])
ipd.Audio(samp, rate=sr)

[1. 0. 0.] [9.9916399e-01 1.4078681e-07 8.3591376e-04]
