In [1]:
# File Directory 
import glob
import os
from os.path import isdir, join
from pathlib import Path

# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
import librosa

# Dimension Reduction
from sklearn.decomposition import PCA

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import librosa.display

# Data Pre-processing
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelBinarizer

# Deep Learning
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Input, layers, optimizers, backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import cv2
import re

# Configuration
#py.init_notebook_mode(connected=True)

%matplotlib inline

In [2]:
def show(data, sr=22050):
    """Plot the waveform of a 1-D audio signal.

    Parameters
    ----------
    data : array-like
        Audio samples to draw.
    sr : int, optional
        Sampling rate used to scale the time axis. Defaults to 22050 Hz,
        the rate this notebook configures and plays audio back at; the
        previously hard-coded 16000 mis-scaled the time axis.
    """
    plt.figure(figsize=(12, 4))
    librosa.display.waveplot(data, sr=sr)

## Data extraction

In [None]:
# Accumulators that the parsing cells append to.
samples, labels = [], []
gunshot_frequency_threshold = 0.25

# Audio geometry: every training window is two seconds long at 22050 Hz.
sample_rate = 22050
sample_rate_per_two_seconds = 2 * sample_rate
input_shape = (sample_rate_per_two_seconds, 1)

# Dataset locations (each path keeps its trailing separator so file names
# can be appended directly).
base_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\"
data_dir = "".join([base_dir, "REU_Samples_and_Labels\\"])
sound_data_dir = "".join([data_dir, "Samples\\"])

In [None]:
# Load the label table; later cells look up its "Class" column by "ID".
sound_types = pd.read_csv(data_dir + "labels.csv")

In [None]:
# List the distinct class names present in the label table.
np.unique(sound_types.Class)

In [None]:
def _two_second_windows(sample, window):
    """Cut a 1-D signal into fixed-length windows.

    A signal no longer than `window` yields a single window, zero-padded at
    the end. A longer signal yields consecutive non-overlapping full windows;
    a trailing partial window is discarded.
    """
    if len(sample) <= window:
        # np.pad keeps the dtype of the loaded signal instead of round-tripping
        # through a Python list.
        return [np.pad(sample, (0, window - len(sample)), mode="constant")]

    # "+ 1" keeps the final window when the length is an exact multiple of
    # `window`; without it that last full window was silently dropped.
    return [sample[start : start + window]
            for start in range(0, len(sample) - window + 1, window)]


print("...Parsing sound data...")
sound_file_id = 0
sound_file_names = []
count = 0

for file in os.listdir(sound_data_dir):
    if (count % 100 == 0):
        print(count)

    count += 1
    if not file.endswith(".wav"):
        continue

    # The first run of digits in the file name is the ID used in labels.csv.
    sound_file_id = int(re.search(r'\d+', file).group())
    sample, sample_rate = librosa.load(sound_data_dir + file, res_type='kaiser_fast')
    label = sound_types.loc[sound_types["ID"] == sound_file_id, "Class"].values[0]

    # Every window cut from a file inherits that file's label and name.
    for window in _two_second_windows(sample, sample_rate_per_two_seconds):
        samples.append(window)
        labels.append(label)
        sound_file_names.append(file)

print("The number of samples available for training is currently " + str(len(samples)) + '.')
print("The number of labels available for training is currently " + str(len(labels)) + '.')

In [None]:
# Display the label recorded for every extracted window.
labels

In [None]:
# Spot-check one window: waveform, peak absolute amplitude, label, playback.
p = 7000
chosen_window = samples[p]
chosen_label = labels[p]

show(chosen_window)
print(max(abs(chosen_window)))
print(chosen_label)
ipd.Audio(chosen_window, rate=22050)


In [None]:
# Distinct class names among the parsed windows.
np.unique(labels)

In [None]:
# Cache the parsed windows, their labels and their source file names on disk.
for npy_path, payload in (
    ("samples.npy", samples),
    ("labels.npy", labels),
    ("file_names.npy", sound_file_names),
):
    np.save(npy_path, payload)

In [None]:
# Reload the cached arrays. Samples and labels are turned back into lists
# (one entry per window) so further windows can be appended; the file names
# stay a numpy array.
samples, labels = (
    list(np.load(npy_path)) for npy_path in ("samples.npy", "labels.npy")
)
sound_file_names = np.load("file_names.npy")

In [None]:
# Remember the current window count so the number added later can be reported.
old = len(samples)

## Add other data

In [None]:
# Rebind sound_data_dir to the folder whose recordings are all labelled "other".
# NOTE(review): this overwrites the earlier Samples path held in the same name.
sound_data_dir = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\other\\"

In [None]:
count = 0
for file in os.listdir(sound_data_dir):
    if (count % 100 == 0):
        print(count)

    count += 1
    # Every recording in this folder is cut into 2 second-long windows that
    # all receive the catch-all label.
    sample, sample_rate = librosa.load(sound_data_dir + file, res_type='kaiser_fast')
    label = "other"

    if len(sample) <= sample_rate_per_two_seconds:
        # Short clip: zero-pad the tail up to exactly one window. np.pad keeps
        # the loaded dtype instead of round-tripping through a Python list.
        number_of_missing_hertz = sample_rate_per_two_seconds - len(sample)
        windows = [np.pad(sample, (0, number_of_missing_hertz), mode="constant")]
    else:
        # "+ 1" keeps the final window when the clip length is an exact
        # multiple of the window size; a trailing partial window is dropped.
        last_start = sample.size - sample_rate_per_two_seconds + 1
        windows = [sample[i : i + sample_rate_per_two_seconds]
                   for i in range(0, last_start, sample_rate_per_two_seconds)]

    # File names are deliberately not tracked for these recordings.
    for window in windows:
        samples.append(window)
        labels.append(label)


In [None]:
# Number of windows appended since `old` was recorded.
len(samples) - old

In [None]:
# Overwrite the cached samples/labels with the extended lists.
# NOTE(review): file_names.npy is not rewritten here, so it no longer lines up
# one-to-one with samples.npy once "other" windows have been appended.
np.save("samples.npy", samples)
np.save("labels.npy", labels)

In [3]:
# Load the cached windows and labels as numpy arrays (not lists this time).
samples = np.load("samples.npy")
labels = np.load("labels.npy")

In [4]:
# Distinct class names in the loaded label array.
np.unique(labels)

array(['air_conditioner', 'car_horn', 'children_playing', 'dog_bark',
       'drilling', 'engine_idling', 'fireworks', 'glassbreak', 'gun_shot',
       'jackhammer', 'other', 'siren', 'street_music'], dtype='<U16')

In [23]:
# Keep every label except 'other'; boolean-mask indexing yields a new array,
# so `labels` itself is left untouched.
labels1 = labels[labels != 'other']

In [24]:
# Class names that the next cell collapses into the single label 'urban'.
urban = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark',
       'drilling', 'engine_idling', 'jackhammer', 'siren', 'street_music']

In [25]:
# Collapse every class listed in `urban` into the single label 'urban' with
# one vectorized mask assignment instead of a Python-level loop.
labels1[np.isin(labels1, urban)] = 'urban'

In [26]:
# Distinct class names remaining after the relabelling.
np.unique(labels1)

array(['fireworks', 'glassbreak', 'gun_shot', 'urban'], dtype='<U16')

## Augment data

In [None]:
def time_shift(wav):
    """Shift a waveform left or right by a random offset, keeping its length.

    The offset is drawn uniformly from +/- half the signal length and
    truncated to an integer. A non-negative offset drops that many leading
    samples; a negative one drops trailing samples. The vacated end is filled
    with uniform noise in [-0.001, 0.001).
    """
    n = wav.shape[0]
    offset = int(np.random.uniform(-n * 0.5, n * 0.5))
    filler = np.random.uniform(-0.001, 0.001, abs(offset))

    if offset < 0:
        # Shift right: noise first, then the signal minus its tail.
        return np.concatenate([filler, wav[:offset]])
    # Shift left: the signal minus its head, then noise.
    return np.concatenate([wav[offset:], filler])
    
def change_pitch(wav, sample_rate):
    """Pitch-shift a waveform by a random whole number of steps.

    Parameters
    ----------
    wav : numpy.ndarray
        Audio signal to shift.
    sample_rate : int
        Sampling rate of `wav`.

    Returns
    -------
    numpy.ndarray
        The pitch-shifted signal returned by librosa.
    """
    # int() truncates toward zero, so the step count lies in [-9, 9].
    magnitude = int(np.random.uniform(-10, 10))
    # `sr` and `n_steps` are passed by keyword: newer librosa releases accept
    # them only as keywords, and older releases accept keywords as well.
    wav_pitch_change = librosa.effects.pitch_shift(wav, sr=sample_rate, n_steps=magnitude)
    return wav_pitch_change
    
def speed_change(wav):
    """Resample a waveform by a random factor, then restore its length.

    The signal is resized with cv2 to `int(len(wav) * factor)` samples, with
    `factor` drawn uniformly from [0.7, 1.3). A result shorter than the input
    is padded on both sides with uniform noise in [-0.001, 0.001); otherwise
    the result is centre-cropped back to the input length.
    """
    target_len = len(wav)
    factor = np.random.uniform(0.7, 1.3)
    stretched = cv2.resize(wav, (1, int(target_len * factor))).squeeze()

    surplus = len(stretched) - target_len
    if surplus >= 0:
        # Too long (or exact): keep the centred slice of the original length.
        first = int(surplus / 2)
        return stretched[first : first + target_len]

    # Too short: split the missing samples between the two ends.
    deficit = -surplus
    lead = np.random.uniform(-0.001, 0.001, int(deficit / 2))
    tail = np.random.uniform(-0.001, 0.001, int(np.ceil(deficit / 2)))
    return np.r_[lead, stretched, tail]
    
def change_volume(wav, magnitude):
    """Scale a waveform's amplitude by `magnitude`.

    A factor between 0 and 1 makes the signal quieter, 1 leaves it unchanged,
    and a factor above 1 makes it louder.
    """
    # The gain is wrapped in a one-element array and broadcast over `wav`.
    gain = np.array([magnitude])
    return np.multiply(gain, wav)
    
def add_background(wav, bg_files, sound_directory=None):
    """Mix a random slice of a randomly chosen background recording into `wav`.

    Parameters
    ----------
    wav : numpy.ndarray
        1-D foreground signal.
    bg_files : sequence of str
        Candidate file names; one is picked at random and loaded.
    sound_directory : str, optional
        Prefix prepended to the chosen file name (must end with a path
        separator). Defaults to the notebook's sample folder, which used to
        be hard-coded here.

    Returns
    -------
    numpy.ndarray
        `wav` scaled by a factor in [0.8, 1.2) plus the background slice
        scaled by a factor in [0, 0.5).

    Raises
    ------
    ValueError
        If `bg_files` is empty.
    """
    if sound_directory is None:
        sound_directory = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\REU_Samples_and_Labels\\Samples\\"
    if len(bg_files) == 0:
        raise ValueError("bg_files must contain at least one file name")

    chosen_bg_file = bg_files[np.random.randint(len(bg_files))]
    bg, _ = librosa.load(sound_directory + chosen_bg_file, res_type='kaiser_fast')

    n = wav.shape[0]
    # At least 1 so randint has a valid range even when the background is
    # not longer than the foreground.
    max_start = max(bg.shape[0] - n, 1)
    start_ = np.random.randint(max_start)
    bg_slice = bg[start_ : start_ + n]

    if bg_slice.shape[0] < n:
        # Background too short: pad both ends with low-level noise.
        pad_len = n - bg_slice.shape[0]
        bg_slice = np.r_[np.random.uniform(-0.001, 0.001, int(pad_len / 2)),
                         bg_slice,
                         np.random.uniform(-0.001, 0.001, int(np.ceil(pad_len / 2)))]

    wav_with_bg = wav * np.random.uniform(0.8, 1.2) + bg_slice * np.random.uniform(0, 0.5)
    return wav_with_bg

In [None]:
label_csv = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\REU_Samples_and_Labels\\labels.csv"
sound_directory = "C:\\Users\\hosle\\Documents\\_REU2019\\_project\\REU_Samples_and_Labels\\Samples\\"
sound_types = pd.read_csv(label_csv)

# Scan the directory once and report how many entries it holds.
all_sound_files = os.listdir(sound_directory)
print(len(all_sound_files))

# Background candidates are the .wav files whose label is not "gun_shot".
# Building the list directly avoids repeated list.remove calls and keeps
# non-audio directory entries out of the pool handed to add_background.
bg_files = []
for file in all_sound_files:
    if not file.endswith(".wav"):
        continue

    # The first run of digits in the file name is the ID used in labels.csv.
    sound_file_id = int(re.search(r'\d+', file).group())
    prescribed_label = sound_types.loc[sound_types["ID"] == sound_file_id, "Class"].values[0]

    if prescribed_label != "gun_shot":
        bg_files.append(file)

print(len(bg_files))

In [None]:
number_of_augmentations = 4
# Each source window contributes itself plus one copy per augmentation.
copies_per_sample = number_of_augmentations + 1

augmented_samples = np.zeros((samples.shape[0] * copies_per_sample, samples.shape[1]))
# Every copy keeps its source label, so labels are simply repeated in place.
augmented_labels = np.repeat(labels, copies_per_sample)

for j in range(samples.shape[0]):
    # Row i holds the untouched window; rows i+1 .. i+4 hold its variants.
    i = j * copies_per_sample
    if i % 100 == 0:
        print(i)

    original = samples[j, :]
    augmented_samples[i, :] = original
    augmented_samples[i + 1, :] = time_shift(original)
    augmented_samples[i + 2, :] = change_pitch(original, sample_rate)
    augmented_samples[i + 3, :] = speed_change(original)
    augmented_samples[i + 4, :] = change_volume(original, np.random.uniform())
    # Background mixing (add_background) is not applied here.

# Report the size of the augmented set that was just built.
print("The number of samples available for training is currently " + str(len(augmented_samples)) + '.')
print("The number of labels available for training is currently " + str(len(augmented_labels)) + '.')

In [None]:
# Cache the augmented windows and their labels on disk.
np.save("aug_samples.npy", augmented_samples)
np.save("aug_labels.npy", augmented_labels)

## Model

In [None]:
# Reload the cached (un-augmented) windows and their string labels.
# LabelBinarizer is imported with the other dependencies in the first cell.
samples = np.load("samples.npy")
labels = np.load("labels.npy")

In [None]:
# Distinct class names the model will be trained on.
np.unique(labels)

In [None]:
# In-memory size of the sample array in GiB (bytes / 1024**3).
samples.nbytes / (2**10)**3

In [None]:
# Encode the string labels with a LabelBinarizer; `lb` is kept so that
# predictions can be decoded later with lb.inverse_transform.
# NOTE(review): `labels` is overwritten with its encoded form, so running this
# cell a second time would fit the binarizer on already-encoded data.
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
labels

In [None]:
# A fixed seed makes the shuffled split identical on every run; without it
# each kernel restart trained and validated on different windows.
kf = KFold(n_splits=3, shuffle=True, random_state=42)

# Only the first of the three folds is used: one third is held out and the
# remaining two thirds are used for training.
train_index, test_index = next(kf.split(samples))
print("TRAIN:", train_index, "TEST:", test_index)

train_wav, test_wav = samples[train_index], samples[test_index]
train_label, test_label = labels[train_index], labels[test_index]

In [None]:
# Training hyper-parameters.
lr = 1e-3            # initial learning rate handed to the optimizer
batch_size = 32      # mini-batch size handed to model.fit
drop_out_rate = 0.2

# Iteration-budget settings.
generations = 20 * 1000
num_gens_to_wait = 250

# Network input: (samples per two-second window, channels).
input_shape = (44100, 1)

In [None]:
# Conv1D needs an explicit channel axis, so each split is reshaped from
# (windows, 44100) to (windows, 44100, 1). The labels are left as they are.
train_wav, test_wav = (
    split.reshape(-1, 44100, 1) for split in (train_wav, test_wav)
)

### ROC AUC metric (defined here; the model below is compiled with accuracy only)

In [None]:
def auc(y_true, y_pred):
    """Keras-compatible AUC metric built on `tf.metrics.auc`.

    Takes the second element of the pair returned by `tf.metrics.auc`, runs
    the local-variable initializer in the backend session, and returns that
    element.
    """
    _, update_op = tf.metrics.auc(y_true, y_pred)
    # Initialize TensorFlow's local variables before the metric is evaluated.
    K.get_session().run(tf.local_variables_initializer())
    return update_op

In [None]:
def _conv_pair(x, filters):
    """Apply two width-9, same-padded, ReLU Conv1D layers with `filters` channels."""
    for _ in range(2):
        x = layers.Convolution1D(filters, 9, activation="relu", padding="same")(x)
    return x


input_tensor = Input(shape=input_shape)
# One softmax unit per class.
# NOTE(review): must equal the number of columns produced by the label
# binarizer -- confirm whenever the class list changes.
nclass = 13

# Three conv/pool/dropout stages that progressively shorten the time axis.
x = _conv_pair(input_tensor, 32)
x = layers.MaxPool1D(16)(x)
x = layers.Dropout(rate=0.1)(x)

x = _conv_pair(x, 64)
x = layers.MaxPool1D(4)(x)
x = layers.Dropout(rate=0.1)(x)

x = _conv_pair(x, 64)
x = layers.MaxPool1D(4)(x)
x = layers.Dropout(rate=0.1)(x)

# Final conv stage collapses the time axis with a global max.
x = _conv_pair(x, 256)
x = layers.GlobalMaxPool1D()(x)
x = layers.Dropout(rate=0.2)(x)

# Classifier head.
x = layers.Dense(128, activation="relu")(x)
# NOTE(review): 1028 units looks like a typo for 1024 -- left unchanged.
x = layers.Dense(1028, activation="relu")(x)
output_tensor = layers.Dense(nclass, activation="softmax")(x)

model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)
# Adam's second positional parameter is beta_1, so `Adam(lr, lr / 100)` set
# beta_1 to lr / 100 and effectively switched off the first-moment average.
# lr / 100 is presumably meant as the learning-rate decay, so both values are
# now passed by keyword.
opt = optimizers.Adam(lr=lr, decay=lr / 100)

model.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['acc'])

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
model_filename = '1Dcnngunglass.pkl'

# Both callbacks watch validation accuracy, where higher is better.
monitor_settings = dict(monitor='val_acc', mode='max', verbose=1)

# Stop after 10 epochs without improvement.
early_stopping = EarlyStopping(patience=10, **monitor_settings)
# Write a checkpoint to `model_filename` only when the monitored value improves.
best_checkpoint = ModelCheckpoint(model_filename, save_best_only=True, **monitor_settings)

callbacks = [early_stopping, best_checkpoint]

In [None]:
# Train on the training fold and evaluate on the held-out fold after each
# epoch. validation_data is a tuple, the form Keras documents; a list is not
# interpreted as an (inputs, targets) pair by every tf.keras version.
# NOTE(review): the held-out fold also drives early stopping and checkpoint
# selection, so it is not an independent test set.
model.fit(train_wav, train_label,
          validation_data=(test_wav, test_label),
          batch_size=batch_size,
          callbacks=callbacks,
          epochs=50,
          verbose=1)

In [None]:
# Load weights from the checkpoint file written by the ModelCheckpoint callback.
model.load_weights("1Dcnngunglass.pkl")

In [None]:
# Load weights from model.h5.
# NOTE(review): on a top-to-bottom run this executes before the cell that
# saves model.h5, so it needs a file from an earlier session and replaces the
# checkpoint weights loaded just above.
model.load_weights("model.h5")

In [None]:
# Save the current model to model.h5; the TFLite conversion cells read this file.
model.save("model.h5")

### Validation

In [None]:
# Recording used to sanity-check the trained model.
mypath = "C:\\Users\\hosle\\Downloads\\felix_blume_fireworks_distant_new_years_eve_mexico.mp3"
#mypath = "C:\\Users\\hosle\\Documents\\_REU2019\\extra\\260600.wav"

# Load the recording; `sr` receives the sampling rate reported by librosa.
fire, sr = librosa.load(mypath, res_type='kaiser_fast')

In [None]:
# Plot the waveform of the whole recording.
show(fire)

In [None]:
# Cut the recording into 44100-sample windows, mirroring the training data.
validation = []
if (fire.size <= 44100):
    # Short recording: a single window, zero-padded at the end.
    sample_slice = np.zeros(44100)
    sample_slice[0:fire.size] = fire
    validation.append(sample_slice)
else:
    # "+ 1" keeps the final window when the length is an exact multiple of
    # 44100; a trailing partial window is still dropped.
    for i in range(0, fire.size - 44100 + 1, 44100):
        sample_slice = fire[i : i + 44100]
        validation.append(sample_slice)

validation = np.array(validation)
# Add the channel axis expected by the Conv1D input.
validation_1 = validation.reshape(-1,44100,1)

In [None]:
# Run the model on every validation window.
val_pred = model.predict(validation_1)

In [None]:
# Decode the model outputs back to class names with the fitted binarizer,
# then report how many windows were classified.
val_pred1 = lb.inverse_transform(val_pred)
print(len(val_pred1))

In [None]:
# Indices of the windows whose decoded prediction is 'gun_shot'.
np.argwhere(val_pred1 == 'gun_shot').T

In [None]:
# Display the decoded prediction for every window.
val_pred1

In [None]:
# Inspect one validation window: rounded model outputs, decoded label,
# waveform and audio playback. The stray re-definition of show() that sat in
# the middle of this cell is gone; the function is defined once near the top
# of the notebook.
i = 1
print(np.round(val_pred[i, :], 3))
print(val_pred1[i])
show(validation[i])
ipd.Audio(validation[i], rate=22050)

## TFLite converter

In [None]:
# Keras model file that the converter below reads.
model_name = "model.h5"

In [None]:
# Build a TFLite converter from the saved Keras model file.
# NOTE(review): tf.contrib exists only in TensorFlow 1.x -- confirm the
# installed version before running.
converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file(model_name)# custom_objects={'auc': auc})
print("\n made the converter using from_keras_model \n")

In [None]:
#post-training quantization
# Turn on the converter's post-training quantization flag before converting.
converter.post_training_quantize = True
print(" did post training quantization \n")

In [None]:
#convert
# Run the conversion; the result is written to disk by the next cell.
tflite_model = converter.convert()
print(" converted successfully \n")

In [None]:
#save
# The context manager closes (and therefore flushes) the file handle even if
# the write fails; the bare open(...).write(...) never closed it.
with open("converted_gunshot_model_ptq.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)
print(" saved successfully \n")