# Library Imports

In [17]:
import os
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import cv2
import six
import IPython.display as ipd
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.preprocessing import LabelBinarizer
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from tensorflow.keras import Input, layers, backend as K
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation, BatchNormalization, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Creating Constants

In [2]:
AUDIO_RATE = 44100
HOP_LENGTH = 345
MINIMUM_FREQUENCY = 20
MAXIMUM_FREQUENCY = AUDIO_RATE // 2
NUMBER_OF_MELS = 128
NUMBER_OF_FFTS = NUMBER_OF_MELS * 20

# Defining Preliminary Functions

In [3]:
def auc(y_true, y_pred):
    auc = tf.metrics.auc(y_true, y_pred)[1]
    K.get_session().run(tf.local_variables_initializer())
    return auc

def power_to_db(S, ref=1.0, amin=1e-10, top_db=80.0):
    S = np.asarray(S)
    if amin <= 0:
        print("ParameterError: amin must be strictly positive")
    if np.issubdtype(S.dtype, np.complexfloating):
        print("Warning: power_to_db was called on complex input so phase information will be discarded.")
        magnitude = np.abs(S)
    else:
        magnitude = S
    if six.callable(ref):
        # User supplied a function to calculate reference power
        ref_value = ref(magnitude)
    else:
        ref_value = np.abs(ref)
    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
    log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value))
    if top_db is not None:
        if top_db < 0:
            print("ParameterError: top_db must be non-negative")
        log_spec = np.maximum(log_spec, log_spec.max() - top_db)
    return log_spec


def convert_to_spectrogram(data):
    spectrogram = librosa.feature.melspectrogram(y=data, sr=AUDIO_RATE,
                                                 hop_length=HOP_LENGTH,
                                                 fmin=MINIMUM_FREQUENCY,
                                                 fmax=MAXIMUM_FREQUENCY,
                                                 n_mels=NUMBER_OF_MELS,
                                                 n_fft=NUMBER_OF_FFTS)
    spectrogram = power_to_db(spectrogram)
    spectrogram = spectrogram.astype(np.float32)
    return spectrogram

# Loading and Binarizing Labels

In [4]:
labels = np.load("/home/alexm/Datasets/all_labels.npy")

labels = np.array([("gun_shot" if label == "gun_shot" else "other") for label in labels])
label_binarizer = LabelBinarizer()
labels = label_binarizer.fit_transform(labels)
labels = np.hstack((labels, 1 - labels))

# Testing Model

In [31]:
# Loads 128 x 64 Keras model from H5 file
model = keras.models.load_model("/home/alexm/Datasets/128_128_RYAN_smaller_spectrogram_model.h5", custom_objects = {"auc" : auc})

# Gets the input shape from the 128 x 64 Keras model
input_shape = (1, 128, 128, 1)

# Loads in a test sample WAV file
gunshot_sample, sr = librosa.load("/home/alexm/Datasets/gunshot.wav")
gunshot_sample = convert_to_spectrogram(gunshot_sample)
gunshot_sample = gunshot_sample.reshape(input_shape)

# Performs inference with the 128 x 128 Keras model
probabilities = model.predict(gunshot_sample)
# probabilities = model.predict(np.array(np.random.random_sample(input_shape), dtype = "float32"))
print("The model-predicted probability values: " + str(probabilities[0]))
print("Model-predicted sample class: " + str(label_binarizer.inverse_transform(probabilities[0])))

The model-predicted probability values: [1.9151036e-04 9.9980849e-01]
Model-predicted sample class: ['gun_shot' 'other']


# Loading TFLite Version of Model

In [29]:
interpreter = tf.lite.Interpreter(model_path = "/home/alexm/Datasets/128_x_128.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_shape = input_details[0]['shape']
# gunshot_sample = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], gunshot_sample)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data[0])
label_binarizer.inverse_transform(output_data[0])

[1.9150981e-04 9.9980849e-01]


array(['gun_shot', 'other'], dtype='<U8')

# Playing Original Audio Sample

In [18]:
# Loads in the test sample WAV file again for playback
gunshot_sample, sr = librosa.load("/home/alexm/Datasets/gunshot.wav")

# Plays the gunshot sample selected from the training set
ipd.Audio(gunshot_sample, rate = sr)