## Package Imports

In [27]:
import pyaudio
import wave
import librosa
import librosa.effects
import IPython.display as ipd
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Input, layers, optimizers, backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

## Variable Initializations

In [19]:
audio_format = pyaudio.paFloat32
audio_rate = 44100
audio_channels = 1
audio_device_index = 1
audio_frames_per_buffer = 4096
audio_sample_duration = 4

## Processing Microphone Audio

In [36]:
pa = pyaudio.PyAudio()
    
stream = pa.open(format = audio_format,
                 rate = audio_rate,
                 channels = audio_channels,
                 input_device_index = audio_device_index,
                 frames_per_buffer = audio_frames_per_buffer,
                 input = True)

print("--- Recording Audio ---")
np_array_data = []

# Loops through the stream and appends audio chunks to the frame array
for i in range(0, int((audio_rate / audio_frames_per_buffer) * audio_sample_duration)):
    data = stream.read(audio_frames_per_buffer, exception_on_overflow = False)
    np_array_data.append(np.frombuffer(data, dtype=np.float32))
    
microphone_data = np.concatenate(np_array_data)
print("--- Finished Recording Audio ---")

# Stops the stream, closes it, and terminates the PyAudio instance
stream.stop_stream()
stream.close()
pa.terminate()

# Saves the audio frames as WAV files
# wavefile = wave.open("mic_test",'wb')
# wavefile.setnchannels(audio_channels)
# wavefile.setsampwidth(pa.get_sample_size(audio_format))
# wavefile.setframerate(audio_rate)
# wavefile.writeframes(b''.join(microphone_data))
# wavefile.close()

--- Recording Audio ---
--- Finished Recording Audio ---


## Playing Recorded Audio Sample

In [40]:
ipd.Audio(microphone_data, rate=44100)

## Resampling Audio Sample

In [38]:
microphone_data = librosa.resample(y=microphone_data, orig_sr=44100, target_sr=22050)

176128
88064


## Trimming Audio Sample

In [None]:
microphone_data = microphone_data[:44100]

## Loading the Lite Model (Deprecated)

In [None]:
interpreter = tf.lite.Interpreter("./models/converted_gunshot_sound_model.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

input_shape = input_details[0]["shape"]
input_data = microphone_data
interpreter.set_tensor(input_details[0]["index"], input_data)

interpreter.invoke()

output_data = interpreter.get_tensor(output_details[0]["index"])
print(output_data)

## ROC (AUC) metric - Uses the import "from tensorflow.keras import backend as K"

In [2]:
def auc(y_true, y_pred):
    auc = tf.metrics.auc(y_true, y_pred)[1]
    K.get_session().run(tf.local_variables_initializer())
    return auc

## Model Parameters

In [4]:
sample_rate_per_two_seconds = 44100
input_shape = (sample_rate_per_two_seconds, 1)
drop_out_rate = 0.1
learning_rate = 0.001
number_of_epochs = 100
number_of_classes = 2
batch_size = 32
optimizer = optimizers.Adam(learning_rate, learning_rate / 100)
input_tensor = Input(shape=input_shape)
metrics = [auc, "accuracy"]

## Loading Original Model

In [5]:
x = layers.Conv1D(16, 9, activation="relu", padding="same")(input_tensor)
x = layers.Conv1D(16, 9, activation="relu", padding="same")(x)
x = layers.MaxPool1D(16)(x)
x = layers.Dropout(rate=drop_out_rate)(x)

x = layers.Conv1D(32, 3, activation="relu", padding="same")(x)
x = layers.Conv1D(32, 3, activation="relu", padding="same")(x)
x = layers.MaxPool1D(4)(x)
x = layers.Dropout(rate=drop_out_rate)(x)

x = layers.Conv1D(32, 3, activation="relu", padding="same")(x)
x = layers.Conv1D(32, 3, activation="relu", padding="same")(x)
x = layers.MaxPool1D(4)(x)
x = layers.Dropout(rate=drop_out_rate)(x)

x = layers.Conv1D(256, 3, activation="relu", padding="same")(x)
x = layers.Conv1D(256, 3, activation="relu", padding="same")(x)
x = layers.GlobalMaxPool1D()(x)
x = layers.Dropout(rate=(drop_out_rate * 2))(x) # Increasing drop-out rate here to prevent overfitting

x = layers.Dense(64, activation="relu")(x)
x = layers.Dense(1028, activation="relu")(x)
output_tensor = layers.Dense(number_of_classes, activation="softmax")(x)

model = tf.keras.Model(input_tensor, output_tensor)
model.compile(optimizer=optimizer, loss=keras.losses.binary_crossentropy, metrics=metrics)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


## Loading Original Model Weights

In [6]:
model.load_weights("./models/gunshot_sound_model.h5")

## Classification with the Loaded Model

In [18]:
# microphone_data = keras.utils.to_categorical(microphone_data, 2)
microphone_data = microphone_data.reshape(-1, sample_rate_per_two_seconds, 1)
probabilities = model.predict(microphone_data)
print(probabilities)

ValueError: cannot reshape array of size 4227072 into shape (44100,1)