## Package Imports

In [None]:
import pyaudio
import librosa
import logging
import time
import multiprocessing
import audioop
import wave
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from sys import byteorder
from array import array
from struct import pack
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras import Input, layers, optimizers, backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
#from gsmmodem.modem import GsmModem

## Configuring the Logger

In [None]:
# Configures the shared 'debugger' logger to write DEBUG-level records to output.log
logger = logging.getLogger('debugger')
logger.setLevel(logging.DEBUG)

# logging.getLogger returns the same logger object on every call, so re-running
# this cell would otherwise attach one more FileHandler each time and every
# record would be written to output.log multiple times.
if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers):
    ch = logging.FileHandler('output.log')
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)

## Variable Initializations

In [None]:
# Sample format requested from PyAudio when the microphone stream is opened
audio_format = pyaudio.paInt16
# Sampling rate of the microphone stream; also the source rate used when resampling for inference
audio_rate = 44100
# Number of channels for the stream and for the saved WAV recordings
audio_channels = 1
# Index of the input device passed to PyAudio
audio_device_index = 0
# Number of frames read from the stream per read() call
audio_frames_per_buffer = 4410
# Scales how many buffers are gathered into one sample (presumably seconds per sample — confirm)
audio_sample_duration = 2
# A sample whose maximum value reaches this threshold starts a loud-noise sequence
audio_volume_threshold = 30000
# Minimum model probability (second output column) required to raise an alert
inference_model_confidence_threshold = 0.99
# Recipients handed to the SMS alert process
# NOTE(review): these look like real phone numbers hard-coded in the notebook — consider loading them from configuration
designated_alert_recipients = ["8163449956", "9176202840", "7857642331"]

## Loading in Augmented Labels

In [None]:
# Loads the augmented sound labels from a NumPy .npy file
# NOTE(review): absolute, machine-specific path — consider a configurable data directory
labels = np.load("/home/alexm/Datasets/gunshot_augmented_sound_labels.npy")

## Binarizing Labels

In [None]:
# Maps every raw label to a class name: the string "1" becomes "gun_shot", anything else "other"
label_names = np.where(labels.astype("str") == "1", "gun_shot", "other")

# Fits the binarizer on the class names; the fitted object is reused later to decode predictions
label_binarizer = LabelBinarizer()
binary_column = label_binarizer.fit_transform(label_names)

# Appends the complementary column so every row holds (value, 1 - value)
labels = np.hstack((binary_column, 1 - binary_column))

## Sound Post-Processing Functions

In [None]:
def normalize(sound_data):
    """Scale a sample so that its loudest point has an absolute value of 16384.

    Parameters
    ----------
    sound_data : sequence or array of numbers
        Audio samples, e.g. an ``array('h')``, a list, or a NumPy array.

    Returns
    -------
    numpy.ndarray
        An ``int16`` array with the same shape as the input. Every scaled
        value is truncated toward zero. Empty input yields an empty array and
        an all-zero (silent) input yields zeros instead of raising.
    """
    # Averages the volume out
    sound_normalization_threshold = 16384

    # float64 keeps abs() of the most negative int16 value from overflowing
    samples = np.asarray(sound_data, dtype = np.float64)
    if samples.size == 0:
        return np.zeros(samples.shape, dtype = np.int16)

    peak = np.abs(samples).max()
    if peak == 0:
        # A silent sample has nothing to scale; avoids a division by zero
        return np.zeros(samples.shape, dtype = np.int16)

    times = float(sound_normalization_threshold) / peak

    # np.trunc mirrors int(): rounding toward zero for both signs
    return np.trunc(samples * times).astype(np.int16)

## Model Construction Functions

In [None]:
def auc(y_true, y_pred):
    """ROC (AUC) metric usable in a Keras ``metrics`` list.

    Uses the import "from tensorflow.keras import backend as K".

    Builds ``tf.metrics.auc`` for the given tensors, initializes TensorFlow's
    local variables in the Keras backend session, and returns the second
    element of the value ``tf.metrics.auc`` produced.
    """
    # tf.metrics.auc yields a pair; only its second element is used as the metric tensor
    _, auc_update = tf.metrics.auc(y_true, y_pred)

    # The metric's local variables must be initialized after they have been created above
    session = K.get_session()
    session.run(tf.local_variables_initializer())

    return auc_update

In [None]:
def load_model_one(weights_file):
    """Rebuild the 1D time-series model, compile it, and load its weights.

    Parameters
    ----------
    weights_file : str
        Path handed to ``model.load_weights``.

    Returns
    -------
    tf.keras.Model
        The compiled model. It accepts inputs of shape ``(44100, 1)`` and ends
        in a two-unit softmax layer.
    """
    # 1D Time-Series Model Parameters
    drop_out_rate = 0.1
    learning_rate = 0.001
    number_of_classes = 2
    input_shape = (44100, 1)
    metrics = [auc, "accuracy"]

    # The second positional parameter of Adam is beta_1, so it is spelled out here.
    # NOTE(review): learning_rate / 100 looks like it was meant as a decay value rather
    # than beta_1 — confirm before training with this optimizer (inference is unaffected).
    optimizer = optimizers.Adam(learning_rate, beta_1 = learning_rate / 100)

    # Reconstructing 1D Time-Series Model
    # Each stage is (filters, kernel size, max-pool size): two convolutions, pooling, drop-out.
    # The layer order must match the architecture the weights were saved from.
    convolution_stages = ((16, 9, 16), (32, 3, 4), (32, 3, 4))

    input_tensor = Input(shape = input_shape)
    x = input_tensor
    for filters, kernel_size, pool_size in convolution_stages:
        x = layers.Conv1D(filters, kernel_size, activation = "relu", padding = "same")(x)
        x = layers.Conv1D(filters, kernel_size, activation = "relu", padding = "same")(x)
        x = layers.MaxPool1D(pool_size)(x)
        x = layers.Dropout(rate = drop_out_rate)(x)

    x = layers.Conv1D(256, 3, activation = "relu", padding = "same")(x)
    x = layers.Conv1D(256, 3, activation = "relu", padding = "same")(x)
    x = layers.GlobalMaxPool1D()(x)
    x = layers.Dropout(rate = (drop_out_rate * 2))(x) # Increasing drop-out rate here to prevent overfitting

    x = layers.Dense(64, activation = "relu")(x)
    x = layers.Dense(1028, activation = "relu")(x)

    # Compiling 1D Time-Series Model
    output_tensor = layers.Dense(number_of_classes, activation = "softmax")(x)
    model = tf.keras.Model(input_tensor, output_tensor)
    model.compile(optimizer = optimizer, loss = keras.losses.binary_crossentropy, metrics = metrics)

    # Loading 1D Time-Series Model Weights
    model.load_weights(weights_file)

    return model

In [None]:
def load_model_two(weights_file):
    """Rebuild the 2D spectrogram model, compile it, and load its weights.

    Parameters
    ----------
    weights_file : str
        Path handed to ``spec_model.load_weights``.

    Returns
    -------
    tf.keras.Model
        The compiled model. It accepts inputs of shape ``(128, 87, 1)`` and
        ends in a two-unit softmax layer.
    """
    # 2D Spectrogram Model Parameters
    input_shape = (128, 87, 1)
    filter_size = (3,3)
    maxpool_size = (3,3)
    activation = "relu"
    drop_out_rate = 0.1
    number_of_classes = 2
    learning_rate = 0.001
    metrics = [auc, "accuracy"]

    # NOTE(review): the second Adam argument is beta_1; confirm learning_rate / 100 was not meant as a decay
    optimizer = optimizers.Adam(learning_rate, beta_1 = learning_rate / 100)

    # Reconstructing 2D Spectrogram Model
    # Three identical stages that differ only in filter count: convolution, batch norm, pooling, drop-out
    input_tensor = Input(shape = input_shape)
    x = input_tensor
    for filter_count in (16, 32, 64):
        x = layers.Conv2D(filter_count, filter_size, activation = activation, padding = "same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPool2D(maxpool_size)(x)
        x = layers.Dropout(rate = drop_out_rate)(x)

    # Final convolution stage uses global pooling and a doubled drop-out rate
    x = layers.Conv2D(256, filter_size, activation = activation, padding = "same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.GlobalMaxPool2D()(x)
    x = layers.Dropout(rate = (drop_out_rate * 2))(x) # Increasing drop-out rate here to prevent overfitting

    # Fully connected head
    for unit_count in (64, 1028):
        x = layers.Dense(unit_count, activation = activation)(x)

    # Compiling 2D Spectrogram Model
    output_tensor = layers.Dense(number_of_classes, activation = "softmax")(x)
    spec_model = tf.keras.Model(input_tensor, output_tensor)
    spec_model.compile(optimizer = optimizer, loss = keras.losses.binary_crossentropy, metrics = metrics)

    # Loading 2D Spectrogram Model Weights
    spec_model.load_weights(weights_file)

    return spec_model

# Multiprocessing Inference: a main process adds two-second samples of microphone data to the audio analysis queue; an audio analysis process retrieves and analyzes samples from that queue; a prediction process detects the presence of a gunshot in an analyzed audio sample; and an SMS alert process dispatches groups of messages to the designated recipients.

## Defining Multiprocess Functions

In [None]:
def _build_loud_noise_sequence(slices, peak_location):
    """Re-align consecutive slices so that each new slice starts at ``peak_location``.

    Every new slice is the tail of one slice (from ``peak_location`` on)
    followed by the head of the next slice (up to ``peak_location``), so it has
    the same length as the originals. ``len(slices) - 1`` slices are returned.
    """
    return [
        current_slice[peak_location:] + following_slice[:peak_location]
        for current_slice, following_slice in zip(slices, slices[1:])
    ]


def analyze_microphone_data(audio_volume_threshold):
    """Watch the audio analysis queue and forward loud-noise sequences for prediction.

    Runs forever. Each sample taken from ``audio_analysis_queue`` has its
    maximum value logged. A sample whose maximum reaches
    ``audio_volume_threshold`` starts a sequence; that sample and the next three
    are collected, re-aligned into three samples that begin where the maximum
    occurred, and put on ``prediction_queue`` as a list.

    The in-progress sequence is held in a local list. It is private to this
    process, and ``multiprocessing.Queue.qsize()`` is only approximate and is
    not implemented on every platform. The sequence is also forwarded as soon
    as its fourth sample arrives, so no incoming sample is discarded.

    Parameters
    ----------
    audio_volume_threshold : int
        Minimum value of a sample's maximum that starts a sequence.
    """
    # Number of consecutive samples that make up one loud-noise sequence
    samples_per_sequence = 4

    # Samples collected for the sequence in progress (empty when no sequence has started)
    sequence_slices = []
    maximum_frequency_value_location = 0

    # The audio analysis process will run indefinitely
    while True:
        # Gets a sample from the audio analysis queue
        microphone_data = audio_analysis_queue.get()
        maximum_frequency_value = max(microphone_data)

        # Outputs a given sample's maximum frequency value
        logger.debug("The maximum frequency value for a given two-second audio sample: " + str(maximum_frequency_value))

        if not sequence_slices:
            # No sequence in progress: only a sample that meets the threshold starts one
            if maximum_frequency_value < audio_volume_threshold:
                continue

            # Finds the location of the first maximum frequency value in the sequence
            maximum_frequency_value_location = microphone_data.index(maximum_frequency_value)

        sequence_slices.append(microphone_data)

        # Once four samples are collected, they are processed to make three new samples
        if len(sequence_slices) == samples_per_sequence:
            loud_noise_sequence = _build_loud_noise_sequence(sequence_slices, maximum_frequency_value_location)

            # Places the new list of processed samples on the prediction queue
            prediction_queue.put(loud_noise_sequence)

            # Marks the end of the loud noise sequence
            sequence_slices = []

In [None]:
def _prepare_microphone_sample(sample, audio_rate):
    """Resample one microphone sample to 22050 Hz, normalize it, and keep the first 44100 values."""
    # Cast to floating point before resampling; int16 values are exactly representable as float32
    sample = np.asarray(sample, dtype = np.float32)
    sample = librosa.resample(y = sample, orig_sr = audio_rate, target_sr = 22050)
    sample = normalize(sample)
    return sample[:44100]


def predict_microphone_data_class(audio_rate):
    """Classify loud-noise sequences and record those recognized as gunshots.

    Runs forever. Each item taken from ``prediction_queue`` is a list of
    microphone samples. The first sample is post-processed and passed to the
    model; when the second output probability reaches
    ``inference_model_confidence_threshold``, "Gunshot Detected" is put on
    ``sms_alert_queue`` and all post-processed samples are saved together as
    one numbered WAV file under ``./recordings``.

    Parameters
    ----------
    audio_rate : int
        Sampling rate of the incoming microphone samples.
    """
    # Loading 1D Time-Series Model
    model = load_model_one("./models/gunshot_sound_model.h5")

    # Loading 2D Spectrogram Model
    # model = load_model_two("./models/gunshot_sound_model_spectrograph_model.h5")

    # An iterator variable for counting the number of gunshot sounds detected
    gunshot_sound_counter = 1

    # The prediction process will run indefinitely
    while True:

        # Waits to continue until something is in the queue
        microphone_data = prediction_queue.get()

        # Performs post-processing on the first audio sample; the model is fed the
        # resampled and normalized data, not the raw slice
        first_microphone_data_slice = _prepare_microphone_sample(microphone_data[0], audio_rate)
        modified_microphone_data = first_microphone_data_slice.reshape(-1, 44100, 1)

        # Passes a given audio sample into the model for prediction
        probabilities = model.predict(modified_microphone_data)
        logger.debug("Probabilities derived by the model: " + str(probabilities))
        logger.debug("Model-predicted sample class: " + label_binarizer.inverse_transform(probabilities[:, 0])[0])

        if (probabilities[0][1] >= inference_model_confidence_threshold):

            # Sends out an SMS alert
            sms_alert_queue.put("Gunshot Detected")

            # Post-processes the rest of the audio samples in the list (the first one is reused)
            processed_samples = [first_microphone_data_slice]
            for sample in microphone_data[1:]:
                processed_samples.append(_prepare_microphone_sample(sample, audio_rate))

            # Instantiates a new clip
            new_clip = []
            for sample in processed_samples:
                new_clip += sample.tolist()
                logger.debug("Length of new clip: " + str(len(new_clip)))

            # Saves the new six-second sample as a WAV file; the context manager
            # closes the file even if writing fails
            recording_path = "./recordings/Gunshot Sound Sample #" + str(gunshot_sound_counter) + ".wav"
            with wave.open(recording_path, "wb") as wave_file:
                wave_file.setnchannels(audio_channels)
                wave_file.setsampwidth(2)
                wave_file.setframerate(22050)
                wave_file.writeframes(array('h', new_clip))

            # Increments the counter for gunshot sound file names
            gunshot_sound_counter += 1

In [None]:
def send_sms_alert(designated_alert_recipients):
    """Log an alert for every "Gunshot Detected" message taken from ``sms_alert_queue``.

    Runs forever. ``designated_alert_recipients`` is only referenced by the
    disabled modem implementation kept in the string literal after the loop;
    that string is never reached because the loop does not terminate.
    """
    # Continuously dispatches SMS alerts to a list of designated recipients
    while True:
        if sms_alert_queue.get() == "Gunshot Detected":
            logger.debug("ALERT: A Gunshot Has Been Detected")

    # Disabled GSM modem implementation, preserved for reference (unreachable)
    """
    
    # Configuring the Modem Connection
    modem_port = '/dev/ttyUSB0'
    modem_baudrate = 115200
    modem_sim_pin = None  # SIM card PIN (if any)
    
    # Establishing a Connection to the SMS Modem
    logger.debug("Initializing connection to modem...")
    modem = GsmModem(modem_port, modem_baudrate)
    modem.smsTextMode = False
    modem.connect(modem_sim_pin)
    
    # The SMS alert process will run indefinitely
    while True:
        sms_alert_status = sms_alert_queue.get()
        if sms_alert_status == "Gunshot Detected":
            try:
                # At this point in execution, an attempt to send an SMS alert to local authorities will be made
                modem.waitForNetworkCoverage(timeout = 86400)
                message = "(Testing) ALERT: A Gunshot Has Been Detected (Testing)"
                for number in designated_alert_recipients:
                    modem.sendSms(number, message)
                logger.debug(" *** Sent out an SMS alert to all designated recipients *** ")
            except:
                logger.debug("ERROR: Unable to successfully send an SMS alert to the designated recipients.")
                pass
            finally:
                logger.debug(" ** Finished evaluating an audio sample with the model ** ")
    
    """

## Opening the Microphone Audio Stream

In [None]:
# Creates the PyAudio instance that owns the microphone stream
pa = pyaudio.PyAudio()

# Stream settings, all taken from the configuration variables defined earlier
# NOTE(review): output is enabled although nothing visible here writes to the stream — confirm it is needed
stream_settings = {
    "format": audio_format,
    "rate": audio_rate,
    "channels": audio_channels,
    "input_device_index": audio_device_index,
    "frames_per_buffer": audio_frames_per_buffer,
    "input": True,
    "output": True,
}

stream = pa.open(**stream_settings)

## Capturing Microphone Audio

In [None]:
logger.debug("--- Listening to Audio Stream ---")

# The queues are module-level names that the worker functions look up, so they
# are created before any worker process is defined or started
audio_analysis_queue = multiprocessing.Queue()
gunshot_sequence_queue = multiprocessing.Queue()
prediction_queue = multiprocessing.Queue()
sms_alert_queue = multiprocessing.Queue()

audio_analysis_process = multiprocessing.Process(target = analyze_microphone_data, args = (audio_volume_threshold,))
prediction_process = multiprocessing.Process(target = predict_microphone_data_class, args = (audio_rate,))
sms_alert_process = multiprocessing.Process(target = send_sms_alert, args = (designated_alert_recipients,))

worker_processes = (audio_analysis_process, prediction_process, sms_alert_process)
for worker_process in worker_processes:
    worker_process.start()

# Number of buffers that make up one sample, computed with floor division so it
# does not depend on float rounding
buffers_per_sample = int(audio_rate * audio_sample_duration // audio_frames_per_buffer)

try:
    while True:
        sound_data = array('h')

        # Loops through the stream and appends audio chunks to the frame array
        for _ in range(buffers_per_sample):
            sound_buffer = array('h', stream.read(audio_frames_per_buffer, exception_on_overflow = False))
            if byteorder == 'big':
                sound_buffer.byteswap()
            sound_data.extend(sound_buffer)

        # Places a new sample of microphone data on the audio analysis queue
        audio_analysis_queue.put(sound_data)

        # Closes all finished processes
        multiprocessing.active_children()
finally:
    # The loop only ends through an exception (kernel interrupt, stream error);
    # the workers block forever on their queues, so they are stopped here
    # instead of being left running in the background
    for worker_process in worker_processes:
        if worker_process.is_alive():
            worker_process.terminate()
        worker_process.join()