# Library Imports

### File Directory Libraries

In [None]:
import os

### Math Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt

### Data Pre-Processing Libraries

In [None]:
import pandas as pd
import librosa
import librosa.display
import soundfile
import re
import cv2
import six
from array import array
from sklearn.preprocessing import LabelBinarizer
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

### Visualization Libraries

In [None]:
import IPython.display as ipd

### Deep Learning Libraries

In [None]:
import tensorflow as tf
from tensorflow.keras import Input, layers, backend as K
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation, BatchNormalization, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

### Configuration of Imported Libraries

In [None]:
%matplotlib inline

# Initialization of Variables

In [None]:
# Root folder holding the sound data, the label file and the saved model.
BASE_DIRECTORY = "/home/alexm/Datasets/"
# Folder that is scanned for the WAV recordings.
SOUND_DATA_DIRECTORY = os.path.join(BASE_DIRECTORY, "training_gunshots/")

# Largest value of a signed 16-bit audio frame (32767).
MAXIMUM_AUDIO_FRAME_INTEGER_VALUE = (1 << 15) - 1
# Target peak as an amplitude ratio: 10^(-1/20), i.e. 1 dB below full scale.
SOUND_NORMALIZATION_THRESHOLD = 10 ** (-1.0 / 20)
# Length, in audio samples, of every fixed-size window fed to the pipeline.
SAMPLE_RATE_PER_TWO_SECONDS = 44100

## Loading in Training Samples

In [None]:
print("...Parsing sound data...")
samples = []
# Dedicated running count of collected samples. The slicing loop below used
# to reuse the same name as this counter, which overwrote the count and made
# the cap on the number of samples unreliable.
number_of_samples_added = 0

for file in os.listdir(SOUND_DATA_DIRECTORY):
    # Stop opening new files once more than 25 samples have been collected
    if number_of_samples_added > 25:
        break
    if not file.endswith(".wav"):
        continue

    # No explicit sample rate is passed, so librosa applies its own default
    sample, sample_rate = librosa.load(os.path.join(SOUND_DATA_DIRECTORY, file))

    if len(sample) <= SAMPLE_RATE_PER_TWO_SECONDS:
        # Short clip: right-pad with zeros up to the fixed window length
        number_of_missing_hertz = SAMPLE_RATE_PER_TWO_SECONDS - len(sample)
        padded_sample = np.array(sample.tolist() + [0] * number_of_missing_hertz)
        samples.append(padded_sample)

        print("Added a sample...")
        number_of_samples_added += 1

    else:
        # Long clip: cut into consecutive, non-overlapping fixed-length windows.
        # NOTE(review): the range stop excludes the trailing remainder, and also
        # the last full window when the clip length is an exact multiple of the
        # window length; kept as-is so the number of samples does not change.
        for start in range(0, sample.size - SAMPLE_RATE_PER_TWO_SECONDS, SAMPLE_RATE_PER_TWO_SECONDS):
            samples.append(sample[start : start + SAMPLE_RATE_PER_TWO_SECONDS])
            print("Added a sample...")
            number_of_samples_added += 1

print("The number of samples available for training is currently " + str(len(samples)) + '.')

## Loading in Augmented Labels

In [None]:
# Reads the label array from the shared dataset folder; BASE_DIRECTORY already
# ends with a separator, so this resolves to the same file as before.
labels = np.load(BASE_DIRECTORY + "gunshot_augmented_sound_labels.npy")

## Binarizing Labels

In [None]:
# Translate every numeric label into a class name: 1 -> "gun_shot", anything else -> "other"
class_names = []
for label in labels:
    class_names.append("gun_shot" if label == 1 else "other")

# Fit the binarizer on the class names; it is kept around to decode predictions later
label_binarizer = LabelBinarizer()
binarized_labels = label_binarizer.fit_transform(np.array(class_names))

# Place the binarized labels next to their complement (1 - value)
labels = np.hstack((binarized_labels, 1 - binarized_labels))

## Sound Post-Processing Functions

In [None]:
def normalize(sound_data, threshold = None, maximum_value = None):
    """Rescale audio so its loudest point lands on a fixed target level.

    Every value is multiplied by ``threshold * maximum_value / peak``, where
    ``peak`` is the largest absolute value in ``sound_data``, and the result
    is truncated toward zero to a whole number.

    Parameters
    ----------
    sound_data : sequence or np.ndarray of numbers
        The audio values to rescale.
    threshold : float, optional
        Fraction of ``maximum_value`` the peak is scaled to. Defaults to the
        module-level ``SOUND_NORMALIZATION_THRESHOLD``.
    maximum_value : number, optional
        Full-scale value. Defaults to the module-level
        ``MAXIMUM_AUDIO_FRAME_INTEGER_VALUE``.

    Returns
    -------
    np.ndarray
        ``float32`` array of whole-number values with the same shape as the
        input. Empty input yields an empty array instead of raising.
    """
    if threshold is None:
        threshold = SOUND_NORMALIZATION_THRESHOLD
    if maximum_value is None:
        maximum_value = MAXIMUM_AUDIO_FRAME_INTEGER_VALUE

    # Work in float64 so the scaling does not depend on the input's dtype
    data = np.asarray(sound_data, dtype = np.float64)
    if data.size == 0:
        return data.astype(np.float32)

    absolute_maximum_sound_datum = np.abs(data).max()

    # Prevents a divide by zero scenario
    if absolute_maximum_sound_datum == 0.0:
        absolute_maximum_sound_datum = 0.001

    normalization_factor = float(threshold * maximum_value) / absolute_maximum_sound_datum

    # np.trunc drops the fractional part toward zero, like int() did per element
    return np.trunc(data * normalization_factor).astype(np.float32)


def convert_to_spectrogram(data, sample_rate):
    """Compute a mel spectrogram of ``data`` and return it as a float32 array.

    ``data`` is passed to librosa as the signal (``y``) and ``sample_rate``
    as its sampling rate (``sr``).
    """
    mel_spectrogram = librosa.feature.melspectrogram(y = data, sr = sample_rate)
    return np.array(mel_spectrogram, dtype = "float32")


def power_to_db(S, ref = 1.0, amin = 1e-10, top_db = 80.0):
    """Convert a power spectrogram to decibel (dB) units.

    Computes ``10 * log10(max(amin, S)) - 10 * log10(max(amin, ref))`` and,
    when ``top_db`` is given, clips the result so that nothing lies more than
    ``top_db`` below the maximum.

    Parameters
    ----------
    S : array-like
        Input power values. Complex input is reduced to its magnitude.
    ref : number or callable
        Reference power. A callable is invoked on the magnitude array to
        compute the reference (e.g. ``np.max``); otherwise ``abs(ref)`` is used.
    amin : float
        Floor applied to both the input and the reference before the log.
    top_db : float or None
        Dynamic-range limit in dB; ``None`` disables the clipping.

    Returns
    -------
    np.ndarray
        The dB-scaled values, same shape as ``S``.

    Notes
    -----
    Invalid ``amin`` / ``top_db`` values and complex input are reported through
    a debug-level log message and processing continues.
    """
    # Local import and logger: no logger is defined anywhere at module level,
    # so the diagnostic branches below used to fail with a NameError.
    import logging
    logger = logging.getLogger(__name__)

    S = np.asarray(S)
    if amin <= 0:
        logger.debug('ParameterError: amin must be strictly positive')
    if np.issubdtype(S.dtype, np.complexfloating):
        logger.debug('Warning: power_to_db was called on complex input so phase '
                      'information will be discarded. To suppress this warning, '
                      'call power_to_db(np.abs(D)**2) instead.')
        magnitude = np.abs(S)
    else:
        magnitude = S
    if callable(ref):
        # User supplied a function to calculate reference power
        ref_value = ref(magnitude)
    else:
        ref_value = np.abs(ref)
    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
    log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value))
    if top_db is not None:
        if top_db < 0:
            logger.debug('ParameterError: top_db must be non-negative')
        # Clip everything that is more than top_db below the peak
        log_spec = np.maximum(log_spec, log_spec.max() - top_db)
    return log_spec


def convert_spectrogram_to_image(spectrogram):
    """Render a spectrogram with matplotlib and return it as an RGB image array.

    The spectrogram is converted to dB (relative to its maximum), drawn on an
    off-screen figure without axes or frame, read back from the Agg canvas
    and resized to 192x192.

    Parameters
    ----------
    spectrogram : array-like
        Power spectrogram accepted by ``power_to_db``.

    Returns
    -------
    np.ndarray
        ``uint8`` array of shape ``(192, 192, 3)`` holding the rendered image.
    """
    plt.interactive(False)

    figure = plt.figure(figsize = [0.72, 0.72], dpi = 400)
    try:
        # Deliberately left before the axes are created, as it always was, so
        # the rendered layout (and therefore the model input) does not change.
        plt.tight_layout(pad = 0)
        ax = figure.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)

        librosa.display.specshow(power_to_db(spectrogram, ref = np.max))

        # print_to_buffer() draws the figure and returns the RGBA bytes together
        # with the canvas size, so no separate draw()/tostring_rgb() is needed.
        canvas = FigureCanvas(figure)
        rgba_bytes, (width, height) = canvas.print_to_buffer()

        # Image buffers are row-major: rows (height) first, then columns (width)
        rgba_image = np.frombuffer(rgba_bytes, dtype = "uint8").reshape((height, width, 4))
        # Drop the alpha channel; copy to a contiguous array for OpenCV
        image = np.ascontiguousarray(rgba_image[:, :, :3])
        image = cv2.resize(image, (192, 192))
    finally:
        # Release only this figure, even if rendering failed part-way
        figure.clf()
        plt.close(figure)

    # Returns a NumPy array containing an image of a spectrogram
    return image

# Model

## ROC (AUC) metric - Uses the import "from tensorflow.keras import backend as K"

In [None]:
def auc(y_true, y_pred):
    """Keras-compatible AUC metric built on ``tf.metrics.auc``.

    Returns the second element of the pair produced by ``tf.metrics.auc``.
    The local variables that call creates are initialized in the current
    Keras backend session before the value is returned.
    """
    _, auc_update = tf.metrics.auc(y_true, y_pred)
    session = K.get_session()
    session.run(tf.local_variables_initializer())
    return auc_update

## Loading previous model

In [None]:
# Restores the saved model; the custom "auc" metric has to be supplied by name
# so that Keras can resolve it while deserializing.
model_path = os.path.join(BASE_DIRECTORY, "gunshot_2d_spectrogram_model.h5")
model = load_model(model_path, custom_objects = {"auc" : auc})

### Debugging of model predictions (optional)

In [None]:
# Runs every loaded sample through the model and prints the prediction
for sample in samples:
    # The normalize() step is disabled, so the raw sample is used directly.
    # (Previously nothing assigned this variable, which raised a NameError.)
    gunshot_training_sample = np.asarray(sample, dtype = "float32")

    # Right-pad with zeros up to the fixed window length (no-op for full-length samples)
    number_of_missing_hertz = max(0, SAMPLE_RATE_PER_TWO_SECONDS - len(gunshot_training_sample))
    gunshot_training_sample = np.pad(gunshot_training_sample, (0, number_of_missing_hertz), mode = "constant")

    # Sample -> mel spectrogram -> rendered RGB image
    spectrogram = convert_to_spectrogram(gunshot_training_sample, 22050)
    spectrogram_image = convert_spectrogram_to_image(spectrogram)

    # Add the batch dimension and scale the pixel values to [0, 1]
    gunshot_training_sample_spectrogram = spectrogram_image.reshape((-1, 192, 192, 3))
    gunshot_training_sample_spectrogram = gunshot_training_sample_spectrogram.astype("float32")
    gunshot_training_sample_spectrogram /= 255

    probabilities = model.predict(gunshot_training_sample_spectrogram)
    print("The model-predicted probability values: " + str(probabilities[0]))
    # The first output column is decoded back into a class name by the fitted binarizer
    print("Model-predicted sample class: " + label_binarizer.inverse_transform(probabilities[:, 0])[0])

### Debugging of an individual incorrectly-labeled example (optional)

In [None]:
# Plays back the sample left over from the last iteration of the prediction loop
ipd.Audio(data = gunshot_training_sample, rate = 22050)