In [161]:
#code to record X mins of audio, overlay gunshot clips on top of it, run inference on it with both the 1D and 2D
#tflite models, and output precision, recall, etc, in order to evaluate the performance of models
    #can easily jump to overlaying clips/inference if you already have a recording (jump down to "Run models on
    #overlaid audio" section)
    
#for sliding predictions it's a bit harder to calculate the metrics -- be wary of them I'd say
    
    
import pyaudio
import librosa
import wave
import numpy as np
from threading import Thread
from array import array
from scipy.io import wavfile
import soundfile as sf
import scipy.signal
from queue import Queue
import time
import os
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import cv2
import six
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras import Input, layers, optimizers, backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import LabelBinarizer


## Variables and Functions

In [162]:
#variables
audio_format = pyaudio.paInt16
audio_rate = 44100
audio_channels = 1
audio_device_index = 2
audio_frames_per_buffer = 4410
audio_sample_duration = 2
sound_data = np.zeros(0, dtype = "int16")
sound_normalization_threshold = 10 ** (-1.0 / 20)
max_audio_frame_int_value = 2 ** 15 - 1
confidence_level = 0.50

#how long to record audio for (in minutes)
recording_length = 30

#directory to save files to
#for my laptop
files_directory = "/Users/laurenogden/Downloads/"
#relative path to directory on github
#files_directory = "../recordings/"

#directory of gunshot files to put on top of data (2s or less long files)
gunshot_directory = files_directory + "Original Gunshot Trimmed/"

audio_analysis_queue = Queue()

In [163]:
#normalization function
def normalize(sound_data):
    
    absolute_maximum_sound_datum = max(abs(i) for i in sound_data)
    # Prevents a divide by zero scenario
    if absolute_maximum_sound_datum == 0.0:
        absolute_maximum_sound_datum = 0.001
        
    normalization_factor = float(sound_normalization_threshold * max_audio_frame_int_value) / absolute_maximum_sound_datum
    
    # Averages the volume out
    r = array('h')
    for datum in sound_data:
        r.append(int(datum * normalization_factor))
    return np.array(r, dtype = np.int16)

In [164]:
#function to save a wav file
def create_wav_file(microphone_data, name):
    librosa.output.write_wav(files_directory + name + ".wav", microphone_data, 22050)

In [165]:
#prints the times of each sound (mins : secs)
def time_in_mins(index):
    secs = index/22050
    return str(int(secs//60)) + ":" + str(secs%60)

In [166]:
#makes a spectrogram for 2D CNN ~~ OLD
def convert_to_spectrogram(data, sample_rate):
    return np.array(librosa.feature.melspectrogram(y = data, sr = sample_rate), dtype = "float32")

In [167]:
#functions to make a spectrogram image for other 2D CNN ~~ OLD
def power_to_db(S, ref = 1.0, amin = 1e-10, top_db = 80.0):
    S = np.asarray(S)
    if amin <= 0:
        print('ParameterError: amin must be strictly positive')
    if np.issubdtype(S.dtype, np.complexfloating):
        print('Warning: power_to_db was called on complex input so phase '
                      'information will be discarded. To suppress this warning, '
                      'call power_to_db(np.abs(D)**2) instead.')
        magnitude = np.abs(S)
    else:
        magnitude = S
    if six.callable(ref):
        # User supplied a function to calculate reference power
        ref_value = ref(magnitude)
    else:
        ref_value = np.abs(ref)
    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
    log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value))
    if top_db is not None:
        if top_db < 0:
            print('ParameterError: top_db must be non-negative')
        log_spec = np.maximum(log_spec, log_spec.max() - top_db)
    return log_spec


def convert_spectrogram_to_image(spectrogram):
    plt.interactive(False)
    
    figure = plt.figure(figsize = [0.72, 0.72], dpi = 400)
    plt.tight_layout(pad = 0)
    ax = figure.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    
    librosa.display.specshow(power_to_db(spectrogram, ref = np.max))
    
    canvas = FigureCanvas(figure)
    canvas.draw()
    s, (width, height) = canvas.print_to_buffer()

    image = np.fromstring(figure.canvas.tostring_rgb(), dtype = "uint8")
    image = image.reshape((width, height, 3))
    image = cv2.resize(image, (192, 192))

    # Cleaning up the matplotlib instance
    plt.close()    
    figure.clf()
    plt.close(figure)
    plt.close("all")
    
    # Returns a NumPy array containing an image of a spectrogram
    return image

In [168]:
#functions to make a spectrogram ~~ NEW

SAMPLE_RATE_PER_SECOND = 22050
SAMPLE_RATE_PER_TWO_SECONDS = 44100
HOP_LENGTH = 345 * 2
MINIMUM_FREQUENCY = 20
MAXIMUM_FREQUENCY = SAMPLE_RATE_PER_SECOND
NUMBER_OF_MELS = 128
NUMBER_OF_FFTS = NUMBER_OF_MELS * 20

def convert_audio_to_spectrogram(data):
    spectrogram = librosa.feature.melspectrogram(y=data, sr=SAMPLE_RATE_PER_TWO_SECONDS,
                                                 hop_length=HOP_LENGTH,
                                                 fmin=MINIMUM_FREQUENCY,
                                                 fmax=MAXIMUM_FREQUENCY,
                                                 n_mels=NUMBER_OF_MELS,
                                                 n_fft=NUMBER_OF_FFTS)
    spectrogram = power_to_db(spectrogram)
    spectrogram = spectrogram.astype(np.float32)
    return spectrogram


def power_to_db(S, ref=1.0, amin=1e-10, top_db=80.0):
    S = np.asarray(S)
    if amin <= 0:
        logger.debug("ParameterError: amin must be strictly positive")
    if np.issubdtype(S.dtype, np.complexfloating):
        logger.debug("Warning: power_to_db was called on complex input so phase information will be discarded.")
        magnitude = np.abs(S)
    else:
        magnitude = S
    if six.callable(ref):
        # User supplied a function to calculate reference power
        ref_value = ref(magnitude)
    else:
        ref_value = np.abs(ref)
    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
    log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value))
    if top_db is not None:
        if top_db < 0:
            logger.debug("ParameterError: top_db must be non-negative")
        log_spec = np.maximum(log_spec, log_spec.max() - top_db)
    return log_spec

In [169]:
#label binarizer
labels = np.load("/Users/laurenogden/Downloads/gunshot_augmented_sound_labels.npy")
labels = np.array([("gun_shot" if label == 1 else "other") for label in labels])
label_binarizer = LabelBinarizer()
labels = label_binarizer.fit_transform(labels)
labels = np.hstack((labels, 1 - labels))


In [170]:
#auc metric for loading original models
def auc(y_true, y_pred):
    auc = tf.metrics.auc(y_true, y_pred)[1]
    K.get_session().run(tf.local_variables_initializer())
    return auc

In [171]:
#callback function for pyaudio strean
def callback(in_data, frame_count, time_info, status):
    global sound_data
    sound_buffer = np.frombuffer(in_data, dtype = "int16")
    sound_data = np.append(sound_data, sound_buffer)
    if len(sound_data) >= 88200:
        audio_analysis_queue.put(sound_data)
        #empty out sound_data
        sound_data = np.zeros(0, dtype = "int16")

    return (sound_buffer, pyaudio.paContinue)

## Record X mins of audio

In [None]:
#open pyaudio stream
pa = pyaudio.PyAudio()
stream = pa.open(format = audio_format,
                 rate = audio_rate,
                 channels = audio_channels,
                 input_device_index = audio_device_index,
                 frames_per_buffer = audio_frames_per_buffer,
                 input = True,
                 stream_callback = callback)

# Starts the callback thread
stream.start_stream()

#get first bit of mic data from the stream
mic_data = audio_analysis_queue.get()
mod_mic_data = librosa.resample(y = mic_data, orig_sr = audio_rate, target_sr = 22050)
mod_mic_data = normalize(mod_mic_data)
mic_data = mod_mic_data
#create_wav_file(mod_mic_data, "-1_" + str(time.time()))

#get 10 mins of audio data (300 2s clips)
for i in range(0, recording_length*30 - 1):
    #print(time.ctime(time.time()))
    new_data = np.array(audio_analysis_queue.get(), dtype = "int16")
    mod_new_data = librosa.resample(y = new_data, orig_sr = audio_rate, target_sr = 22050)
    mod_mic_data = normalize(mod_new_data)
    #create_wav_file(mod_new_data, str(i) + "_" + str(time.time()))
    mic_data = np.append(mic_data, mod_new_data)

#save the clip
createwav_file(mic_data, str(recording_length) + "_mins_background")

#### Mark loud noises in noise clip

In [None]:
#mark locations of loud noises in the clips (taps/claps/etc)

#sort the data, figure out the threshold
sorted_data = np.sort(mic_data)
threshold = sorted_data[len(sorted_data) - int(len(sorted_data)*0.001)]

#find all values above that threshold
above_threshold = []
for i in range(0, len(mic_data)):
    if mic_data[i] > threshold:
        above_threshold.append(i)

#separate out individual sounds from that whole chunk
distinct_sounds = []
distinct_sounds.append(above_threshold[0])
for i in range(1, len(above_threshold)):
    #if within 5ms of each other, assume from same shot
    if above_threshold[i] - above_threshold[i-1] > 0.05*22050:
        distinct_sounds.append(above_threshold[i])

#times relative to beginning of the saved clip
print("There were " + str(len(distinct_sounds)) + " distinct loud sounds detected")
distinct_times = []
for i in distinct_sounds:
    distinct_times.append(i/22050)
    
#save them in a txt file for future possible use
loud_noises_file = open(files_directory + "background_loud_noises_indices.txt", "w")
for i in distinct_sounds:
    loud_noises_file.write(str(i)+"\n")
loud_noises_file.close()
    

## Overlay gunshot clips onto recorded noise clip

In [172]:
#load back the X min clip saved above (or whenever, idc)
#10mins_lab is the 10 minutes I recorded on the Sizheng mic just sitting in the lab
    #has some people talking, me tapping the mic, moving my water bottle, etc
#noise, rate = librosa.load(files_directory + "10mins_lab.wav") 
#mohler_outside.wav is what I named the hour long audio clip Dr. Mohler recorded outside his house on the Sizheng
    #I'm only reading in half an hour of it because the whole hour seems like a lot
noise, rate = librosa.load(files_directory + "mohler_outside.wav", duration = 60*recording_length) 
#noise, rate = librosa.load(files_directory + str(recording_length) + "_mins_background.wav")

#list files in a gunshot audio directory
gunshot_files = os.listdir(gunshot_directory)

In [173]:
# make a numpy array the size of your clip, fill it with zeros
all_gunshots = np.zeros(len(noise), dtype = "float32")

#how many gunshot clips to put on top of your audio
n_gunshot_files_to_use = 125

locs = []
locs_of_actual_shots = []
files_used = []
#add gunshots to that 0 numpy array
for i in range(0, n_gunshot_files_to_use):
    #pick a random gunshot file
    file = gunshot_files[np.random.randint(0, len(gunshot_files))]
    #avoid using the exact same gunshot twice, or attempting to load a non-wav file in the folder
    while file in files_used or ".wav" not in file:
        file = gunshot_files[np.random.randint(0, len(gunshot_files))]
    files_used.append(file)
    #load the file
    gunshot, sr = librosa.load(gunshot_directory + file)
    
    
    #find the location(s) of the actual gunshot(s) in that file
    #sort the data, figure out the threshold
    sorted_gunshot_data = np.sort(gunshot)
    threshold = sorted_gunshot_data[len(sorted_gunshot_data) - int(len(sorted_gunshot_data)*0.001)]
    #find all values above that threshold
    above_threshold = []
    for i in range(0, len(gunshot)):
        if gunshot[i] > threshold:
            above_threshold.append(i)
    #separate out individual sounds from that whole chunk
    distinct_shots = []
    distinct_shots.append(above_threshold[0])
    for i in range(1, len(above_threshold)):
        #if within 5ms of each other, assume from same shot
        if above_threshold[i] - above_threshold[i-1] > 0.05*22050:
            distinct_shots.append(above_threshold[i])
    
    
    #pick a random location to put it at 
    loc = np.random.randint(0, len(noise) - 44100)
    #to avoid putting two gunshots at the exact same place in the clip
        #to be improved potentially to avoid overlapping gunshots??
    while loc in locs:
        loc = np.random.randint(0, len(noise))
    locs.append(loc)
    #append location(s) of the actual gunshot(s) in the entire clip
    for i in distinct_shots:
        locs_of_actual_shots.append(loc+i)
    
    #print("putting " + file + " at location " + str(loc) + ", time= " + time_in_mins(loc))
    
    #place the data at that location
    for j in range(loc, loc+len(gunshot)):
        all_gunshots[j] = all_gunshots[j] + gunshot[j-loc]
    

In [174]:
#combine the noise clip with the gunshot array and save it
overlaid = noise * 0.5 + all_gunshots * 0.5
overlaid_normed = normalize(overlaid)
#create_wav_file(overlaid_normed, str(recording_length) + "_mins_overlaid_1_0.7x1.0")
create_wav_file(overlaid, str(recording_length) + "_mins_overlaid_2_0.5x0.5_not_normed")

#save the audio of just the gunshots w silence in the back (no noise clip) to see how the model does on that as well
create_wav_file(all_gunshots, str(recording_length) + "_only_gunshots_2")

### Save locations of gunshots in new overlaid clip

In [175]:
#sort the list of locations of actual gunshots, print out their time locations (if you wanna)
print("Added " + str(len(locs_of_actual_shots)) + " gunshots")
locs_of_actual_shots.sort()
'''
print("Times of the gunshots in the recording: ")
for i in locs_of_actual_shots:
    print("location = " + str(i) + " at time = " + str(time_in_mins(i)))
'''
    
#figure out what two second clips contain gunshots
clips_w_guns = []
for i in locs_of_actual_shots:
    if (i//44100)*44100 not in clips_w_guns:
        clips_w_guns.append((i//44100)*44100)
        
#sliding every half second, this tells you the clips that have gunshots right at the very beginning
clips_sliding_half_w_guns = []
for i in locs_of_actual_shots:
    if (i//11025)*11025 not in clips_sliding_half_w_guns:
        clips_sliding_half_w_guns.append((i//11025)*11025)
        
#sliding every second, same sorta deal  
clips_sliding_one_w_guns = []
for i in locs_of_actual_shots:
    if (i//22050)*22050 not in clips_sliding_one_w_guns:
        clips_sliding_one_w_guns.append((i//22050)*22050)
        

print("NO SLIDING: There are " + str(len(clips_w_guns)) + " 2s clips in our " + str(len(noise)/22050/60) 
      + " min recording containing gunshots")

print("SLIDING EVERY HALF SECOND: There are " + str(len(clips_sliding_half_w_guns)) + " 2s clips in our " + str(len(noise)/22050/60) 
      + " min recording containing gunshots")

print("SLIDING EVERY SECOND: There are " + str(len(clips_sliding_one_w_guns)) + " 2s clips in our " + str(len(noise)/22050/60) 
      + " min recording containing gunshots")

Added 194 gunshots
NO SLIDING: There are 128 2s clips in our 30.0 min recording containing gunshots
SLIDING EVERY HALF SECOND: There are 152 2s clips in our 30.0 min recording containing gunshots
SLIDING EVERY SECOND: There are 142 2s clips in our 30.0 min recording containing gunshots


In [176]:
#save the locations of gunshots in a textfile for later use
locations_file = open(files_directory + str(recording_length) + "_gunshot_clip_2_exact_locations.txt", "w")
for i in locs_of_actual_shots:
    locations_file.write(str(i)+"\n")
locations_file.close()

#save the indices of the clips with guns in a textfile for later use: no sliding
locations_file = open(files_directory + str(recording_length) + "_gunshot_clip_2_indices_no_sliding.txt", "w")
for i in clips_w_guns:
    locations_file.write(str(i)+"\n")
locations_file.close()

#save the indices of the clips with guns in a textfile for later use: sliding every half second
locations_file = open(files_directory + str(recording_length) + "_gunshot_clip_2_indices_sliding_half.txt", "w")
for i in clips_sliding_half_w_guns:
    locations_file.write(str(i)+"\n")
locations_file.close()

#save the indices of the clips with guns in a textfile for later use: sliding every second
locations_file = open(files_directory + str(recording_length) + "_gunshot_clip_2_indices_sliding_one.txt", "w")
for i in clips_sliding_one_w_guns:
    locations_file.write(str(i)+"\n")
locations_file.close()

## Run models on overlaid audio

In [355]:
#load back in the clip overlaid with gunshots
input_audio_file = files_directory + str(recording_length) + "_mins_overlaid_2_0.5x0.5_not_normed.wav"
#input_audio_file = files_directory + str(recording_length) + "_only_gunshots_2.wav"
#input_audio_file = files_directory + str(recording_length) + "_mins_background.wav"
#input_audio_file = files_directory + "mohler_outside.wav"

audio, sr = librosa.load(input_audio_file, duration = 60*recording_length)

#load back the indices of clips that contain gunshots from the txt file, choosing how much you want to slide
sliding_half = False
sliding_one = False
if sliding_half:
    locations_file = open(files_directory + str(recording_length) + "_gunshot_clip_2_indices_sliding_half.txt", "r")
elif sliding_one:
    locations_file = open(files_directory + str(recording_length) + "_gunshot_clip_2_indices_sliding_one.txt", "r")
else:
    locations_file = open(files_directory + str(recording_length) + "_gunshot_clip_2_indices_no_sliding.txt", "r")

clips_w_guns = locations_file.readlines()
locations_file.close()
#turn strings to ints
clips_w_guns = list(map(int, clips_w_guns))

#are we using the forgiving way of classification
forgiving = True

exact_locations_file = open(files_directory + str(recording_length) + "_gunshot_clip_2_exact_locations.txt", "r")
exact_locs = exact_locations_file.readlines()
exact_locations_file.close()
#turn strings to ints
exact_locs = list(map(int, exact_locs))

In [356]:
#open a file to info about the audio clip, modes, eventually metrics, etc
metrics_filename = files_directory + "model_metrics_21.txt"
metrics_file = open(metrics_filename, "w")
#audio file we're analyzing
metrics_file.write("Analyzing audio file: " + input_audio_file + "\n")
#how many clips there are in it w gunshots
metrics_file.write("There are " + str(recording_length*30) + " 2 second clips, " 
                           + str(len(clips_w_guns)) + " of which contain gunshots.\n")
#confidence level
metrics_file.write("Predictions were done at a confidence level of " + str(confidence_level) + "\n")

#are we sliding
if sliding_half:
    metrics_file.write("Sliding predictions were done every half second\n")
elif sliding_one:
    metrics_file.write("Sliding predictions were done every second\n")
else:
    metrics_file.write("Predictions were done every 2 seconds (no sliding)\n")

#are you using forgiving true positive classification (see below)
if forgiving:
    metrics_file.write("Using forgiving true positive classification \n\n")
else:
    metrics_file.write("Using strict true positive classification \n\n")

metrics_file.close()

### Load Models

#### 1D TFLite

In [357]:
#load 1D tflite model
model_name_1D = "gunshot_sound_model_1d.tflite"
interpreter_1D = tf.lite.Interpreter(model_path = "../models/" + model_name_1D)
interpreter_1D.allocate_tensors()
# Get input and output tensors.
input_details_1D = interpreter_1D.get_input_details()
output_details_1D = interpreter_1D.get_output_details()
input_shape_1D = input_details_1D[0]['shape']

#### 2D TFLite

In [358]:
#load 2D tflite model
model_name_2D = "spectrogram_gunshot_model_1.tflite"
interpreter_2D = tf.lite.Interpreter(model_path = "../models/" + model_name_2D)
interpreter_2D.allocate_tensors()
# Get input and output tensors.
input_details_2D = interpreter_2D.get_input_details()
output_details_2D = interpreter_2D.get_output_details()
input_shape_2D = input_details_2D[0]['shape']

#### Ryan's 2D TFLite

In [359]:
#load 2D tflite model
model_name_ryan = "RYAN_LATEST_gunshot_2d_spectrogram_model.tflite"
interpreter_2D_ryan = tf.lite.Interpreter(model_path = "../models/" + model_name_ryan)
interpreter_2D_ryan.allocate_tensors()
# Get input and output tensors.
input_details_2D_ryan = interpreter_2D_ryan.get_input_details()
output_details_2D_ryan = interpreter_2D_ryan.get_output_details()
input_shape_2D_ryan = input_details_2D_ryan[0]['shape']

#### 2D 128x128 input

In [360]:
#load 2D tflite model
model_name_128x128 = "128_128_gunshot_2d_spectrogram_model.tflite"
interpreter_2D_128x128 = tf.lite.Interpreter(model_path = "../models/" + model_name_ryan)
interpreter_2D_128x128.allocate_tensors()
# Get input and output tensors.
input_details_2D_128x128 = interpreter_2D_128x128.get_input_details()
output_details_2D_128x128 = interpreter_2D_128x128.get_output_details()
input_shape_2D_128x128 = input_details_2D_128x128[0]['shape']

### Inference

In [361]:
positives_1D = []
positives_2D = []
positives_2D_ryan = []
all_positives = []
all_positives_and = []

#pass two second slices into each of the models to predict
#slide every 1 second
for i in range (0, len(audio) - 22050, 22050):
#slide every half second
#for i in range(0, len(audio) - 33075, 11025):
#no sliding, discrete 2 second chunks
#for i in range (0, len(audio), 44100):
    audio_slice = audio[i:i+44100]
    
    #Normalize
    #audio_slice = normalize(audio_slice)

    #1D reshaping
    reshaped_audio_slice_1D = audio_slice.reshape(input_shape_1D)
    #1D predictions
    input_tensor_1D = tf.convert_to_tensor(reshaped_audio_slice_1D, np.float32)
    interpreter_1D.set_tensor(input_details_1D[0]["index"], reshaped_audio_slice_1D.astype("float32"))
    interpreter_1D.invoke()
    probabilities_1D = interpreter_1D.get_tensor(output_details_1D[0]["index"])
    #print("1D model-predicted sample class: " + label_binarizer.inverse_transform(probabilities_1D[:, 0])[0])
    if probabilities_1D[0][1] >= confidence_level:
        #create_wav_file(audio_slice, str(i))
        positives_1D.append(i) 
        #print("        POSITIVE, APPENDED")
        
        
    #2D reshaping
    reshaped_audio_slice_2D = convert_to_spectrogram(data = audio_slice, sample_rate = 22050)
    reshaped_audio_slice_2D = reshaped_audio_slice_2D.reshape(input_shape_2D)
    #2D predictions
    input_tensor_2D = tf.convert_to_tensor(reshaped_audio_slice_2D, np.float32)
    interpreter_2D.set_tensor(input_details_2D[0]["index"], reshaped_audio_slice_2D)
    interpreter_2D.invoke()
    probabilities_2D = interpreter_2D.get_tensor(output_details_2D[0]["index"])
    #print("2D model-predicted sample class: " + label_binarizer.inverse_transform(probabilities_2D[:, 0])[0])
    if probabilities_2D[0][1] >= confidence_level:
        #create_wav_file(audio_slice, str(i))
        positives_2D.append(i) 
        #print("        POSITIVE, APPENDED")
    
    
    #Ryan 2D reshaping
    reshaped_audio_slice_2D_ryan = convert_audio_to_spectrogram(data = audio_slice)
    #reshaped_audio_slice_2D_ryan = convert_spectrogram_to_ryan(spectrogram = reshaped_audio_slice_2D_ryan)
    reshaped_audio_slice_2D_ryan = reshaped_audio_slice_2D_ryan.reshape(input_shape_2D_ryan)
    reshaped_audio_slice_2D_ryan = reshaped_audio_slice_2D_ryan.astype("float32")
    #reshaped_audio_slice_2D_ryan /= 255
    #Ryan 2D Image predictions
    input_tensor_2D_ryan = tf.convert_to_tensor(reshaped_audio_slice_2D_ryan, np.float32)
    interpreter_2D_ryan.set_tensor(input_details_2D_ryan[0]["index"], reshaped_audio_slice_2D_ryan)
    interpreter_2D_ryan.invoke()
    probabilities_2D_ryan = interpreter_2D_ryan.get_tensor(output_details_2D_ryan[0]["index"])
    #print("Ryan 2D model-predicted sample class: " + label_binarizer.inverse_transform(probabilities_2D_ryan[:, 0])[0])
    if probabilities_2D_ryan[0][1] >= confidence_level:
        #create_wav_file(audio_slice, str(i))
        positives_2D_ryan.append(i)
        #print("        POSITIVE, APPENDED")
        
        
    #if any of them predicted it as a gunshot, it's positive
    if probabilities_1D[0][1] >= confidence_level or probabilities_2D[0][1] >= confidence_level or probabilities_2D_ryan[0][1] >= confidence_level:
        all_positives.append(i)
    
    #if all of them predicted it as a gunshot, it's positive
    if probabilities_1D[0][1] >= confidence_level and probabilities_2D[0][1] >= confidence_level and probabilities_2D_ryan[0][1] >= confidence_level:
        all_positives_and.append(i)


### Separate TP, FP, TN, FN and output metrics

In [362]:
#function to find true positives
def find_true_positives(positives, model_name):
    true_positives = []
    for i in positives:
        if i in clips_w_guns:
            true_positives.append(i)
            create_wav_file(audio[i: i+44100], model_name + "_true_positive_" + str(i))
            #create_wav_file(normalize(audio[i: i+44100]), model_name + "_true_positive_" + str(i))
        
        elif forgiving and not sliding_half and not sliding_one:
            for j in clips_w_guns:
                if i == j + 44100:
                    #if this is the clip right after one labeled as having a gunshot, I'm gonna say that's still a
                    # true positive, because odds are its a weird split
                    if i not in true_positives:
                        true_positives.append(i)
                        
        elif forgiving and sliding_one:
            #accept the clips before and after 
            for j in clips_w_guns:
                if i == j-22050 or i == j+22050:
                    if i not in true_positives:
                        true_positives.append(i)
            
    print(len(true_positives))
            
    return true_positives

In [363]:
#function to find false positives
def find_false_positives(positives, true_positives, model_name):
    false_positives = []
    for i in positives:
        if i not in true_positives:
            false_positives.append(i)
            create_wav_file(audio[i: i+44100], model_name + "_false_positive_" + str(i))
            #create_wav_file(normalize(audio[i: i+44100]), model_name + "_false_positive_" + str(i))
            
    return false_positives

In [364]:
#function to find false negatives
def find_false_negatives(positives, true_positives, model_name):
    false_negatives = []
    for i in clips_w_guns:
        if forgiving and sliding_one:
            #don't call something a false negative if we caught the clip right next to it bc thats probs
            #the same gunshot, just caught at a slightly different clip
            if i not in positives:
                for j in true_positives:
                    if i != j-22050 and i != j+22050:
                        if i not in false_negatives:
                            false_negatives.append(i)
                            create_wav_file(audio[i: i+44100], model_name + "_false_negative_" + str(i))
 
        elif forgiving and not sliding_half and not sliding_one:
            #don't call something a false negative if we caught the clip right next to it bc thats probs
            #the same gunshot, just caught at a slightly different clip
            if i not in positives:
                for j in true_positives:
                    if i != j-44100:
                        if i not in false_negatives:
                            false_negatives.append(i)
                            create_wav_file(audio[i: i+44100], model_name + "_false_negative_" + str(i))
        else:
            if i not in positives:
                #slice_location = i//44100
                false_negatives.append(i)
                create_wav_file(audio[i: i+44100], model_name + "_false_negative_" + str(i))
                #create_wav_file(normalize(audio[i: i+44100]), model_name + "_false_negative_" + str(i))
            
    return false_negatives

In [365]:
#function to calculate metrics (precison, recall, accuracy, etc)
def find_metrics(positives, true_positives, false_positives, false_negatives, model_name):
    total = recording_length*30
    total_pos = len(positives)
    total_neg = total - total_pos
    TP = len(true_positives)
    FP = len(false_positives)
    FN = len(false_negatives)
    TN = total_neg - FN 

    print("1D TFLite Model Metrics:")
    print("Total # of clips classified as gunshots: " + str(total_pos))
    print("True Positives: " + str(TP))
    print("False Positives: " + str(FP))
    print("False Negatives: " + str(FN))
    print("True Negatives: " + str(TN))

    #calculate precision, accuracy, recall, avoid dividing by 0
    if TP + FP == 0:
        precision = 0
    else:
        precision = TP / (TP + FP)
    if TP + FN == 0:
        recall = 0
    else:
        recall = TP / (TP + FN)
    accuracy = (TP + TN) / total
    print("Precision: " + str(precision))
    print("Recall: " + str(recall))
    print("Accuracy: " + str(accuracy))
    
    #write metrics out to a text file
    metrics_file = open(metrics_filename, "a")
    metrics_file.write( model_name + " Model Metrics: \n")
    metrics_file.write("Total # of clips classified as gunshots: " + str(total_pos) + "\n")
    metrics_file.write("True Positives: " + str(TP) + "\n")
    metrics_file.write("False Positives: " + str(FP) + "\n")
    metrics_file.write("False Negatives: " + str(FN) + "\n")
    metrics_file.write("True Negatives: " + str(TN) + "\n")
    metrics_file.write("Precision: " + str(precision) + "\n")
    metrics_file.write("Recall: " + str(recall) + "\n")
    metrics_file.write("Accuracy: " + str(accuracy) + "\n\n")
    metrics_file.close()

#### 1D TFLite

In [366]:
false_positives_1D = []
true_positives_1D = []
false_negatives_1D = []

#figure out all true positives
true_positives_1D = find_true_positives(positives_1D, model_name_1D)
#figure out all false positives
false_positives_1D = find_false_positives(positives_1D, true_positives_1D, model_name_1D) 
#figure out all false negatives
false_negatives_1D = find_false_negatives(positives_1D, true_positives_1D, model_name_1D)

find_metrics(positives_1D, true_positives_1D, false_positives_1D, false_negatives_1D, model_name_1D)


51
1D TFLite Model Metrics:
Total # of clips classified as gunshots: 105
True Positives: 51
False Positives: 54
False Negatives: 80
True Negatives: 715
Precision: 0.4857142857142857
Recall: 0.3893129770992366
Accuracy: 0.8511111111111112


#### 2D TFLite

In [367]:
false_positives_2D = []
true_positives_2D = []
false_negatives_2D = []

#figure out all true positives
true_positives_2D = find_true_positives(positives_2D, model_name_2D)
#figure out all false positives
false_positives_2D = find_false_positives(positives_2D, true_positives_2D, model_name_2D) 
#figure out all false negatives
false_negatives_2D = find_false_negatives(positives_2D, true_positives_2D, model_name_2D)

find_metrics(positives_2D, true_positives_2D, false_positives_2D, false_negatives_2D, model_name_2D)


76
1D TFLite Model Metrics:
Total # of clips classified as gunshots: 139
True Positives: 76
False Positives: 63
False Negatives: 78
True Negatives: 683
Precision: 0.5467625899280576
Recall: 0.4935064935064935
Accuracy: 0.8433333333333334


#### Ryan's 2D TFLite

In [368]:
false_positives_2D_ryan = []
true_positives_2D_ryan = []
false_negatives_2D_ryan = []

#figure out all true positives
true_positives_2D_ryan = find_true_positives(positives_2D_ryan, model_name_ryan)
#figure out all false positives
false_positives_2D_ryan = find_false_positives(positives_2D_ryan, true_positives_2D_ryan, model_name_ryan) 
#figure out all false negatives
false_negatives_2D_ryan = find_false_negatives(positives_2D_ryan, true_positives_2D_ryan, model_name_ryan)

find_metrics(positives_2D_ryan, true_positives_2D_ryan, false_positives_2D_ryan, false_negatives_2D_ryan, model_name_ryan)


67
1D TFLite Model Metrics:
Total # of clips classified as gunshots: 138
True Positives: 67
False Positives: 71
False Negatives: 75
True Negatives: 687
Precision: 0.4855072463768116
Recall: 0.47183098591549294
Accuracy: 0.8377777777777777


#### All models - or

In [369]:
false_positives_all = []
true_positives_all = []
false_negatives_all = []

#figure out all true positives
true_positives_all = find_true_positives(all_positives, "All (or)")
#figure out all false positives
false_positives_all = find_false_positives(all_positives, true_positives_all, "All (or)") 
#figure out all false negatives
false_negatives_all = find_false_negatives(all_positives, true_positives_all, "All (or)")

find_metrics(all_positives, true_positives_all, false_positives_all, false_negatives_all, "All (or)")


98
1D TFLite Model Metrics:
Total # of clips classified as gunshots: 190
True Positives: 98
False Positives: 92
False Negatives: 56
True Negatives: 654
Precision: 0.5157894736842106
Recall: 0.6363636363636364
Accuracy: 0.8355555555555556


#### All models - and

In [370]:
false_positives_all_and = []
true_positives_all_and = []
false_negatives_all_and = []

#figure out all true positives
true_positives_all_and = find_true_positives(all_positives_and, "All (and)")
#figure out all false positives
false_positives_all_and = find_false_positives(all_positives_and, true_positives_all_and, "All (and)") 
#figure out all false negatives
false_negatives_all_and = find_false_negatives(all_positives_and, true_positives_all_and, "All (and)")

find_metrics(all_positives_and, true_positives_all_and, false_positives_all_and, false_negatives_all_and, "All (and)")


31
1D TFLite Model Metrics:
Total # of clips classified as gunshots: 67
True Positives: 31
False Positives: 36
False Negatives: 100
True Negatives: 733
Precision: 0.4626865671641791
Recall: 0.2366412213740458
Accuracy: 0.8488888888888889


### Compare Models

In [249]:
'''
#find gunshot clips that all models classified correctly (TP for all models)
all_classified_TPs = []
for i in clips_w_guns:
    if i in true_positives_1D and i in true_positives_2D and i in true_positives_2D_image and i in true_positives_1D_original:
        all_classified_TPs.append(i)
        
print("There were " + str(len(all_classified_TPs)) + " gunshot clips that all models classified correctly:")
print(all_classified_TPs)
        
#find gunshot clips that all models classified incorrectly (FN for all models)
all_classified_FNs = []
for i in clips_w_guns:
    if i in false_negatives_1D and i in false_negatives_2D and i in false_negatives_2D_image and i in false_negatives_1D_original:
        all_classified_FNs.append(i)
        
print("There were " + str(len(all_classified_FNs)) + " gunshot clips that all models classified incorrectly:")
print(all_classified_FNs)
'''

'\n#find gunshot clips that all models classified correctly (TP for all models)\nall_classified_TPs = []\nfor i in clips_w_guns:\n    if i in true_positives_1D and i in true_positives_2D and i in true_positives_2D_image and i in true_positives_1D_original:\n        all_classified_TPs.append(i)\n        \nprint("There were " + str(len(all_classified_TPs)) + " gunshot clips that all models classified correctly:")\nprint(all_classified_TPs)\n        \n#find gunshot clips that all models classified incorrectly (FN for all models)\nall_classified_FNs = []\nfor i in clips_w_guns:\n    if i in false_negatives_1D and i in false_negatives_2D and i in false_negatives_2D_image and i in false_negatives_1D_original:\n        all_classified_FNs.append(i)\n        \nprint("There were " + str(len(all_classified_FNs)) + " gunshot clips that all models classified incorrectly:")\nprint(all_classified_FNs)\n'