In [1]:
import numpy as np

# Monkey patches: point np.float, np.int, np.object and np.bool at the Python
# builtins so that code still referring to these aliases keeps working.
for _alias, _builtin_type in (
    ("float", float),
    ("int", int),
    ("object", object),
    ("bool", bool),
):
    setattr(np, _alias, _builtin_type)
del _alias, _builtin_type  # keep the loop helpers out of the notebook namespace

In [3]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import matplotlib.pyplot as plt
import gunshot_utils as utils
import importlib
import ast
import re
import os
import pickle
from glob import glob
import librosa.display
import IPython.display as ipd
import random
from IPython.display import Audio, display

import torch as th
import torchaudio
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from pydub import AudioSegment
from pydub.playback import play

# Re-execute gunshot_utils so that edits made to it after the first import are
# picked up without restarting the kernel.
importlib.reload(utils)

In [4]:
# Longer timeframes by modifying the model architecture so it can take any number of frames.
class GunshotDetectionCNN(nn.Module):
    """Two-stage CNN that maps a (batch, channels, height, width) input to one
    sigmoid score per sample.

    Layout: conv(3x7) -> ReLU -> max-pool(3x1) -> conv(3x3) -> ReLU ->
    max-pool(3x1) -> flatten -> fc(256) -> ReLU -> dropout(0.5) -> fc(1) -> sigmoid.

    The input size of the first fully connected layer is measured at
    construction time, so a model can be built for any supported input size,
    but a given instance only accepts the size it was built for.

    Args:
        num_frames: Width (last dimension) of the inputs the model will receive.
        input_height: Height (third dimension) of the inputs. Defaults to 80.
            Presumably the number of frequency bands -- confirm against the
            feature extraction code.
        in_channels: Number of input channels. Defaults to 3.

    Note:
        ``forward`` already applies the sigmoid, so its output is a value in
        [0, 1], not a logit.
    """

    def __init__(self, num_frames, input_height=80, in_channels=3):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 10, kernel_size=(3, 7))
        self.pool1 = nn.MaxPool2d(kernel_size=(3, 1))
        self.conv2 = nn.Conv2d(10, 20, kernel_size=(3, 3))
        self.pool2 = nn.MaxPool2d(kernel_size=(3, 1))

        # Measure the flattened feature size by pushing a single all-zero sample
        # through the convolutional stack. no_grad keeps this throw-away pass
        # from building an autograd graph.
        with th.no_grad():
            probe = th.zeros(1, in_channels, input_height, num_frames)
            flat_features = self._extract_features(probe).numel()

        self.fc1 = nn.Linear(flat_features, 256)
        self.fc2 = nn.Linear(256, 1)
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()

    def _extract_features(self, x):
        """Run the two conv -> ReLU -> max-pool stages shared by __init__ and forward."""
        x = self.pool1(F.relu(self.conv1(x)))
        return self.pool2(F.relu(self.conv2(x)))

    def forward(self, x):
        """Return a tensor of shape (batch, 1) with one sigmoid score per input sample."""
        x = self._extract_features(x)

        # Flatten everything except the batch dimension
        x = th.flatten(x, start_dim=1)

        x = self.dropout(F.relu(self.fc1(x)))  # Apply dropout
        return self.sigmoid(self.fc2(x))

# Example usage: build a detector whose first fully connected layer is sized for
# inputs that are 150 frames wide.
model = GunshotDetectionCNN(num_frames=150)

In [5]:
# Filter out the gunshots where the decibel levels are too low

# NOTE(review): both paths below are absolute and specific to one machine; point
# them at your own copy of the dataset before running this cell.
df = pd.read_csv('/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio/Gunshots/csv/edge-collected-gunshot-audio/updated_gunshot_metadata.csv') # Gunshot metadata; its 'filename' column is joined onto audio_dir below
audio_dir = '/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio/Gunshots/csv/edge-collected-gunshot-audio/edge-collected-gunshot-audio' # Folder holding the gunshot audio files

def get_max_decibel_level(audio_segment):
    """Return the peak level of ``audio_segment`` as reported by its ``max_dBFS`` attribute.

    Args:
        audio_segment: Object exposing ``max_dBFS`` (a pydub ``AudioSegment``
            where this notebook calls it).

    Returns:
        The value of ``audio_segment.max_dBFS``.
    """
    peak_level = audio_segment.max_dBFS
    return peak_level

def filter_gunshots_by_decibel(df, audio_dir, threshold_db=-20.0):
    """Split the recordings listed in ``df`` by their peak level.

    Each row's ``'filename'`` is joined onto ``audio_dir`` and loaded with
    ``AudioSegment.from_file``. Rows whose peak level (``get_max_decibel_level``)
    is strictly above ``threshold_db`` are kept; the remaining loadable files
    are reported as low-level. A file that fails to load is reported on stdout
    and appears in neither result.

    Args:
        df: DataFrame with a ``'filename'`` column of paths relative to ``audio_dir``.
        audio_dir: Directory that contains the audio files.
        threshold_db: Peak level a recording must exceed to be kept.

    Returns:
        tuple: ``(filtered_df, low_db_files)`` where ``filtered_df`` holds the
        kept rows of ``df`` with their original index labels, columns and
        dtypes (the columns are preserved even when no row is kept), and
        ``low_db_files`` is the list of full paths whose peak level was at or
        below ``threshold_db``.
    """
    kept_positions = []
    low_db_files = []

    for position, (_, row) in enumerate(df.iterrows()):
        full_path = os.path.join(audio_dir, row['filename'])

        # Best effort: an unreadable file is reported and skipped instead of
        # aborting the whole scan.
        try:
            audio = AudioSegment.from_file(full_path)
        except Exception as e:
            print(f"Error loading {full_path}: {e}")
            continue

        if get_max_decibel_level(audio) > threshold_db:
            kept_positions.append(position)
        else:
            low_db_files.append(full_path)

    # Select the kept rows by position rather than rebuilding a frame from the
    # row Series: an empty selection then still carries df's columns, and the
    # column dtypes are never re-inferred. copy() detaches the result from df.
    filtered_df = df.iloc[kept_positions].copy()
    return filtered_df, low_db_files

# Peak level (max_dBFS) a recording must exceed to be kept; this overrides the
# function's default of -20.0.
threshold_db = -5.0

# Filter the DataFrame based on max decibel level and get the low decibel files
filtered_df, low_db_files = filter_gunshots_by_decibel(df, audio_dir, threshold_db=threshold_db)

# Report how many recordings were at or below the threshold.
print(f"Number of files with low decibel levels:{len(low_db_files)}")

In [8]:
# The gunshot is always placed at exactly 2 seconds, so every sample shows the same pattern. To help the model generalize, we shift the gunshot position randomly and generate multiple samples.

# Load the audio file
def load_audio(audio_path):
    """Read ``audio_path`` with ``torchaudio.load``.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        tuple: The ``(waveform, sample_rate)`` pair produced by ``torchaudio.load``.
    """
    samples, rate = torchaudio.load(audio_path)
    return samples, rate

# Function to plot multiple waveforms
def plot_waveforms(waveforms, sample_rate):
    """Plot the first channel of each waveform in its own vertically stacked subplot.

    Args:
        waveforms: Sized sequence of tensors; ``waveform[0]`` (the first
            channel) of each one is plotted.
        sample_rate: Not used; kept so existing callers keep working. The
            x-axis is in samples, not seconds.

    Returns:
        None. The figure is shown with ``plt.show()``. Nothing is drawn when
        ``waveforms`` is empty.
    """
    if len(waveforms) == 0:
        # plt.subplots rejects a grid with zero rows, so there is nothing to draw.
        return

    # squeeze=False always yields a 2-D array of axes, so a single waveform
    # needs no special case.
    _, axes_grid = plt.subplots(
        len(waveforms), 1, figsize=(10, 10), sharex=True, squeeze=False
    )

    for number, (ax, waveform) in enumerate(zip(axes_grid[:, 0], waveforms), start=1):
        ax.plot(waveform[0].numpy())
        ax.set_title(f"Waveform {number}")
        ax.set_xlabel('Time (samples)')
        ax.set_ylabel('Amplitude')

    plt.tight_layout()
    plt.show()

# The modified function from earlier
def select_gunshot_segment(waveform, sample_rate, gunshot_time, frame_length, max_shift_sec=0.9):
    """Cut a window of ``frame_length`` samples around a randomly shifted gunshot time.

    The nominal ``gunshot_time`` is moved by a uniform random offset in
    ``[-max_shift_sec, max_shift_sec]`` and the window is centred on the
    result. The window is then clamped to lie inside the waveform, so the
    returned segment is shorter than ``frame_length`` only when the waveform
    itself is shorter.

    Args:
        waveform: Tensor indexed as ``[channel, sample]``.
        sample_rate: Samples per second of ``waveform``.
        gunshot_time: Nominal gunshot position in seconds.
        frame_length: Window length in samples.
        max_shift_sec: Largest random shift, in seconds, in either direction.

    Returns:
        The slice ``waveform[:, start:end]`` for the selected window.
    """
    # Jitter the nominal gunshot position.
    jittered_time = gunshot_time + random.uniform(-max_shift_sec, max_shift_sec)

    half_window_sec = (frame_length / sample_rate) / 2
    window_len = int(frame_length)

    # Centre the window on the jittered time without starting before sample 0.
    first = int(max(0, jittered_time - half_window_sec) * sample_rate)

    # If the window would run past the end, pull it back so it ends at the last sample.
    last = min(first + window_len, waveform.size(1))
    first = max(0, last - window_len)

    return waveform[:, first:last]

# Function to play multiple audio segments
def play_audio_segments(waveforms, sample_rate):
    """Show one IPython audio player per waveform, in the order given.

    Args:
        waveforms: Iterable of tensors; each is converted with ``.numpy()``.
        sample_rate: Playback rate passed to ``Audio`` as ``rate``.
    """
    for segment in waveforms:
        player = Audio(segment.numpy(), rate=sample_rate)  # Convert waveform to numpy array
        display(player)

# Example usage
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
audio_path = "/Users/borosabel/Documents/Uni/Thesis/PopMIR/Code/Audio/Gunshot/csv_combined/gunshot_dataset/with_gunshot_And Me (Remastered 2009)_glock_17_9mm(34)_6394.mp3"
waveform, sample_rate = load_audio(audio_path)

# Parameters
gunshot_time = 2.0  # nominal gunshot position in seconds
frame_length = utils.FRAME_LENGTH  # window length in samples, taken from gunshot_utils

# Run the selection 5 times and plot + play
# Each call draws its own random shift, so the five segments generally differ.
waveforms = []
for _ in range(5):
    selected_segment = select_gunshot_segment(waveform, sample_rate, gunshot_time, frame_length)
    waveforms.append(selected_segment)

# Plot the waveforms
plot_waveforms(waveforms, sample_rate)

# Play the waveforms
play_audio_segments(waveforms, sample_rate)