In [3]:
import numpy as np

# Monkey-patch the numpy module so that np.float, np.int, np.object and np.bool
# all point at the corresponding Python builtins.
for _alias, _builtin in (("float", float), ("int", int), ("object", object), ("bool", bool)):
    setattr(np, _alias, _builtin)

In [4]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import matplotlib.pyplot as plt
import gunshot_utils as utils
import importlib
import ast
import re
import os
import pickle
from glob import glob
import librosa.display
import IPython.display as ipd

import torch as th
import numpy as np
import torchaudio
from torch.utils.data import DataLoader
from IPython.display import Audio
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from pydub import AudioSegment
from pydub.playback import play

# Re-import the already-loaded gunshot_utils module so that edits made to it
# are picked up without restarting the kernel.
importlib.reload(utils)

In [5]:
# All data locations hang off one project root so the notebook can run on another
# machine: set the POPMIR_ROOT environment variable to override the default below.
import os
from pathlib import Path

PROJECT_ROOT = Path(os.environ.get("POPMIR_ROOT", "/Users/borosabel/Documents/Uni/Thesis/PopMIR"))

# Gunshot metadata (one row per recording) and the music metadata table.
gunshot_df = pd.read_csv(PROJECT_ROOT / "Code/Audio/Gunshot/csv_combined/filtered_gunshot_metadata.csv")
music_df = pd.read_excel(PROJECT_ROOT / "Data/Excel/baseline_data_w_topics_w_features.xlsx", engine='openpyxl')

# Kept as a plain string, exactly as before, for code that joins it with os.path.
gunshot_audio_dir = str(PROJECT_ROOT / "Data/Audio/Gunshots/csv/edge-collected-gunshot-audio/edge-collected-gunshot-audio")  # Gunshot folder

In [6]:
import os
from pydub import AudioSegment
import pandas as pd
from pydub.playback import play  # For playing the audio

def get_max_decibel_level(audio_segment):
    """Return the peak level of ``audio_segment`` as reported by its ``max_dBFS`` attribute."""
    peak_dbfs = audio_segment.max_dBFS
    return peak_dbfs

# Optional: Function to get the average RMS loudness in dB (relative to full scale)
def get_rms_decibel_level(audio_segment):
    """Return the RMS loudness of ``audio_segment`` in dB relative to full scale.

    :param audio_segment: Object exposing an ``rms`` attribute (e.g. a pydub
        ``AudioSegment``). If it also exposes ``max_possible_amplitude`` that value
        is used as full scale; otherwise 16-bit full scale (``1 << 15``) is assumed.
    :return: ``20 * log10(rms / full_scale)``, or ``-inf`` for silent audio.
    """
    import math  # local import so this cell does not depend on another cell's imports

    rms = audio_segment.rms
    if rms <= 0:
        # log10(0) is undefined; silence is reported as negative infinity.
        return -float('inf')

    full_scale = getattr(audio_segment, 'max_possible_amplitude', 1 << 15)
    # Decibels are logarithmic: the previous linear "20 * ratio" was not a dB value.
    return 20 * math.log10(rms / full_scale)

# Function to filter gunshots based on decibel level and optionally play the quietest one
def filter_gunshots_by_decibel(df, audio_dir, threshold_db=-20.0, use_rms=False, play_quietest=False, gunshot_names=None):
    """Keep the rows of ``df`` whose audio file is louder than ``threshold_db``.

    :param df: DataFrame with a 'filename' column; each value is joined onto ``audio_dir``.
    :param audio_dir: Directory the filenames are resolved against.
    :param threshold_db: A file is kept only if its loudness is strictly greater than this.
    :param use_rms: If True measure loudness with ``get_rms_decibel_level``,
        otherwise with ``get_max_decibel_level``.
    :param play_quietest: If True, play the quietest of the kept files after filtering.
    :param gunshot_names: Optional list of substrings; when given, only filenames
        containing at least one of them (case-insensitive) are considered.
    :return: A DataFrame holding the kept rows. It always has the columns, dtypes and
        index labels of ``df``, even when no row is kept.
    """
    kept_positions = []
    quietest_audio = None
    min_db = float('inf')  # Loudness of the quietest kept file seen so far

    for position, (_, row) in enumerate(df.iterrows()):
        file_path = row['filename']
        full_path = os.path.join(audio_dir, file_path)

        # Skip files that don't match any of the requested gunshot names
        if gunshot_names:
            lowered = file_path.lower()
            if not any(name.lower() in lowered for name in gunshot_names):
                continue

        # Unreadable files are reported and skipped (best effort, not fatal)
        try:
            audio = AudioSegment.from_file(full_path)
        except Exception as e:
            print(f"Error loading {full_path}: {e}")
            continue

        # Use RMS loudness if specified, otherwise use max dBFS
        if use_rms:
            loudness_db = get_rms_decibel_level(audio)
        else:
            loudness_db = get_max_decibel_level(audio)

        if loudness_db > threshold_db:
            kept_positions.append(position)

            # Track the quietest among the loud files
            if loudness_db < min_db:
                min_db = loudness_db
                quietest_audio = audio

    # Select by position rather than rebuilding from row Series: this preserves the
    # columns even when nothing matched, so downstream df['filename'] cannot KeyError.
    filtered_df = df.iloc[kept_positions].copy()

    # Compare against None explicitly: an AudioSegment's truthiness depends on its length.
    if play_quietest and quietest_audio is not None:
        metric_label = "an RMS loudness" if use_rms else "a max dBFS"
        print(f"Quietest audio from the filtered set has {metric_label} of {min_db}. Playing it...")
        play(quietest_audio)

    return filtered_df

# Example: keep only the loud "glock" recordings

# Loudness cut-off in dB; only files strictly louder than this are kept
threshold_db = -1.0

# False -> compare the peak level (max dBFS); True -> compare the RMS loudness
use_rms = False

# When True, the quietest of the kept recordings is played after filtering
play_quietest = True

# Substrings (matched case-insensitively) that a filename must contain
gunshot_names = ['glock']

# Run the filter over the gunshot metadata table
filtered_df = filter_gunshots_by_decibel(
    gunshot_df,
    gunshot_audio_dir,
    threshold_db=threshold_db,
    use_rms=use_rms,
    play_quietest=play_quietest,
    gunshot_names=gunshot_names,
)

# Report how many recordings passed the filter
print(f"Number of filtered loud files: {len(filtered_df)}")

In [7]:
# Display the gunshot rows that passed the loudness / filename filter.
filtered_df

In [8]:
# Preview the first rows of the music metadata table.
music_df.head()

In [9]:
import torchaudio
import numpy as np
import torch as th
import matplotlib.pyplot as plt
from IPython.display import Audio

class GunshotDataset(th.utils.data.Dataset):
    """Demo dataset that overlays a gunshot on a random music excerpt.

    Indexing loads one music file, cuts a random excerpt of ``excerpt_len`` seconds,
    mixes in a gain-adjusted gunshot with probability ``gunshot_prob``, plots and
    plays the result, and returns ``(waveform, sample_rate)``.
    """

    def __init__(self, music_df, gunshot_df, excerpt_len=5.0, gunshot_placement_sec=2.0, gunshot_prob=1.0, min_db=3, max_db=5, gunshot_audio_dir=None):
        """
        :param music_df: DataFrame whose 'Path' column holds the music file paths.
        :param gunshot_df: DataFrame with 'filename' and 'gunshot_location_in_seconds' columns.
        :param excerpt_len: Length of the music segment in seconds.
        :param gunshot_placement_sec: Time in seconds where to place the gunshot in the music.
        :param gunshot_prob: Probability of adding a gunshot to the segment.
        :param min_db: Minimum gain (in dB) to apply to the gunshot.
        :param max_db: Maximum gain (in dB) to apply to the gunshot.
        :param gunshot_audio_dir: Optional directory the gunshot filenames are joined
            onto before loading. ``None`` (default) loads the filenames as given.
        """
        super().__init__()
        self.music_paths = music_df['Path'].tolist()
        self.gunshot_paths = gunshot_df['filename'].tolist()
        # NOTE(review): preprocess_gunshot_times is not defined in this cell; it must
        # already exist in the notebook namespace when this runs — confirm its origin.
        self.gunshot_truth = gunshot_df['gunshot_location_in_seconds'].apply(
            lambda x: preprocess_gunshot_times(x)
        ).tolist()
        self.excerpt_len = excerpt_len  # Excerpt length in seconds
        self.gunshot_placement_sec = gunshot_placement_sec  # Where to place the gunshot in the music segment
        self.gunshot_prob = gunshot_prob
        self.min_db = min_db
        self.max_db = max_db
        self.gunshot_audio_dir = gunshot_audio_dir

    def __len__(self):
        """Number of music files; one item per file."""
        return len(self.music_paths)

    def __getitem__(self, idx):
        """Return ``(waveform, sample_rate)`` for a random excerpt of music file ``idx``."""
        fn_music = self.music_paths[idx]

        waveform, sr = torchaudio.load(fn_music)  # Load the music file

        # Calculate excerpt length and start position in samples
        excerpt_len_in_samples = int(self.excerpt_len * sr)
        max_start_sample = max(0, waveform.size(1) - excerpt_len_in_samples)
        # randint's upper bound is exclusive: "+ 1" makes the last valid start reachable
        # and avoids a ValueError when the file is no longer than the excerpt.
        start_pos = np.random.randint(0, max_start_sample + 1)

        music_segment = waveform[:, start_pos:start_pos + excerpt_len_in_samples]

        add_gunshot = (np.random.rand() < self.gunshot_prob)
        if add_gunshot:
            gunshot_segment = self._load_random_gunshot(sr)
            if gunshot_segment is not None:
                display(self.play_audio(gunshot_segment, sr))

                # Place the gunshot at a specific time in the music segment
                placement_sample = int(self.gunshot_placement_sec * sr)
                self._mix_in_gunshot(music_segment, gunshot_segment, placement_sample)

        # Plot and play the audio segment
        self.plot_waveform(music_segment, sr)
        display(self.play_audio(music_segment, sr))

        return music_segment.squeeze(), sr

    def _load_random_gunshot(self, target_sr):
        """Pick a random gunshot, cut ~1 s around one of its events and apply a random gain.

        The returned tensor is at ``target_sr``. Returns ``None`` when no gunshot has
        a usable timestamp or the cut falls outside the recording.
        """
        # Only recordings with at least one timestamp can be used.
        candidates = [i for i, times in enumerate(self.gunshot_truth) if np.size(times) > 0]
        if not candidates:
            return None

        # randint(n) covers 0..n-1, so every candidate (including the last) can be drawn.
        gunshot_idx = candidates[np.random.randint(len(candidates))]
        fn_gunshot = self.gunshot_paths[gunshot_idx]
        print(f"Selected gunshot {fn_gunshot}")

        gunshot_time = float(np.random.choice(self.gunshot_truth[gunshot_idx]))

        if self.gunshot_audio_dir is not None:
            fn_gunshot = os.path.join(self.gunshot_audio_dir, fn_gunshot)
        gunshot_waveform, sr_gunshot = torchaudio.load(fn_gunshot)

        # Cut just before the gunshot (0.1 seconds before) and keep 1 second.
        # Sample offsets are computed with the gunshot's OWN sample rate.
        cut_before = 0.1
        gunshot_duration_sec = 1.0
        gunshot_start_sample = int(max(0.0, gunshot_time - cut_before) * sr_gunshot)
        gunshot_segment_len = int(gunshot_duration_sec * sr_gunshot)
        gunshot_segment = gunshot_waveform[:, gunshot_start_sample:gunshot_start_sample + gunshot_segment_len]
        if gunshot_segment.size(1) == 0:
            return None

        # Bring the gunshot to the music's sample rate before mixing.
        if sr_gunshot != target_sr:
            gunshot_segment = torchaudio.functional.resample(gunshot_segment, sr_gunshot, target_sr)

        # Apply random gain to the gunshot
        gain_db = float(np.random.uniform(self.min_db, self.max_db))
        return gunshot_segment * (10 ** (gain_db / 20))

    @staticmethod
    def _mix_in_gunshot(music_segment, gunshot_segment, placement_sample):
        """Add ``gunshot_segment`` into ``music_segment`` in place at ``placement_sample``.

        The gunshot is truncated to what fits before the end of the music segment and
        down-mixed to mono when channel counts differ. Returns True if anything was added.
        """
        usable = min(gunshot_segment.size(1), music_segment.size(1) - placement_sample)
        if usable <= 0:
            return False
        shot = gunshot_segment[:, :usable]
        if shot.size(0) != music_segment.size(0):
            shot = shot.mean(dim=0, keepdim=True)  # one channel broadcasts over all music channels
        music_segment[:, placement_sample:placement_sample + usable] += shot
        return True

    # Function to plot the waveform
    def plot_waveform(self, waveform, sr):
        """Plot the first channel of ``waveform`` against time in seconds."""
        time_axis = np.linspace(0, len(waveform[0]) / sr, num=len(waveform[0]))
        plt.figure(figsize=(10, 4))
        plt.plot(time_axis, waveform[0].numpy())
        plt.title('Waveform')
        plt.xlabel('Time (seconds)')
        plt.ylabel('Amplitude')
        plt.grid()
        plt.show()

    # Function to play the audio
    def play_audio(self, waveform, sr):
        """Wrap ``waveform`` in an IPython ``Audio`` widget at sample rate ``sr``."""
        return Audio(waveform.numpy(), rate=sr)

# Demo: build the dataset and fetch its first item
dataset = GunshotDataset(
    music_df,
    filtered_df,
    excerpt_len=5.0,
    gunshot_placement_sec=2.0,
    min_db=5,
    max_db=10,
)
music_segment, sr = dataset[0]


In [22]:
import torchaudio
import torch as th
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Audio
from torch.utils.data import DataLoader

class GunshotDataset(th.utils.data.Dataset):
    """Music excerpts, optionally with a gunshot mixed in, as normalised mel spectrograms.

    Each item is ``(spectrogram, label)`` where ``label`` is 1 if a gunshot was
    actually mixed into the excerpt and 0 otherwise.
    """

    GUNSHOT_LEAD_IN_SEC = 0.1   # audio kept before the annotated gunshot time
    GUNSHOT_DURATION_SEC = 1.0  # length of the gunshot cut
    STD_EPS = 1e-8              # keeps normalisation finite for zero-variance mel bands

    def __init__(self, music_df, gunshot_df, excerpt_len=5.0, gunshot_placement_sec=2.0, gunshot_prob=1.0, min_db=3, max_db=5, mean=None, std=None, gunshot_audio_dir=None):
        """
        :param music_df: DataFrame containing paths to music files.
        :param gunshot_df: DataFrame containing paths to gunshot files and timing info.
        :param excerpt_len: Length of the music segment in seconds.
        :param gunshot_placement_sec: Time in seconds where to place the gunshot in the music.
        :param gunshot_prob: Probability of adding a gunshot to the segment.
        :param min_db: Minimum gain (in dB) to apply to the gunshot.
        :param max_db: Maximum gain (in dB) to apply to the gunshot.
        :param mean: Precomputed mean for normalization (optional).
        :param std: Precomputed std for normalization (optional).
        :param gunshot_audio_dir: Optional directory the gunshot filenames are joined
            onto before loading. ``None`` (default) loads the filenames as given.
        """
        super().__init__()
        self.music_paths = music_df['Path'].tolist()
        self.gunshot_paths = gunshot_df['filename'].tolist()
        self.gunshot_truth = gunshot_df['gunshot_location_in_seconds'].apply(
            lambda x: self.preprocess_gunshot_times(x)
        ).tolist()
        self.excerpt_len = excerpt_len
        self.gunshot_placement_sec = gunshot_placement_sec
        self.gunshot_prob = gunshot_prob
        self.min_db = min_db
        self.max_db = max_db
        self.gunshot_audio_dir = gunshot_audio_dir

        # One MelSpectrogram transform per sample rate, built on first use.
        self._mel_transforms = {}

        # Mean and std for normalization
        self.mean = mean
        self.std = std

        if self.mean is None or self.std is None:
            self.mean, self.std = self.calculate_mean_std()

    def preprocess_gunshot_times(self, gunshot_times, include_first_gunshot_only=False):
        """Parse a stringified list of gunshot times into a list of floats.

        Accepts comma-separated (``"[0.5, 1.5]"``) and whitespace-separated
        (``"[0.5 1.  2.25]"``) lists. Anything that is not a string, does not parse,
        or does not evaluate to a list yields ``[]``.

        :param include_first_gunshot_only: If True, return at most the first time.
        """
        if not isinstance(gunshot_times, str):
            return []
        gunshot_times = re.sub(r'\s+', ' ', gunshot_times).strip()
        # Insert the missing commas of whitespace-separated lists. The look-behind also
        # accepts a trailing '.' ("1. 2.5") and the look-ahead a sign or a leading '.',
        # which the previous digit-only pattern missed and silently turned into [].
        gunshot_times = re.sub(r'(?<=[\d.])\s(?=[-+.\d])', ', ', gunshot_times)
        gunshot_times = gunshot_times.replace(', ]', ']')
        try:
            gunshot_list = ast.literal_eval(gunshot_times)
            if not isinstance(gunshot_list, list):
                return []
            gunshot_list = [float(x) for x in gunshot_list if isinstance(x, (int, float))]
            if include_first_gunshot_only and gunshot_list:
                return [gunshot_list[0]]
            return gunshot_list
        except (ValueError, SyntaxError):
            return []

    def calculate_mean_std(self):
        """Per-mel-band mean and std over the first 100 music files.

        Statistics are accumulated as running sums, so excerpts of different lengths
        are fine and the spectrograms are never all held in memory at once.

        :return: ``(mean, std)``, each of shape ``(n_mels, 1)``; neutral ``(0, 1)``
            tensors of shape ``(1, 1)`` when there is no audio to measure.
        """
        band_sum = None
        band_sq_sum = None
        value_count = 0  # values accumulated per mel band (channels * frames)

        for path in self.music_paths[:100]:  # Limit to first 100 files to save time/memory
            waveform, sr = torchaudio.load(path)
            excerpt_len_in_samples = int(self.excerpt_len * sr)
            excerpt = waveform[:, :excerpt_len_in_samples]  # Short excerpt from the start
            spectrogram = self._mel_spectrogram(excerpt, sr).double()  # (channels, n_mels, frames)
            if band_sum is None:
                band_sum = th.zeros(spectrogram.size(1), dtype=th.float64)
                band_sq_sum = th.zeros_like(band_sum)
            band_sum += spectrogram.sum(dim=(0, 2))
            band_sq_sum += (spectrogram ** 2).sum(dim=(0, 2))
            value_count += spectrogram.size(0) * spectrogram.size(2)

        if value_count == 0:
            return th.zeros(1, 1), th.ones(1, 1)

        mean = band_sum / value_count
        if value_count > 1:
            # Unbiased estimator, matching th.std's default.
            variance = (band_sq_sum - value_count * mean ** 2) / (value_count - 1)
        else:
            variance = th.zeros_like(mean)
        std = variance.clamp_min(0).sqrt()  # clamp guards against tiny negative round-off
        return mean.float().unsqueeze(1), std.float().unsqueeze(1)

    def _mel_spectrogram(self, waveform, sr):
        """Mel spectrogram of ``waveform`` using a transform built for sample rate ``sr``."""
        transform = self._mel_transforms.get(sr)
        if transform is None:
            # Pass the real sample rate; the transform's default is 16 kHz.
            transform = torchaudio.transforms.MelSpectrogram(sample_rate=sr)
            self._mel_transforms[sr] = transform
        return transform(waveform)

    def preprocess_audio(self, waveform, sr):
        """Turn a waveform into a mel spectrogram normalised with ``self.mean`` / ``self.std``.

        NOTE(review): __getitem__ called this method but the class never defined it.
        This implementation uses the same features as calculate_mean_std — confirm
        that it matches the intended pipeline (e.g. whether log scaling is wanted).
        """
        spectrogram = self._mel_spectrogram(waveform, sr)
        return (spectrogram - self.mean) / (self.std + self.STD_EPS)

    def _random_excerpt(self, waveform, sr):
        """Cut a random ``excerpt_len``-second excerpt, zero-padded if the file is shorter."""
        excerpt_len_in_samples = int(self.excerpt_len * sr)
        max_start_sample = max(0, waveform.size(1) - excerpt_len_in_samples)
        # randint's upper bound is exclusive: "+ 1" makes the last valid start reachable
        # and avoids a ValueError when the file is no longer than the excerpt.
        start_pos = np.random.randint(0, max_start_sample + 1)
        segment = waveform[:, start_pos:start_pos + excerpt_len_in_samples].clone()

        # Equal-length excerpts are required for the default DataLoader collation.
        missing = excerpt_len_in_samples - segment.size(1)
        if missing > 0:
            segment = th.nn.functional.pad(segment, (0, missing))
        return segment

    def _load_random_gunshot(self, target_sr):
        """Pick a random gunshot, cut it around one of its events and apply a random gain.

        The returned tensor is at ``target_sr``. Returns ``None`` when no gunshot has
        a usable timestamp or the cut falls outside the recording.
        """
        candidates = [i for i, times in enumerate(self.gunshot_truth) if len(times) > 0]
        if not candidates:
            return None

        # randint(n) covers 0..n-1, so every candidate (including the last) can be drawn.
        gunshot_idx = candidates[np.random.randint(len(candidates))]
        gunshot_time = float(np.random.choice(self.gunshot_truth[gunshot_idx]))

        fn_gunshot = self.gunshot_paths[gunshot_idx]
        if self.gunshot_audio_dir is not None:
            fn_gunshot = os.path.join(self.gunshot_audio_dir, fn_gunshot)
        gunshot_waveform, sr_gunshot = torchaudio.load(fn_gunshot)

        # Sample offsets are computed with the gunshot's OWN sample rate.
        start_sample = int(max(0.0, gunshot_time - self.GUNSHOT_LEAD_IN_SEC) * sr_gunshot)
        segment_len = int(self.GUNSHOT_DURATION_SEC * sr_gunshot)
        gunshot_segment = gunshot_waveform[:, start_sample:start_sample + segment_len]
        if gunshot_segment.size(1) == 0:
            return None

        # Bring the gunshot to the music's sample rate before mixing.
        if sr_gunshot != target_sr:
            gunshot_segment = torchaudio.functional.resample(gunshot_segment, sr_gunshot, target_sr)

        # Apply random gain to the gunshot
        gain_db = float(np.random.uniform(self.min_db, self.max_db))
        return gunshot_segment * (10 ** (gain_db / 20))

    @staticmethod
    def _mix_in_gunshot(music_segment, gunshot_segment, placement_sample):
        """Add ``gunshot_segment`` into ``music_segment`` in place at ``placement_sample``.

        The gunshot is truncated to what fits before the end of the music segment and
        down-mixed to mono when channel counts differ. Returns True if anything was added.
        """
        usable = min(gunshot_segment.size(1), music_segment.size(1) - placement_sample)
        if usable <= 0:
            return False
        shot = gunshot_segment[:, :usable]
        if shot.size(0) != music_segment.size(0):
            shot = shot.mean(dim=0, keepdim=True)  # one channel broadcasts over all music channels
        music_segment[:, placement_sample:placement_sample + usable] += shot
        return True

    def __getitem__(self, idx):
        """Return ``(spectrogram, label)`` for a random excerpt of music file ``idx``."""
        fn_music = self.music_paths[idx]

        # Load the music file and cut the excerpt
        waveform, sr = torchaudio.load(fn_music)
        music_segment = self._random_excerpt(waveform, sr)

        # Randomly decide if we add a gunshot
        has_gunshot = False
        if np.random.rand() < self.gunshot_prob:
            gunshot_segment = self._load_random_gunshot(sr)
            if gunshot_segment is not None:
                # Place the gunshot at a specific time in the music segment
                placement_sample = int(self.gunshot_placement_sec * sr)
                has_gunshot = self._mix_in_gunshot(music_segment, gunshot_segment, placement_sample)

        # Preprocess the resulting segment (music + gunshot) into a spectrogram
        spectrogram = self.preprocess_audio(music_segment, sr)

        # Label reflects what is really in the audio: 1 only if a gunshot was mixed in.
        return spectrogram, 1 if has_gunshot else 0

    def __len__(self):
        """Number of music files; one item per file."""
        return len(self.music_paths)

# Build the dataset, wrap it in a shuffling DataLoader and print every batch
dataset = GunshotDataset(
    music_df,
    filtered_df,
    excerpt_len=5.0,
    gunshot_placement_sec=2.0,
    min_db=5,
    max_db=10,
)
dataloader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
)

for spectrogram, label in dataloader:
    print(spectrogram.shape, label)