# Determining the mean time between false positives

## Environment Setup 

### Package Imports

In [1]:
# Machine Learning imports
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import torchaudio
import torchaudio.transforms as T
from torchvision.models import efficientnet_b3

# Processing imports
import os
import numpy as np
import pandas as pd

# Import the functions we designed to be used across notebooks to avoid redundancies and improve clarity
from utils.common import list_files, create_dataframe, train_model, evaluate_model

### Global Variables

In [2]:
# Feel free to change the following in order to accommodate your environment.
# A CUDA device puts the notebook in "training" mode; anything else is treated
# as a lighter "development" setup.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mode = "training" if device.type == "cuda" else "development"
print(f"Notebook in {mode} mode")

MODEL_DIR = "models/analysis"

# Data location. Set the GUNSHOT_DATA_ROOT environment variable to point the
# notebook at another data folder without editing this cell; when it is unset
# the original SageMaker paths are used unchanged.
DATA_ROOT = os.environ.get("GUNSHOT_DATA_ROOT", "/home/sagemaker-user/Data")
TRAIN_PREFIX = f"{DATA_ROOT}/Training data"
VAL_PREFIX = f"{DATA_ROOT}/Validation data"

# Sample rate handed to the mel-spectrogram transform of the dataset.
SAMPLE_RATE = 8000

Notebook in training mode


### Training Parameters

In [3]:
# Hyper-parameters for this notebook. `batch_size` and `num_workers` feed the
# DataLoaders; fewer loader workers are used when no CUDA device is selected.
batch_size = 128
num_workers = 2 if str(device) != "cuda" else 8
num_epochs = 30
lr = 1e-3

### Data Loading

In [4]:
# List the audio files of each split, then build one DataFrame per split
# (it carries a "label" column used below).
train_keys, val_keys = list_files(TRAIN_PREFIX), list_files(VAL_PREFIX)
train_df, val_df = create_dataframe(train_keys), create_dataframe(val_keys)

# Number of samples per label value, for each split.
train_counts = train_df["label"].value_counts().to_dict()
val_counts = val_df["label"].value_counts().to_dict()

# Creating a Sampler to account for the imbalance of the dataset: every
# training sample is weighted by the inverse of its class frequency and
# samples are drawn with replacement.
weights = train_df["label"].map(lambda label: 1.0 / train_counts[label])
sampler = WeightedRandomSampler(
    weights=weights.tolist(),
    num_samples=len(weights),
    replacement=True,
)

print(f"Found {len(train_keys)} training audios ({train_counts[1]} gunshots, {train_counts[0]} backgrounds) and {len(val_keys)} validation audios ({val_counts[1]} gunshots, {val_counts[0]} backgrounds).")

Found 28790 training audios (597 gunshots, 28193 backgrounds) and 7190 validation audios (150 gunshots, 7040 backgrounds).


## Building the training pipeline

We have to redefine our own dataset so that each sample also carries its file path. The path is needed to compute the mean time between false positives, because the first characters of the file path represent the UNIX timestamp.

In [5]:
class MeanTimeFPDataset(Dataset):
    """Mel-spectrogram dataset that also yields the path of each audio file.

    Every item is a ``(spectrogram, label, file_path)`` triple. Returning the
    path next to the tensors lets downstream code map each prediction back to
    the recording it came from.

    Args:
        df: DataFrame whose index holds the audio file paths and whose
            ``"label"`` column holds the class of each file.
        augmentation: Probability with which each individual augmentation
            step is applied, or ``None`` (default) to disable augmentation.
    """

    def __init__(self, df, augmentation=None):
        self.file_paths = df.index.values
        self.labels = df["label"].values
        self.augmentation = augmentation
        # The transforms hold no per-sample state, so they are built once here
        # instead of being re-created for every item.
        self.mel_transform = T.MelSpectrogram(sample_rate=SAMPLE_RATE, n_fft=256, hop_length=128, n_mels=64)
        self.time_mask = T.TimeMasking(time_mask_param=10)
        self.freq_mask = T.FrequencyMasking(freq_mask_param=20)

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(spectrogram, label, file_path)`` for the file at ``idx``.

        Waveform augmentation runs before the spectrogram is computed and
        spectrogram augmentation after it; both are skipped when
        ``self.augmentation`` is ``None``.
        """
        waveform = self.load_audio(idx)
        if self.augmentation is not None:
            waveform = self.augment_wf(waveform, self.augmentation)
        # The label is wrapped in a one-element tensor.
        label = torch.tensor([self.labels[idx]])
        spectrogram = self.process(waveform)
        if self.augmentation is not None:
            spectrogram = self.augment_spec(spectrogram, self.augmentation)
        file_path = self.file_paths[idx]
        return spectrogram, label, file_path

    @staticmethod
    def _normalize(waveform, eps=1e-8):
        """Centre ``waveform`` and scale it to unit standard deviation.

        A silent/constant clip has a standard deviation of zero (and a
        one-sample clip an undefined one); dividing by it would turn the whole
        waveform into NaN/inf. In those cases the waveform is only centred.
        """
        centered = waveform - waveform.mean()
        std = waveform.std()
        if not torch.isfinite(std) or std < eps:
            return centered
        return centered / std

    def load_audio(self, idx, normalize=True):
        """Loads and normalizes an audio file.

        Args:
            idx: Position of the file in ``self.file_paths``.
            normalize: When true (default), standardise the waveform to zero
                mean and unit standard deviation.

        Returns:
            The waveform tensor returned by ``torchaudio.load``, optionally
            normalized.
        """
        # NOTE(review): the file's own sample rate is not compared with
        # SAMPLE_RATE; files are presumably already stored at that rate - confirm.
        waveform, _sample_rate = torchaudio.load(self.file_paths[idx])
        if normalize:
            waveform = self._normalize(waveform)
        return waveform

    def augment_wf(self, waveform, p):
        """Randomly shift the waveform and add Gaussian noise.

        Each of the two steps is applied independently with probability ``p``.
        """
        if np.random.random() < p:
            # np.random.randint excludes the upper bound, so the shift is
            # either -1 or 0 samples.
            # NOTE(review): a wider or symmetric range may have been intended - confirm.
            shift_amt = np.random.randint(-1, 1)
            waveform = torch.roll(waveform, shifts=shift_amt, dims=-1)
        if np.random.random() < p:
            # The noise amplitude is itself drawn from N(0, 0.3) for each call.
            noise_scale = np.random.normal(0, 0.3)
            noise = torch.randn_like(waveform) * noise_scale
            waveform = waveform + noise
        return waveform

    def augment_spec(self, spectrogram, p):
        """Apply time masking and frequency masking, each with probability ``p``."""
        if np.random.random() < p:
            spectrogram = self.time_mask(spectrogram)
        if np.random.random() < p:
            spectrogram = self.freq_mask(spectrogram)
        return spectrogram

    def process(self, waveform):
        """Convert a waveform into a 64-band mel spectrogram."""
        return self.mel_transform(waveform)

# augmentation=None (the class default) disables augmentation for both splits.
train_delta = MeanTimeFPDataset(df=train_df, augmentation=None)
val_delta = MeanTimeFPDataset(df=val_df, augmentation=None)

In [6]:
# Pinned (page-locked) host memory only speeds up host-to-GPU copies. In the
# CPU "development" mode it brings nothing and makes PyTorch emit a warning,
# so it is enabled only when the selected device is CUDA.
use_pinned_memory = device.type == "cuda"

# Training batches are drawn through the weighted sampler built earlier, which
# compensates for the class imbalance.
train_loader_delta = DataLoader(
    train_delta,
    batch_size=batch_size,
    sampler=sampler,
    num_workers=num_workers,
    pin_memory=use_pinned_memory,
)

# Validation data is iterated in dataset order (no sampler, no shuffling).
val_loader_delta = DataLoader(
    val_delta,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=use_pinned_memory,
)

In [7]:
# Evaluate the saved EfficientNet-B3 checkpoint on the validation loader.
# NOTE(review): optimized_f1 / delta_fp_fn presumably enable the optimised-threshold
# metrics and the false-positive / false-negative timing sections of the report;
# confirm against utils.common.evaluate_model.
evaluate_model(
    "models/architecture/best_efficientnetb3.pth",
    val_loader_delta,
    optimized_f1=True,
    delta_fp_fn=True,
)

Evaluating: 100%|██████████| 57/57 [00:29<00:00,  1.91it/s]



=== Classification Report ===
              precision    recall  f1-score   support

  Background       1.00      1.00      1.00      7040
     Gunshot       0.89      0.79      0.84       150

    accuracy                           0.99      7190
   macro avg       0.95      0.90      0.92      7190
weighted avg       0.99      0.99      0.99      7190


=== Performance Metrics ===
Accuracy: 0.9937
Precision: 0.8947
Recall: 0.7933
F1 Score: 0.8410

=== Performance Metrics - Optimised Threshold ===
Threshold: 0.0835
Precision: 0.8421
Recall: 0.8533
F1 Score: 0.8477

=== False Positive analysis ===
Number of False Positives : 14
Mean Time Between False Positives: 6874206 seconds (79 days, 13:30:05)
Median Time Between False Positives: 11969 seconds (3:19:29)
Maximum Time Between False Positives: 86542545 seconds (1001 days, 15:35:45)
Minimum Time Between False Positives: 3 seconds (0:00:03)

=== False Negative analysis ===
Number of False Negatives : 31
Mean Time Between False Negative