In [3]:
from data_formatting import *

import torch
from torchvision.io import read_image
import torchaudio
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader
from torch.utils.data import WeightedRandomSampler
from torch.autograd import Variable
from torchvision.transforms import ToTensor, Lambda, Compose

import os
import pandas as pd
import numpy as np
import matplotlib as plt
import time
import librosa
import soundfile as sf
import warnings
from tqdm.notebook import tqdm
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
print(f"Using device: {device}")

warnings.filterwarnings('ignore')

Using device: cuda


In [5]:
# calculate_hpss_ratios("E:\\UAV_DISTASIO_DATA\\X\\test","E:\\UAV_DISTASIO_DATA\\X")
# select_data_by_hpss_ratio("E:\\UAV_DISTASIO_DATA\\X")
process_files("E:\\UAV_DISTASIO_DATA\\X\\reduced_files.xlsx", "E:\\UAV_DISTASIO_DATA\\X\\test", "E:\\UAV_DISTASIO_DATA\\X\\high_HP_Ratio", "E:\\UAV_DISTASIO_DATA\\y\\UAV_chunk_labels_high_HP_Ratio.xlsx")

Processing files:   0%|          | 0/18 [00:00<?, ?file/s]

In [2]:
class SpectrogramDataset(Dataset):
    def __init__(self, excel_file, audio_dir, transform=None, target_transform=None):
        self.df = pd.read_excel(excel_file)
        self.audio_dir = audio_dir
        self.transform = transform
        self.target_transform = target_transform
        self.label_map = {1: "Inspired Flight 1200", 2: "DJI Matrice 800", 3: "DJI Phantom 4 Pro v2", 5: "Phantom and Matrice"}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        audio_path = os.path.join(self.audio_dir, self.df.iloc[idx, 0])
        label = self.df.iloc[idx, 1]
        
        waveform, sample_rate = torchaudio.load(audio_path)
        
        # Resample the waveform if necessary
        if sample_rate != 44100:
            waveform = torchaudio.transforms.Resample(sample_rate, 44100)(waveform)
        
        # Convert waveform to spectrogram
        spectrogram = torchaudio.transforms.Spectrogram()(waveform)
        
        # Convert spectrogram to grayscale tensor
        grayscale_spectrogram = spectrogram.mean(dim=0).unsqueeze(0)
        
        if self.transform:
            grayscale_spectrogram = self.transform(grayscale_spectrogram)
        
        if self.target_transform:
            label = self.target_transform(label)
        
        return audio_path, grayscale_spectrogram, torch.tensor(label)
    
    
    def save_spectrograms_as_tensors(self, output_dir):
        os.makedirs(output_dir, exist_ok=True)
        
        for idx in range(len(self)):
            audio_file, spectrogram, label = self[idx]
            
            # Convert label to string
            label_str = self.label_map.get(label.item(), f"Unknown_{label.item()}")
            
            # Generate filename
            filename = f"spectrogram_{audio_file[len("E:_UAV_DISTASIO_DATA_X_test_") :-4]}.pt"
            filepath = os.path.join(output_dir, filename)
            print(filepath)
            
            # Save the spectrogram tensor
            torch.save(spectrogram, filepath)
            
            print(f"Saved spectrogram tensor: {filepath}")

# Usage example
# excel_file = "E:\\UAV_DISTASIO_DATA\\y\\UAV_chunk_labels.xlsx"
# audio_dir = r"E:\UAV_DISTASIO_DATA\X\ESCAPE_FORMAT_ONECHANNEL"
excel_file = "E:\\UAV_DISTASIO_DATA\\y\\UAV_chunk_labels_reduced.xlsx"
audio_dir = r"E:\UAV_DISTASIO_DATA\X\test"

# Define any transformations if needed
transform = None
label_mapping = {1:0, 2:1, 3:2, 5:3}
target_transform = lambda x: label_mapping[x]

dataset = SpectrogramDataset(excel_file, audio_dir, transform=transform, target_transform=target_transform)

In [3]:
train_size = int(.8 * len(dataset))
test_size = int(.75 * (len(dataset) - train_size))
val_size = len(dataset) - train_size - test_size

train_dataset, test_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, test_size, val_size])
print(train_dataset)
print(train_dataset[0])

# Using torch.utils.data.weightedRandomSampler to deal with non-uniform dataset
label_weights = [813/train_size, 609/train_size, 1248/train_size, 732/train_size] # label weights currently hard coded
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=False,
    sampler=WeightedRandomSampler(weights=label_weights, num_samples=train_size, replacement=True)
    )

test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False
)

val_size = DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=False
)

<torch.utils.data.dataset.Subset object at 0x000001DD8F4F0560>
('E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p02_d306_drone_chunk_20.wav', tensor([[[1.0960e-01, 6.4197e-02, 1.0492e-01,  ..., 1.9407e-03,
          1.9543e-02, 1.1300e-04],
         [1.3060e-02, 3.1552e-02, 7.1016e-02,  ..., 7.3887e-04,
          3.0412e-02, 3.0467e-04],
         [1.0622e-02, 1.4447e-03, 1.2788e-02,  ..., 9.9430e-03,
          2.6423e-02, 3.2539e-03],
         ...,
         [6.6398e-08, 1.7324e-10, 2.1351e-09,  ..., 1.5495e-09,
          4.1493e-09, 9.7345e-10],
         [1.2042e-07, 5.5553e-09, 1.2069e-08,  ..., 9.7802e-11,
          5.0046e-09, 3.2374e-09],
         [1.9026e-07, 1.1058e-08, 1.8026e-08,  ..., 3.4245e-10,
          3.3427e-09, 2.2084e-09]]]), tensor(1))


In [4]:
for audio_path, data, label in train_loader:
    print(audio_path)

('E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r06-Phantom-North_p02_d308_drone_chunk_16.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA2r03-Matrice-South-X_p02_d306_noDrone_chunk_3.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p02_d302_drone_chunk_19.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA2r03-Matrice-South-X_p02_d306_noDrone_chunk_3.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p02_d306_drone_chunk_20.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p02_d302_drone_chunk_19.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r06-Phantom-North_p02_d308_drone_chunk_16.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p02_d302_drone_chunk_19.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p02_d306_drone_chunk_20.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p02_d306_drone_chunk_20.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA2r03-Matrice-South-X_p02_d306_noDrone_chunk_3.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p02_d302_dro

In [5]:
train_dataset[400][1].shape

torch.Size([1, 201, 1103])

In [6]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.seq = nn.Sequential(
            # Kernel of size (5, 10) to emphasize the harmonic banding drones create in the input spectrograms
            nn.LazyConv2d(4, (5, 10), stride=1),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            nn.LazyConv2d(8, (5, 10), stride=1),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            nn.MaxPool2d((5, 10), stride=1),

            # Kernel of size (10, 5) on features extracted from the banding
            nn.LazyConv2d(8, (10, 5), stride=1),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            nn.LazyConv2d(8, (10, 5), stride=1),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            nn.MaxPool2d((10, 5), stride=1),

            # Large input from Conv layers requires a larger perceptron for classification
            nn.Flatten(),
            nn.LazyLinear(4096),
            nn.LazyBatchNorm1d(),
            nn.ReLU(),
            nn.LazyLinear(1024),
            nn.LazyBatchNorm1d(),
            nn.ReLU(),
            nn.LazyLinear(200),
            nn.LazyBatchNorm1d(),
            nn.ReLU(),
            nn.LazyLinear(4)
        )

    def forward(self, x):
        logits = self.seq(x)
        probs = F.softmax(logits, dim=1)
        return probs


In [7]:
def split_seconds(seconds):
    minutes = seconds // 60
    hours = minutes // 60
    days = hours // 24
    return seconds % 60, minutes % 60, hours % 24, days


In [8]:
from tqdm.notebook import tqdm

def main():
    # Define model
    model = CNN()
    # Cuda setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #device = torch.device("cpu")
    model = model.to(device)
   
    print(f"Using device: {device}")
    # Optimizer setup
    optimizer = Adam(model.parameters(), lr=1e-3)
   
    # Loss function
    loss_fn = nn.CrossEntropyLoss(reduction="mean")
   
    # Number of epochs
    num_epochs = 64
   
    # Train or load model?
    model.train()
    train_model = True
    print("Training model....")
    start = time.time()
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        total_loss = 0
       
        for batch_idx, (audio_file, images, labels) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
           
            # CNN forward pass
            probabilities = model(images)
            loss = loss_fn(probabilities, labels)
            loss.backward()
            optimizer.step()
           
            total_loss += loss.item()
       
        avg_loss = total_loss / len(train_loader)
        tqdm.write(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
       
        torch.save(model.state_dict(), str(start) + '_epoch_' + str(epoch) +".pt")
   
    end = time.time()
    seconds, minutes, hours, days = split_seconds(end - start)
    print(f"Training Runtime: {int(days)}d {int(hours)}h {int(minutes)}m {seconds:.2f}s")
   
    # Evaluate model on test data
    model.eval()
    print("Evaluating model....")
    start = time.time()
    num_test = 0
    num_correct = 0
   
    with torch.no_grad():
        for audio_file, images, labels in tqdm(test_loader, desc="Testing"):
            images, labels = images.to(device), labels.to(device)
            probabilities = model(images)
            _, preds = probabilities.max(1)
            num_test += labels.size(0)
            num_correct += preds.eq(labels).sum().item()
   
    print(f"Test accuracy: {num_correct / num_test * 100:.2f}%")
    end = time.time()
    seconds, minutes, hours, days = split_seconds(end - start)
    print(f"Testing Runtime: {int(days)}d {int(hours)}h {int(minutes)}m {seconds:.2f}s")

In [23]:
main()

Output()

Using device: cuda
Training model....


KeyboardInterrupt: 

In [26]:
# Define model
model = CNN()
# Cuda setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
model = model.to(device)

print(f"Using device: {device}")

# Optimizer setup
optimizer = Adam(model.parameters(), lr=1e-3)


state = torch.load("C:\\Users\\Alec\\Desktop\\afrl-uav-detection\\classify\\1719259767.909317_epoch_0.pt")
model.load_state_dict(state)


model.eval()
print("Evaluating model....")
start = time.time()
num_test = 0
num_correct = 0

with torch.no_grad():
    with Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeRemainingColumn(),
        refresh_per_second=10
    ) as progress:
        test_task = progress.add_task("[yellow]Testing", total=len(test_loader))
        for audio_file, images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            probabilities = model(images)
            _, preds = probabilities.max(1)
            num_test += labels.size(0)
            num_correct += preds.eq(labels).sum().item()
            progress.update(test_task, advance=1)
            progress.refresh()

print(f"Test accuracy: {num_correct / num_test * 100:.2f}%")
end = time.time()
seconds, minutes, hours, days = split_seconds(end - start)
print(f"Testing Runtime: {int(days)}d {int(hours)}h {int(minutes)}m {seconds:.2f}s")

Using device: cuda


Output()

Evaluating model....


Test accuracy: 27.36%


NameError: name 'split_seconds' is not defined

In [71]:
test_dataset[400][1].shape

torch.Size([1, 201, 1103])

In [16]:
# # Save spectrograms as tensors
dataset.save_spectrograms_as_tensors("E:\\UAV_DISTASIO_DATA\\X\\spectrograms")

E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_1.pt
Saved spectrogram tensor: E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_1.pt
E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_10.pt
Saved spectrogram tensor: E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_10.pt
E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_11.pt
Saved spectrogram tensor: E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_11.pt
E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_12.pt
Saved spectrogram tensor: E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_12.pt
E:\UAV_DISTASIO_DATA\X\spectrograms\spectrogram_sA1r01-Inspired-Hover_p01_d302_drone_chunk_2.pt
Saved spectrogram tensor: 

RuntimeError: [enforce fail at inline_container.cc:595] . unexpected pos 576 vs 470