In [49]:
import torch
from torchvision.io import read_image
import torchaudio
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torchvision.transforms import ToTensor, Lambda, Compose

import os
import pandas as pd
import numpy as np
import matplotlib as plt
import time
import librosa
import soundfile as sf
import warnings
from tqdm import tqdm
from rich.progress import Progress, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
print(f"Using device: {device}")

warnings.filterwarnings('ignore')

Using device: cuda


In [2]:
# wav_directory = "E:\\UAV_DISTASIO_DATA\\X\\ESCAPE_FORMAT_ONECHANNEL"

# label_spreadsheet = "E:\\UAV_DISTASIO_DATA\\y\\escape_singleUAV_scenarios_cleaned.xlsx"

# # Read the existing label spreadsheet
# df_labels = pd.read_excel(label_spreadsheet, header=None, names=["filename", "type", "motion"])

# # Extract the unique identifiers (sA1r01) from the label filenames
# df_labels["identifier"] = df_labels["filename"].str.extract(r"(sA\d+r\d+)")

# wav_files = [file for file in os.listdir(wav_directory) if file.endswith(".wav")]

# # Create a new DataFrame to store the entries for each .wav file
# df_entries = pd.DataFrame(columns=["filename", "type", "motion"])

# # Iterate over each .wav file
# for wav_file in wav_files:
#     # Extract the identifier (sA1r01) from the .wav filename
#     identifier = wav_file.split("-")[0]
    
#     try:
#         # Find the corresponding label in the label DataFrame
#         label_row = df_labels[df_labels["identifier"] == identifier].iloc[0]
        
#         # Create a new DataFrame for the current entry
#         entry_df = pd.DataFrame({
#             "filename": [wav_file],
#             "type": [label_row["type"]],
#             "motion": [label_row["motion"]]
#         })
        
#         # Concatenate the new entry DataFrame with the existing DataFrame
#         df_entries = pd.concat([df_entries, entry_df], ignore_index=True)
        
#     except IndexError:
#         print(f"No corresponding label found for file: {wav_file}")
#         continue

# # Save the new DataFrame to a new Excel spreadsheet
# output_spreadsheet = "E:\\UAV_DISTASIO_DATA\\y\\UAV_chunk_labels.xlsx"
# df_entries.to_excel(output_spreadsheet, index=False)

In [2]:
wav_directory = r"E:\UAV_DISTASIO_DATA\X\test"

max_duration_sec = 5

for wav in tqdm(os.listdir(wav_directory)):
        file_path = os.path.join(wav_directory, wav)
        if os.path.isfile(file_path):
            try:
                audio, sr = librosa.load(file_path)
                duration_sec = librosa.get_duration(y=audio, sr=sr)
                
                if duration_sec > max_duration_sec:
                    # Trim the audio if it exceeds the maximum duration
                    audio = audio[:int(max_duration_sec * sr)]
                    tqdm.write(f"Audio file trimmed: {file_path}")
                elif duration_sec < max_duration_sec:
                    # Pad the audio with silence if it's shorter than the maximum duration
                    pad_length = int((max_duration_sec - duration_sec) * sr)
                    audio = librosa.util.pad_center(audio, size=len(audio) + pad_length, mode='constant')
                    tqdm.write(f"Audio file padded: {file_path}")
                
                # Save the processed audio file
                sf.write(file_path, audio, sr)
            except FileNotFoundError:
                tqdm.write(f"File not found: {file_path}")
            except librosa.util.exceptions.ParameterError as e:
                tqdm.write(f"Error loading audio file: {file_path}. Error message: {str(e)}")
        else:
            tqdm.write(f"File not found: {file_path}")

  0%|          | 0/7554 [00:00<?, ?it/s]

100%|██████████| 7554/7554 [14:13<00:00,  8.85it/s]


In [50]:
class SpectrogramDataset(Dataset):
    def __init__(self, excel_file, audio_dir, transform=None, target_transform=None):
        self.df = pd.read_excel(excel_file)
        self.audio_dir = audio_dir
        self.transform = transform
        self.target_transform = target_transform
        self.label_map = {1: "Inspired Flight 1200", 2: "DJI Matrice 800", 3: "DJI Phantom 4 Pro v2", 5: "Phantom and Matrice"}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        audio_path = os.path.join(self.audio_dir, self.df.iloc[idx, 0])
        label = self.df.iloc[idx, 1]
        
        waveform, sample_rate = torchaudio.load(audio_path)
        
        # Resample the waveform if necessary
        if sample_rate != 44100:
            waveform = torchaudio.transforms.Resample(sample_rate, 44100)(waveform)
        
        # Convert waveform to spectrogram
        spectrogram = torchaudio.transforms.Spectrogram()(waveform)
        
        # Convert spectrogram to grayscale tensor
        grayscale_spectrogram = spectrogram.mean(dim=0).unsqueeze(0)
        
        if self.transform:
            grayscale_spectrogram = self.transform(grayscale_spectrogram)
        
        if self.target_transform:
            label = self.target_transform(label)
        
        return audio_path, grayscale_spectrogram, torch.tensor(label)
    
    
    def save_spectrograms_as_tensors(self, output_dir):
        os.makedirs(output_dir, exist_ok=True)
        
        for idx in range(len(self)):
            spectrogram, label = self[idx]
            
            # Convert label to string
            label_str = self.label_map.get(label.item(), f"Unknown_{label.item()}")
            
            # Generate filename
            filename = f"spectrogram_{idx}_{label_str}.pt"
            filepath = os.path.join(output_dir, filename)
            
            # Save the spectrogram tensor
            torch.save(spectrogram, filepath)
            
            print(f"Saved spectrogram tensor: {filepath}")

# Usage example
# excel_file = "E:\\UAV_DISTASIO_DATA\\y\\UAV_chunk_labels.xlsx"
# audio_dir = r"E:\UAV_DISTASIO_DATA\X\ESCAPE_FORMAT_ONECHANNEL"
excel_file = "E:\\UAV_DISTASIO_DATA\\y\\UAV_chunk_labels_test.xlsx"
audio_dir = r"E:\UAV_DISTASIO_DATA\X\test"

# Define any transformations if needed
transform = None
label_mapping = {1:0, 2:1, 3:2, 5:3}
target_transform = lambda x: label_mapping[x]

dataset = SpectrogramDataset(excel_file, audio_dir, transform=transform, target_transform=target_transform)

In [69]:
train_size = int(.8 * len(dataset))
test_size = int(.75 * (len(dataset) - train_size))
val_size = len(dataset) - train_size - test_size

train_dataset, test_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, test_size, val_size])
print(train_dataset)
print(train_dataset[0])
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True
    )

test_loader = DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=False
)

val_size = DataLoader(
    val_dataset,
    batch_size=16,
    shuffle=False
)

<torch.utils.data.dataset.Subset object at 0x0000029DDCDB19D0>
('E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r05-Phantom-South_p01_d303_drone_chunk_23.wav', tensor([[[4.9057e-03, 2.4936e-06, 4.0502e-04,  ..., 9.3959e-04,
          1.9453e-03, 7.4286e-04],
         [9.7079e-04, 5.5265e-03, 3.1456e-05,  ..., 1.9556e-02,
          3.6024e-03, 1.0361e-02],
         [3.5381e-02, 1.0979e-03, 4.3600e-03,  ..., 1.5230e-02,
          7.4555e-03, 1.5918e-02],
         ...,
         [8.9967e-06, 2.2766e-10, 6.2047e-10,  ..., 2.3151e-09,
          1.1335e-09, 8.8429e-06],
         [8.6351e-06, 9.0796e-10, 8.3371e-12,  ..., 3.2976e-09,
          6.0230e-10, 8.9212e-06],
         [8.3947e-06, 1.1193e-12, 6.2255e-11,  ..., 1.5714e-10,
          3.2806e-10, 9.0675e-06]]]), tensor(2))


In [48]:
for audio_path, data, label in train_loader:
    print(audio_path)

('E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r10-Matrice+Phantom-North-CrossPath_p01_d307_drone_chunk_6.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r06-Phantom-North_p01_d304_drone_chunk_4.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r04-Matrice-North_p02_d304_drone_chunk_24.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r06-Phantom-North_p04_d302_drone_chunk_23.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r03-Matrice-South_p01_d306_noDrone_chunk_15.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r09-Matrice+Phantom-South-CrossPath_p02_d307_drone_chunk_11.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r05-Phantom-South_p01_d307_drone_chunk_16.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA2r01-Inspired-South-X_p02_d303_drone_chunk_8.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA2r05-Phantom-South-X_p04_d307_drone_chunk_6.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r06-Phantom-North_p01_d303_drone_chunk_1.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA3r04-Matrice-North_p05_d305_noDrone_chunk_10.wav', 'E:\\UAV_DISTASIO_DATA\\X\\test\\sA

In [57]:
train_dataset[400][1].shape

torch.Size([1, 201, 1103])

In [58]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.seq = nn.Sequential(
            nn.LazyConv2d(16, (5, 10), stride=1),
            nn.ReLU(),
            nn.LazyConv2d(32, (5, 10), stride=1),
            nn.ReLU(),
            nn.MaxPool2d((5, 10), stride=1),

            nn.LazyConv2d(32, (10, 5), stride=1),
            nn.ReLU(),
            nn.LazyConv2d(32, (10, 5), stride=1),
            nn.ReLU(),
            nn.MaxPool2d((10, 5), stride=1),

            nn.Flatten(),
            nn.LazyLinear(1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.LazyLinear(200),
            nn.ReLU(),
            nn.LazyLinear(4)
        )

    def forward(self, x):
        logits = self.seq(x)
        probs = F.softmax(logits, dim=1)
        return probs


In [59]:
def split_seconds(seconds):
    minutes = seconds // 60
    hours = minutes // 60
    days = hours // 24
    return seconds % 60, minutes % 60, hours % 24, days


In [61]:
def main():
    # Define model
    model = CNN()
    # Cuda setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    model = model.to(device)
    
    print(f"Using device: {device}")

    # Optimizer setup
    optimizer = Adam(model.parameters(), lr=1e-3)
   
    # Loss function
    loss_fn = nn.CrossEntropyLoss(reduction="mean")
   
    # Number of epochs
    num_epochs = 64
   
    # Train or load model?
    model.train()
    train_model = True
    print("Training model....")
    start = time.time()

    with Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeRemainingColumn(),
        refresh_per_second=10
    ) as progress:
        epoch_task = progress.add_task("[cyan]Epochs", total=num_epochs)
        batch_task = progress.add_task("[green]Batches", total=len(train_loader), visible=False)
        for epoch in range(num_epochs):
            progress.update(batch_task, visible=True, completed=0, total=len(train_loader))
            progress.update(epoch_task, advance=1)
            total_loss = 0
            
            for batch_idx, (audio_file, images, labels) in enumerate(train_loader):
                images, labels = images.to(device), labels.to(device)
                #print(f'Running on batch {batch_idx}')
                optimizer.zero_grad()
                
                # CNN forward pass
                probabilities = model(images)
                loss = loss_fn(probabilities, labels)
                loss.backward()
                optimizer.step()
                
                total_loss += loss.item()
                progress.update(batch_task, advance=1)
                progress.refresh()
            
            avg_loss = total_loss / len(train_loader)
            progress.print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
            progress.update(batch_task, visible=False)
    
            torch.save(model.state_dict(), str(start) + '_epoch_' + str(epoch) +".pt")
   
    end = time.time()
    seconds, minutes, hours, days = split_seconds(end - start)
    print(f"Training Runtime: {int(days)}d {int(hours)}h {int(minutes)}m {seconds:.2f}s")
   
    # Evaluate model on test data
    model.eval()
    print("Evaluating model....")
    start = time.time()
    num_test = 0
    num_correct = 0
   
    with torch.no_grad():
        with Progress(
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            TimeRemainingColumn(),
            refresh_per_second=10
        ) as progress:
            test_task = progress.add_task("[yellow]Testing", total=len(test_loader))
            for audio_file, images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                probabilities = model(images)
                _, preds = probabilities.max(1)
                num_test += labels.size(0)
                num_correct += preds.eq(labels).sum().item()
                progress.update(test_task, advance=1)
                progress.refresh()
   
    print(f"Test accuracy: {num_correct / num_test * 100:.2f}%")
    end = time.time()
    seconds, minutes, hours, days = split_seconds(end - start)
    print(f"Testing Runtime: {int(days)}d {int(hours)}h {int(minutes)}m {seconds:.2f}s")

In [62]:
main()

Output()

Output()

Training Runtime: 0d 0h 17m 45.42s
Evaluating model....


ZeroDivisionError: division by zero

In [72]:
# Define model
model = CNN()
# Cuda setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
model = model.to(device)

print(f"Using device: {device}")

# Optimizer setup
optimizer = Adam(model.parameters(), lr=1e-3)


state = torch.load("C:\\Users\\Alec\\Desktop\\afrl-uav-detection\\classify\\6_24_24.pt")
model.load_state_dict(state)


model.eval()
print("Evaluating model....")
start = time.time()
num_test = 0
num_correct = 0

with torch.no_grad():
    with Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeRemainingColumn(),
        refresh_per_second=10
    ) as progress:
        test_task = progress.add_task("[yellow]Testing", total=len(test_loader))
        for audio_file, images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            probabilities = model(images)
            _, preds = probabilities.max(1)
            num_test += labels.size(0)
            num_correct += preds.eq(labels).sum().item()
            progress.update(test_task, advance=1)
            progress.refresh()

print(f"Test accuracy: {num_correct / num_test * 100:.2f}%")
end = time.time()
seconds, minutes, hours, days = split_seconds(end - start)
print(f"Testing Runtime: {int(days)}d {int(hours)}h {int(minutes)}m {seconds:.2f}s")

Output()

Using device: cuda
Evaluating model....


Test accuracy: 26.39%
Testing Runtime: 0d 0h 0m 12.74s


In [71]:
test_dataset[400][1].shape

torch.Size([1, 201, 1103])

In [None]:
# # Save spectrograms as tensors
# dataset.save_spectrograms_as_tensors(output_dir)