In [17]:
import utils
import torch
import numpy as np
import json
import torch.nn as nn
import os
import utils

In [6]:
from pydub import AudioSegment
import os
import torchaudio

def segment_audio(input_path, output_dir, segment_length_ms=3000, overlap=0.5):
    """Split a WAV file into fixed-length windows and build a spectrogram for each.

    Every window is exported to ``output_dir`` as ``segment_<start>.wav``
    (``<start>`` is the window's start offset), reloaded with
    ``torchaudio.load`` and passed to ``utils.generate_spectrogram``.

    Args:
        input_path: Path of the WAV file, read with pydub.
        output_dir: Directory that receives the exported segment files. It is
            created if it does not exist yet.
        segment_length_ms: Window length, in the unit pydub uses for ``len()``
            and slicing (milliseconds).
        overlap: Fraction of a window shared with the next one. The hop between
            consecutive windows is ``int(segment_length_ms * (1 - overlap))``.

    Returns:
        A ``(segments, specs)`` tuple of equally long lists: the pydub segments
        and the matching results of ``utils.generate_spectrogram``. Both lists
        are empty when the audio is shorter than one window.

    Raises:
        ValueError: If the hop derived from ``segment_length_ms`` and
            ``overlap`` is not a positive integer (e.g. ``overlap >= 1``).
    """
    # Validate before touching the filesystem: a zero hop would make range()
    # fail with an unrelated message, a negative hop would silently yield nothing.
    step = int(segment_length_ms * (1 - overlap))
    if step <= 0:
        raise ValueError(
            f"segment_length_ms={segment_length_ms!r} and overlap={overlap!r} "
            f"give a non-positive hop ({step}); use overlap < 1 and a positive length"
        )

    # Exporting into a missing directory would fail on the first segment.
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_wav(input_path)
    specs = []
    segments = []

    for i in range(0, len(audio) - segment_length_ms + 1, step):
        segment = audio[i:i + segment_length_ms]
        segment_path = os.path.join(output_dir, f"segment_{i}.wav")
        # Round-trip through disk so torchaudio produces the waveform tensor
        # and sample rate that generate_spectrogram is called with.
        segment.export(segment_path, format="wav")
        waveform, sr = torchaudio.load(segment_path)
        spec = utils.generate_spectrogram(waveform, sr)
        segments.append(segment)
        specs.append(spec)
    return segments, specs


In [7]:
from torch.utils.data import Dataset, DataLoader

class InferenceDataset(Dataset):
    """Label-free dataset that serves items from an indexable collection of spectrograms."""

    def __init__(self, specs):
        # Kept as given; items are handed out without any transformation.
        self.specs = specs

    def __len__(self):
        """Return how many spectrograms are stored."""
        n_items = len(self.specs)
        return n_items

    def __getitem__(self, idx):
        """Return the spectrogram stored at position ``idx``."""
        item = self.specs[idx]
        return item

def create_loader(specs, batch_size=16):
    """Wrap ``specs`` in an ``InferenceDataset`` and return a non-shuffling DataLoader.

    Args:
        specs: Collection handed to ``InferenceDataset``.
        batch_size: Number of items per batch.

    Returns:
        A ``DataLoader`` that iterates the dataset in its original order.
    """
    loader_options = {"batch_size": batch_size, "shuffle": False}
    return DataLoader(InferenceDataset(specs), **loader_options)


In [8]:
# Paths
# NOTE(review): file_path is an absolute, machine-specific location; adjust it
# before running this notebook on another machine.
file_path = "/home/giacomoschiavo/Tovanella/20190601_000000.WAV"
# Directory that receives the per-segment WAV files written by segment_audio.
segment_output_dir = "single_audio_analysis_tmp"
os.makedirs(segment_output_dir, exist_ok=True)

# Segment the recording and build one spectrogram per segment
segments, spectrograms = segment_audio(file_path, segment_output_dir)
# Batch the spectrograms for inference; create_loader keeps them in order (shuffle=False).
loader = create_loader(spectrograms)

In [11]:
# Model identifier: passed to utils.load_model_class and used to build the checkpoint path.
MODEL_NAME = 'DeeperSEnd'
# Folder under utils/ whose dataset_config.json is read by this notebook.
DATASET_NAME = "DATASET_CNN_small"

In [12]:
import json
# Read the JSON configuration stored alongside the selected dataset.
config_path = f"utils/{DATASET_NAME}/dataset_config.json"
with open(config_path) as f:
    dataset_config = json.load(f)

In [14]:
# "mappings" entry of the dataset config; its length is passed to the model constructor.
mappings = dataset_config["mappings"]

In [15]:
# sudo modprobe nvidia_uvm
# NOTE(review): presumably a reminder to load the NVIDIA UVM kernel module when
# CUDA is not detected — confirm.
import torch
# Use the GPU when torch reports CUDA as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [16]:
# Instantiate the architecture selected by MODEL_NAME, sized by len(mappings).
model_class = utils.load_model_class(MODEL_NAME)
model = model_class(len(mappings))
model.to(device)
saving_path = f'models/{MODEL_NAME}/checkpoint_small_2.pth'
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved from a GPU still loads when this session fell back to the CPU.
checkpoint = torch.load(saving_path, map_location=device)
# Left as the last expression so the cell displays the key-matching report.
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [None]:
import torch
import torch.nn.functional as F

def predict_from_dataloader(model, dataloader, device):
    """Run ``model`` over every batch of ``dataloader`` and collect its predictions.

    The model is switched to eval mode and moved to ``device``; gradients are
    disabled for the whole pass.

    Args:
        model: Module called as ``model(specs)``; its output is softmaxed over
            dimension 1.
        dataloader: Iterable of batches. A batch is either a tensor of
            spectrograms or a ``(spectrograms, paths)`` list/tuple. 3-D tensors
            get a channel axis inserted at position 1 before the forward pass.
        device: Device the model and every batch are moved to.

    Returns:
        dict with three lists holding one entry per sample, in iteration order:
        ``"predictions"`` (argmax index), ``"probabilities"`` (softmax row as a
        list of floats) and ``"paths"`` (the path supplied with the sample, or
        ``None`` when the batch carried no paths).
    """
    model.eval()
    model.to(device)

    all_preds = []
    all_probs = []
    all_paths = []

    with torch.no_grad():
        for batch in dataloader:
            # Accept plain tensor batches as well as (specs, paths) pairs so the
            # "paths" key read by the notebook is always present in the result.
            if isinstance(batch, (list, tuple)):
                specs = batch[0]
                batch_paths = list(batch[1]) if len(batch) > 1 else None
            else:
                specs = batch
                batch_paths = None

            specs = specs.to(device)
            if specs.ndim == 3:
                specs = specs.unsqueeze(1)  # [B, H, W] -> [B, 1, H, W]

            outputs = model(specs)
            probs = F.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            all_preds.extend(preds.cpu().tolist())
            all_probs.extend(probs.cpu().tolist())

            # Keep "paths" aligned with the other lists when none were supplied.
            if batch_paths is None:
                batch_paths = [None] * preds.shape[0]
            all_paths.extend(batch_paths)

    return {
        "predictions": all_preds,
        "probabilities": all_probs,
        "paths": all_paths
    }


In [22]:
# Run inference over every batch of segment spectrograms with the loaded model.
results = predict_from_dataloader(model, loader, device)

In [30]:
# NOTE(review): InferenceDataset yields spectrograms only, so no file paths travel
# with the batches — the recorded output for this key is all None. Confirm it is still needed.
results["paths"]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,