In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import torch 
import torch.nn as nn
from torch.utils.data import DataLoader

# Libraries for processing sounds
import librosa
from IPython.display import Audio
import random

In [14]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [15]:
from AnimalSoundDataset import AnimalSoundDataset

data_path = 'data/Animal_Sound_processed.csv'

# Build the train split first, then the validation split, from the same CSV.
# Both use the same split_ratio and seed; presumably this keeps the two splits
# consistent with each other — confirm in AnimalSoundDataset.
dataset_train, dataset_val = (
    AnimalSoundDataset(data_path, split=split_name, split_ratio=0.8, seed=42)
    for split_name in ('train', 'val')
)

Loading dataset from data/Animal_Sound_processed.csv...
Classes found: ['bear', 'cat', 'chicken', 'cow', 'dog', 'dolphin', 'donkey', 'elephant', 'frog', 'horse', 'lion', 'monkey', 'sheep']
Total segments extracted: 2909
train set contains 2327 segments.
Loading dataset from data/Animal_Sound_processed.csv...
Classes found: ['bear', 'cat', 'chicken', 'cow', 'dog', 'dolphin', 'donkey', 'elephant', 'frog', 'horse', 'lion', 'monkey', 'sheep']
Total segments extracted: 2909
val set contains 582 segments.


In [16]:
# Mini-batch size shared by the training and validation DataLoaders.
batch_size: int = 128

In [17]:
# Training batches are reshuffled on each pass; validation order stays fixed.
train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=batch_size, shuffle=False)

print(f"Number of batches in train loader: {len(train_loader)}")

# Peek at a single mini-batch to sanity-check the tensor shapes.
batch = next(iter(train_loader), None)
if batch is not None:
    x_batch, y_batch = batch
    print(x_batch.shape)  # recorded run: [128, 2, 60, 41]
    print(y_batch.shape)  # recorded run: [128]

Number of batches in train loader: 19
torch.Size([128, 2, 60, 41])
torch.Size([128])


In [18]:
# Size of the first axis of one sample tensor. Batches print as
# [128, 2, 60, 41] in the recorded run, so this is 2 (presumably the number
# of input channels — confirm against AnimalSoundDataset).
input_dim = dataset_train[0][0].shape[0]
n_classes = len(dataset_train.classes)
print(f"Input dimension: {input_dim}, Number of classes: {n_classes}")

# Single configuration dict handed to both AudioModel and AudioTrainer.
hyperparameters = {
    'input_dim': input_dim,
    'output_dim': n_classes,
    # NOTE(review): an earlier note said this "should be 5000" but had been
    # lowered to spare a laptop; the value in use here is 5000.
    'hidden_layers_size': 5000,
    'activation': 'relu',
    # One entry per convolution / pooling stage (two stages configured).
    'kernel_size_conv': [(57, 6), (1, 3)],
    'kernel_size_pool': [(4, 3), (1, 3)],
    'stride_conv': [(1, 1), (1, 1)],
    'stride_pool': [(1, 3), (1, 3)],
    'filters': [80, 80],
    'batch_normalization': False,
    'dropout_rate': 0.5,
    # trainer hyperparameters
    'learning_rate': 0.002,
    'weight_decay': 0.001,
    'momentum': 0.9,
    'nesterov': True,
    # questionable hyperparameters
    # (batch_size is deliberately left out of this dict; it is only passed
    # to the DataLoaders.)
    'max_epoch': 10,
}

Input dimension: 2, Number of classes: 13


In [19]:
from ClassesML.AudioModel import AudioModel

# Build the network from the hyperparameter dict, then move it onto `device`.
model = AudioModel(hyperparameters)
model = model.to(device)

In [None]:
def predict_clip(model, segment_tensors, device, method='prob'):
    """
    Predict a single class index for a whole clip from its segments.

    All segments are passed through ``model`` as one batch, the outputs are
    converted to per-segment class probabilities with a softmax over dim 1,
    and the per-segment results are combined according to ``method``.

    Args:
        model: callable module. It is switched to eval mode via
            ``model.eval()`` and is left in that mode on return.
        segment_tensors: tensor of shape
            (n_segments, channels, mel_bands, frames); moved to ``device``
            before the forward pass.
        device: device the segments are moved to.
        method: 'majority' — every segment votes for its argmax class and the
            most frequent class is returned; 'prob' — probabilities are
            averaged over segments and the argmax of the mean is returned.

    Returns:
        int: index of the predicted class for the clip.

    Raises:
        ValueError: if ``method`` is neither 'majority' nor 'prob', or if
            ``segment_tensors`` contains no segments.
    """
    # Validate up front so a bad `method` fails before the forward pass.
    if method not in ('majority', 'prob'):
        raise ValueError("method must be 'majority' or 'prob'")
    # With zero segments there is nothing to vote on or average.
    if segment_tensors.shape[0] == 0:
        raise ValueError("segment_tensors must contain at least one segment")

    model.eval()
    segment_tensors = segment_tensors.to(device)
    with torch.no_grad():
        outputs = model(segment_tensors)  # (n_segments, num_classes)
        # torch.softmax rather than F.softmax: `F` (torch.nn.functional) is
        # not imported in this notebook, so the old call raised NameError.
        probs = torch.softmax(outputs, dim=1)

    if method == 'majority':
        preds = torch.argmax(probs, dim=1)
        # minlength keeps the vote histogram one slot per class.
        counts = torch.bincount(preds, minlength=probs.shape[1])
        return torch.argmax(counts).item()

    # method == 'prob': average the class probabilities over all segments.
    avg_probs = probs.mean(dim=0)
    return torch.argmax(avg_probs).item()


In [11]:
from ClassesML.AudioTrainer import AudioTrainer

# The trainer receives the model, both loaders and the shared hyperparameter
# dict; the recorded log below shows 10 epochs, matching max_epoch.
trainer = AudioTrainer(
    model,
    train_loader,
    val_loader,
    hyperparameters,
    device=device,
)
trainer.train()

Epoch 1/10: 100%|██████████| 19/19 [00:14<00:00,  1.31it/s]


Epoch 1: Train Loss: 2.4964, Train Acc: 18.4836, Val Loss: 2.4609, Val Acc: 31.7500


Epoch 2/10: 100%|██████████| 19/19 [00:13<00:00,  1.36it/s]


Epoch 2: Train Loss: 2.3739, Train Acc: 27.1256, Val Loss: 2.3982, Val Acc: 40.6027


Epoch 3/10: 100%|██████████| 19/19 [00:14<00:00,  1.27it/s]


Epoch 3: Train Loss: 2.2572, Train Acc: 33.3739, Val Loss: 2.2791, Val Acc: 37.5045


Epoch 4/10: 100%|██████████| 19/19 [00:13<00:00,  1.36it/s]


Epoch 4: Train Loss: 2.1057, Train Acc: 35.7587, Val Loss: 2.1626, Val Acc: 39.6652


Epoch 5/10: 100%|██████████| 19/19 [00:15<00:00,  1.26it/s]


Epoch 5: Train Loss: 1.9666, Train Acc: 38.4082, Val Loss: 2.0062, Val Acc: 47.2366


Epoch 6/10: 100%|██████████| 19/19 [00:16<00:00,  1.14it/s]


Epoch 6: Train Loss: 1.7846, Train Acc: 43.8233, Val Loss: 1.8615, Val Acc: 53.9777


Epoch 7/10: 100%|██████████| 19/19 [00:18<00:00,  1.02it/s]


Epoch 7: Train Loss: 1.6570, Train Acc: 46.9018, Val Loss: 1.7468, Val Acc: 55.9330


Epoch 8/10: 100%|██████████| 19/19 [00:19<00:00,  1.04s/it]


Epoch 8: Train Loss: 1.5434, Train Acc: 51.2425, Val Loss: 1.6342, Val Acc: 61.4777


Epoch 9/10: 100%|██████████| 19/19 [00:20<00:00,  1.06s/it]


Epoch 9: Train Loss: 1.4549, Train Acc: 53.9742, Val Loss: 1.6415, Val Acc: 58.8482


Epoch 10/10: 100%|██████████| 19/19 [00:21<00:00,  1.14s/it]


Epoch 10: Train Loss: 1.4187, Train Acc: 54.6035, Val Loss: 1.5450, Val Acc: 60.9062
