In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import torch 
import torch.nn as nn
from torch.utils.data import DataLoader

# Libraries for processing sounds
import librosa
from IPython.display import Audio
import random

In [2]:
# Use the CUDA device when PyTorch reports one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [3]:
from AnimalSoundDataset import AnimalSoundDataset

data_path = 'data/Animal_Sound_modified.csv'

# Build the train and validation datasets from the same CSV, using the same
# split_ratio and seed for both (train is constructed first, then val).
dataset_train, dataset_val = (
    AnimalSoundDataset(data_path, split=split_name, split_ratio=0.8, seed=42)
    for split_name in ('train', 'val')
)

# Not referenced again in the visible cells; kept in case later cells use them.
x_train_list, y_train_list = [], []

# A batch size equal to the dataset length makes the first (and only) batch
# contain the whole training split.
loader = DataLoader(dataset_train, batch_size=len(dataset_train))
x_train, y_train = next(iter(loader))

# Same trick for the validation split; `loader` is rebound on purpose so it
# ends up pointing at the validation DataLoader.
loader = DataLoader(dataset_val, batch_size=len(dataset_val))
x_val, y_val = next(iter(loader))



In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from Blocks import Conv2DBlock
from AnimalSoundDataset import AnimalSoundDataset
from Utilities import Utilities

# Libraries for processing sounds
import librosa
from IPython.display import Audio
import random

def get_activation(activation_str: "str | None"):
    """Build the activation module named by ``activation_str``.

    Args:
        activation_str: ``'relu'``, ``'sigmoid'``, ``'tanh'`` or ``'linear'``.
            ``None`` is accepted and treated like ``'linear'``.

    Returns:
        A freshly constructed ``nn.ReLU``, ``nn.Sigmoid`` or ``nn.Tanh``
        instance, or ``None`` when no activation should be applied
        (``'linear'`` / ``None``). Callers must handle the ``None`` case.

    Raises:
        ValueError: If ``activation_str`` is not one of the supported names.
    """
    # The annotation is quoted because a bare `str or None` expression
    # evaluates to just `str`, which silently drops the optional part.
    if activation_str is None or activation_str == "linear":
        return None

    if activation_str == 'relu':
        return nn.ReLU()
    if activation_str == 'sigmoid':
        return nn.Sigmoid()
    if activation_str == 'tanh':
        return nn.Tanh()

    raise ValueError(f"Unknown activation function: {activation_str}")


class AudioModel(nn.Module):
    """Convolutional classifier: two Conv2DBlock + MaxPool2d stages and a dense head.

    ``forward`` returns raw, unnormalised class scores (logits) of shape
    ``(batch, output_dim)``. This is the input ``nn.CrossEntropyLoss`` expects,
    because that loss applies log-softmax itself. Use ``predict_proba`` when
    probabilities are needed.

    Args:
        hyperparameters: Mapping that must provide the keys ``input_dim``,
            ``output_dim``, ``hidden_layers_size``, ``activation``,
            ``kernel_size_conv``, ``kernel_size_pool``, ``stride_conv``,
            ``stride_pool``, ``filters``, ``batch_normalization`` and
            ``dropout_rate``. The four kernel/stride entries and ``filters``
            are indexed at positions 0 and 1 (one entry per conv stage).

    Raises:
        KeyError: If a required hyperparameter is missing.
        ValueError: If ``activation`` is not a name ``get_activation`` knows.
    """

    def __init__(self, hyperparameters):
        super().__init__()

        self.input_dim = hyperparameters['input_dim']
        self.output_dim = hyperparameters['output_dim']
        self.hidden_layers_size = hyperparameters['hidden_layers_size']
        self.activation = hyperparameters['activation']
        self.kernel_size_conv = hyperparameters['kernel_size_conv']
        self.kernel_size_pool = hyperparameters['kernel_size_pool']
        self.stride_conv = hyperparameters['stride_conv']
        self.stride_pool = hyperparameters['stride_pool']
        self.filters = hyperparameters['filters']
        self.batch_normalization = hyperparameters['batch_normalization']
        self.dropout_rate = hyperparameters['dropout_rate']

        self.layers = nn.ModuleList()

        # First conv stage.
        # Input Shape: (1, 128, target_width)  -- per the original author's note.
        # A fresh activation module is requested for every block so that no
        # module instance is shared between layers.
        layer = Conv2DBlock(in_channels=self.input_dim, out_channels=self.filters[0],
                            kernel_size=self.kernel_size_conv[0], stride=self.stride_conv[0],
                            activation=get_activation(self.activation),
                            batch_normalization=self.batch_normalization, dropout_rate=self.dropout_rate)
        self.layers.append(layer)

        layer = nn.MaxPool2d(kernel_size=self.kernel_size_pool[0], stride=self.stride_pool[0])
        self.layers.append(layer)

        # Second conv stage. Dropout is fixed to 0.0 here, unlike the first
        # stage, which uses the configured dropout_rate.
        # NOTE(review): looks intentional -- confirm.
        layer = Conv2DBlock(in_channels=self.filters[0], out_channels=self.filters[1],
                            kernel_size=self.kernel_size_conv[1], stride=self.stride_conv[1],
                            activation=get_activation(self.activation),
                            batch_normalization=self.batch_normalization, dropout_rate=0.0)
        self.layers.append(layer)

        layer = nn.MaxPool2d(kernel_size=self.kernel_size_pool[1], stride=self.stride_pool[1])
        self.layers.append(layer)

        # Flatten the feature maps to (batch, features) for the dense head.
        self.layers.append(nn.Flatten())

        # Hidden fully connected layer; LazyLinear infers in_features on the
        # first forward pass.
        # NOTE(review): the original comment said "100 relus two times", but
        # only one hidden layer is built -- confirm the intended depth.
        self.layers.append(nn.LazyLinear(self.hidden_layers_size))

        # get_activation returns None for a linear activation; appending None
        # to the Sequential would fail at call time, so it is skipped instead.
        hidden_activation = get_activation(self.activation)
        if hidden_activation is not None:
            self.layers.append(hidden_activation)

        self.layers.append(nn.Dropout(self.dropout_rate))

        # Output layer: emits raw logits. No ReLU, Dropout or Softmax follows
        # it, since those would clamp, randomly zero or pre-normalise the class
        # scores before the loss sees them.
        self.layers.append(nn.LazyLinear(self.output_dim))

        # `layers` and `classifier` reference the same module objects; both
        # attributes are kept so existing attribute access keeps working.
        self.classifier = nn.Sequential(*self.layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, output_dim)`` for input ``x``."""
        y_hat = self.classifier(x)
        return y_hat

    def predict_proba(self, x):
        """Return class probabilities: softmax over dim 1 of the logits for ``x``."""
        return nn.Softmax(dim=1)(self(x))



In [8]:
# Number of input channels handed to the first conv block.
input_dim = 1
# One output unit per class known to the training dataset.
n_classes = len(dataset_train.classes)

# Single configuration dict shared by AudioModel and AudioTrainer.
# The list-valued entries hold one element per conv/pool stage (index 0 and 1).
hyperparameters = dict(
    # --- model ---
    input_dim=input_dim,
    output_dim=n_classes,
    hidden_layers_size=100,
    activation='relu',
    kernel_size_conv=[(57, 6), (1, 3)],
    kernel_size_pool=[(4, 3), (1, 3)],
    stride_conv=[(1, 1), (1, 1)],
    stride_pool=[(1, 3), (1, 3)],
    filters=[80, 80],
    batch_normalization=False,
    dropout_rate=0.5,
    # --- training ---
    learning_rate=0.002,
    max_epoch=1,
    # AudioTrainer reads hyperparameters['batch_size']; defining it here keeps
    # the config complete so the trainer does not fail with a KeyError.
    batch_size=128,
)

In [9]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
from Utilities.Utilities import Utilities

class AudioTrainer:
    """Train and validate a classification model with Adam and cross-entropy.

    Args:
        model: The ``nn.Module`` to optimise. It is moved to ``device``.
        train_dataset: Dataset wrapped in a shuffled ``DataLoader``.
        val_dataset: Dataset wrapped in a non-shuffled ``DataLoader``.
        hyperparameters: Mapping that must provide ``batch_size``,
            ``max_epoch`` and ``learning_rate``.
        device: Device the model and every batch are moved to.

    Raises:
        KeyError: If a required hyperparameter is missing.
    """

    def __init__(self, model, train_dataset, val_dataset, hyperparameters, device='cpu'):
        self.hyperparameters = hyperparameters
        self.model = model
        self.device = device

        batch_size = self.hyperparameters['batch_size']
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        self.val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        self.max_epoch = self.hyperparameters['max_epoch']

        # CrossEntropyLoss with default settings returns the mean loss over
        # the batch and expects raw logits from the model.
        self.criterion = nn.CrossEntropyLoss()

        # Place the model on its device before the optimizer captures the
        # parameters, as the torch.optim documentation recommends.
        self.model.to(device)
        self.optimizer = torch.optim.Adam(model.parameters(), lr=self.hyperparameters['learning_rate'])

    def train(self):
        """Run ``max_epoch`` epochs, printing train/validation metrics after each."""
        # Per-batch values are averaged over the number of batches (not the
        # batch size). max(..., 1) avoids a division by zero on an empty loader.
        n_batches = max(len(self.train_loader), 1)

        for epoch in range(self.max_epoch):
            self.model.train()
            total_loss = 0.0  # running sum of per-batch mean losses
            total_accuracy = 0.0

            for x_batch, y_batch in tqdm(self.train_loader, desc=f"Epoch {epoch+1}/{self.max_epoch}"):
                x, y = x_batch.to(self.device), y_batch.to(self.device)
                # Forward
                y_hat = self.model(x)
                loss = self.criterion(y_hat, y)
                # Backward
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()
                # Assumes Utilities.compute_accuracy returns a per-batch
                # fraction of correct predictions -- TODO confirm.
                batch_accuracy = Utilities.compute_accuracy(y, y_hat)
                total_accuracy += batch_accuracy

            train_loss = total_loss / n_batches
            train_accuracy = total_accuracy / n_batches

            val_acc, val_loss = self.evaluate()

            # train_loss is already the epoch average; it must not be divided
            # by the number of batches a second time when printed.
            print(f"Epoch {epoch+1}: "
                  f"Train Loss: {train_loss:.4f}, "
                  f"Train Acc: {train_accuracy:.4f}, "
                  f"Val Loss: {val_loss:.4f}, "
                  f"Val Acc: {val_acc:.4f}")

    def evaluate(self):
        """Evaluate on the validation loader without gradient tracking.

        Returns:
            Tuple ``(valid_accuracy, valid_loss)``: per-batch accuracy and
            per-batch mean loss, each averaged over the validation batches.
            Note the order: accuracy first, loss second.
        """
        self.model.eval()
        total_loss = 0.0
        total_accuracy = 0.0
        # Average over the number of batches; guard against an empty loader.
        n_batches = max(len(self.val_loader), 1)

        with torch.no_grad():
            for x_batch, y_batch in self.val_loader:
                x, y = x_batch.to(self.device), y_batch.to(self.device)
                # Forward
                y_hat = self.model(x)
                loss = self.criterion(y_hat, y)
                total_loss += loss.item()
                # Assumes a per-batch fraction, as in train() -- TODO confirm.
                batch_accuracy = Utilities.compute_accuracy(y, y_hat)
                total_accuracy += batch_accuracy

        valid_loss = total_loss / n_batches
        valid_accuracy = total_accuracy / n_batches

        return valid_accuracy, valid_loss

In [None]:
# model = AudioModel(hyperparameters=hyperparameters)
# hyperparameters['batch_size'] = 128

# trainer = AudioTrainer(model, dataset_train, dataset_val, hyperparameters, device=device)
# trainer.train()

In [10]:
model = AudioModel(hyperparameters).to(device)
print(x_val[0])

# Smoke-test forward pass over the whole validation tensor.
# - x_val comes out of a DataLoader on the CPU, so it is moved to the model's
#   device; otherwise the call fails whenever `device` is CUDA.
# - eval() + no_grad() make this a deterministic inference pass (dropout off,
#   no autograd graph). The model is left in eval mode afterwards.
model.eval()
with torch.no_grad():
    y_hat = model(x_val.to(device))

tensor([[[-41.1521, -39.5143, -36.1628,  ...,   0.0000,   0.0000,   0.0000],
         [-47.2608, -57.9575, -49.7822,  ...,   0.0000,   0.0000,   0.0000],
         [-54.6373, -56.1950, -54.1422,  ...,   0.0000,   0.0000,   0.0000],
         ...,
         [-80.0000, -80.0000, -80.0000,  ...,   0.0000,   0.0000,   0.0000],
         [-80.0000, -80.0000, -80.0000,  ...,   0.0000,   0.0000,   0.0000],
         [-80.0000, -80.0000, -80.0000,  ...,   0.0000,   0.0000,   0.0000]]])


  return F.conv2d(


In [12]:
# Shape check on the model output for x_val: expected to be
# (number of validation samples, n_classes).
print(y_hat.shape)

torch.Size([260, 13])
