In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import torch 
import torch.nn as nn
from torch.utils.data import DataLoader

# Libraries for processing sounds
import librosa
from IPython.display import Audio
import random

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [3]:
from AnimalSoundDataset import AnimalSoundDataset

data_path = 'data/Animal_Sound_reduced.csv'

# Build the train and validation views of the same CSV. Both use the same
# split ratio and seed; only the requested split name differs.
dataset_train, dataset_val = (
    AnimalSoundDataset(data_path, split=split_name, split_ratio=0.8, seed=42)
    for split_name in ('train', 'val')
)

# Not used in this cell; kept because other cells may still reference them.
x_train_list, y_train_list = [], []

# A loader whose batch size equals the dataset length yields the whole split
# as one batch, so a single `next` call materialises every sample.
loader = DataLoader(dataset_train, batch_size=len(dataset_train))
x_train, y_train = next(iter(loader))

# Same trick for the validation split (`loader` is deliberately rebound).
loader = DataLoader(dataset_val, batch_size=len(dataset_val))
x_val, y_val = next(iter(loader))



In [4]:
# Display the number of samples in the validation split.
len(dataset_val)

26

In [5]:
# Input dimension is fixed at 1; the output size follows the number of classes
# reported by the training split.
input_dim = 1
n_classes = len(dataset_train.classes)

# Single configuration dict handed to the model and the trainer.
# The list-valued entries each hold two items — presumably one per
# convolution/pooling stage; confirm against AudioModel.
hyperparameters = {
    'input_dim': input_dim,
    'output_dim': n_classes,
    'hidden_layers_size': 100,
    'activation': 'relu',
    'kernel_size_conv': [(57, 6), (1, 3)],
    'kernel_size_pool': [(4, 3), (1, 3)],
    'stride_conv': [(1, 1), (1, 1)],
    'stride_pool': [(1, 3), (1, 3)],
    'filters': [80, 80],
    'batch_normalization': False,
    'dropout_rate': 0.5,
    'learning_rate': 0.002,
    # The batch size is tied to the size of the validation split.
    'batch_size': len(dataset_val),
    'max_epoch': 10,
}

In [6]:
from ClassesML.AudioModel import AudioModel

# Build the network from the hyperparameter dict, then place it on the
# selected device.
model = AudioModel(hyperparameters)
model = model.to(device)

  return F.conv2d(


In [7]:
from torchinfo import summary

# Produce the layer-by-layer parameter report for the model and write it to
# the cell output.
model_report = summary(model=model)
print(model_report)

Layer (type:depth-idx)                   Param #
AudioModel                               --
├─ModuleList: 1-1                        --
│    └─Conv2DBlock: 2-1                  --
│    │    └─Conv2d: 3-1                  27,440
│    │    └─ReLU: 3-2                    --
│    │    └─Dropout: 3-3                 --
│    └─MaxPool2d: 2-2                    --
│    └─Conv2DBlock: 2-3                  --
│    │    └─Conv2d: 3-4                  19,280
│    │    └─ReLU: 3-5                    --
│    │    └─Dropout: 3-6                 --
│    └─MaxPool2d: 2-4                    --
│    └─Flatten: 2-5                      --
│    └─Linear: 2-6                       44,000,100
│    └─ReLU: 2-7                         --
│    └─Dropout: 2-8                      --
│    └─LazyLinear: 2-9                   --
│    └─ReLU: 2-10                        --
│    └─Dropout: 2-11                     --
├─Sequential: 1-2                        44,046,820
│    └─Conv2DBlock: 2-12                 (recur

In [8]:
import torch.nn.functional as F

# Sanity-check forward pass on a freshly constructed (untrained) model.
model = AudioModel(hyperparameters).to(device)

# Inference only: switch dropout off and skip autograd bookkeeping so the
# result is deterministic and no graph is kept alive.
model.eval()
with torch.no_grad():
    # The inputs must live on the same device as the model's parameters,
    # otherwise the call fails as soon as CUDA is in use.
    y_hat = model(x_val.to(device))

# The model emits raw logits, not probabilities; softmax over the class
# dimension (dim=1) converts them.
y_prob = F.softmax(y_hat, dim=1)

# Restore the default training mode so later cells receive the model in the
# same state a fresh instance would have.
model.train()

In [None]:
def _run_epoch(net, inputs, targets, batch_size, criterion, dev, optimizer=None):
    """Run one mini-batch pass over ``inputs``/``targets``; return (mean loss, accuracy).

    When ``optimizer`` is given the pass also back-propagates and updates the
    weights; when it is ``None`` the pass is evaluation-only.

    Raises:
        ValueError: if ``inputs`` is empty (the averages would be undefined).
    """
    n_samples = len(inputs)
    if n_samples == 0:
        raise ValueError("cannot run an epoch on an empty dataset")

    loss_sum = 0.0
    n_correct = 0
    for start in range(0, n_samples, batch_size):
        # Tensors must be on the same device as the model or the forward pass fails.
        x = inputs[start:start + batch_size].to(dev)
        y = targets[start:start + batch_size].to(dev)

        # Forward pass
        y_hat = net(x)
        loss = criterion(y_hat, y)

        # Backward pass (training only)
        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Weight by batch length so a shorter final batch does not skew the mean.
        # Assumes the criterion returns a per-batch mean (the default reduction
        # of nn.CrossEntropyLoss) — confirm for custom criteria.
        loss_sum += loss.item() * len(x)

        # Labels may be class indices (1-D) or one-hot/probability rows (2-D)
        # — presumably the former; both are handled.
        labels = y.argmax(dim=1) if y.ndim > 1 else y
        n_correct += (y_hat.argmax(dim=1) == labels).sum().item()

    return loss_sum / n_samples, n_correct / n_samples


def run(criterion=None, optimizer=None, scheduler=None, early_stopper=None):
    """Train the notebook's global ``model`` and validate it after every epoch.

    Reads these notebook globals: ``model``, ``device``, ``hyperparameters``,
    ``x_train``, ``y_train``, ``x_val`` and ``y_val``. The epoch count is taken
    from ``hyperparameters['max_epochs']`` or, if absent, ``['max_epoch']``;
    mini-batches use ``hyperparameters['batch_size']``.

    Args:
        criterion: loss callable ``(y_hat, y) -> scalar tensor``.
            Defaults to ``nn.CrossEntropyLoss()``.
        optimizer: optimizer over ``model.parameters()``. Defaults to Adam with
            ``hyperparameters['learning_rate']``.
        scheduler: optional scheduler whose ``step(metric)`` is called with the
            validation accuracy after each epoch.
        early_stopper: optional object whose
            ``set(model=..., epoch=..., metric_epoch=...)`` returns whether
            training should continue.

    Returns:
        ``(train_accuracy_list, valid_accuracy_list)``, one entry per completed epoch.

    Raises:
        KeyError: if neither epoch-count key is present in ``hyperparameters``.
        ValueError: if the training or validation tensors are empty.
    """
    # The notebook's hyperparameter dict spells the key 'max_epoch'; accept both spellings.
    if 'max_epochs' in hyperparameters:
        max_epochs = hyperparameters['max_epochs']
    else:
        max_epochs = hyperparameters['max_epoch']
    batch_size = hyperparameters['batch_size']

    if criterion is None:
        criterion = nn.CrossEntropyLoss()
    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=hyperparameters['learning_rate'])

    train_accuracy_list = []
    valid_accuracy_list = []

    for epoch in range(max_epochs):
        # Train
        model.train()
        train_loss, train_accuracy = _run_epoch(
            model, x_train, y_train, batch_size, criterion, device, optimizer
        )
        print("Epoch: ", str(epoch + 1) + "/" + str(max_epochs))
        print("Train Loss: ", str(train_loss) + " - Training Accuracy: " + str(train_accuracy))

        # Validation: eval mode disables dropout, and no_grad guarantees that
        # no gradients are computed while measuring the current model state.
        model.eval()
        with torch.no_grad():
            valid_loss, valid_accuracy = _run_epoch(
                model, x_val, y_val, batch_size, criterion, device
            )
        print("Epoch: ", str(epoch + 1) + "/" + str(max_epochs))
        print("Validation Loss: ", str(valid_loss) + " - Validation Accuracy: " + str(valid_accuracy))

        train_accuracy_list.append(train_accuracy)
        valid_accuracy_list.append(valid_accuracy)

        # Learning-rate scheduler driven by the validation metric
        if scheduler is not None:
            old_lr = optimizer.param_groups[0]['lr']
            scheduler.step(valid_accuracy)
            new_lr = optimizer.param_groups[0]['lr']
            if old_lr != new_lr:
                print(f"Learning rate changed from {old_lr} to {new_lr}")

        # Early stopping mechanism
        if early_stopper is not None:
            keep_training = early_stopper.set(
                model=model,
                epoch=epoch,
                metric_epoch=valid_accuracy,
            )
            if not keep_training:
                break

    return train_accuracy_list, valid_accuracy_list

In [9]:
from ClassesML.AudioTrainer import AudioTrainer

# Hand the model, both dataset splits and the hyperparameters to the project
# trainer, then start training.
# NOTE(review): the recorded output of this cell reports training accuracies
# above 1.0 (e.g. 1.18 in epoch 2), so the accuracy normalisation inside
# AudioTrainer should be checked.
trainer = AudioTrainer(
    model,
    dataset_train,
    dataset_val,
    hyperparameters,
    device=device,
)
trainer.train()

Epoch 1/10:   0%|          | 0/4 [00:00<?, ?it/s]



Epoch 1/10: 100%|██████████| 4/4 [00:06<00:00,  1.61s/it]


Epoch 1: Train Loss: 14.37485506, Train Acc: 0.73964497, Val Loss: 0.10593927, Val Acc: 0.29585799


Epoch 2/10:  50%|█████     | 2/4 [00:03<00:03,  1.53s/it]



Epoch 2/10: 100%|██████████| 4/4 [00:06<00:00,  1.53s/it]


Epoch 2: Train Loss: 0.98137984, Train Acc: 1.18343195, Val Loss: 0.09865190, Val Acc: 0.44378698


Epoch 3/10:   0%|          | 0/4 [00:00<?, ?it/s]



Epoch 3/10: 100%|██████████| 4/4 [00:06<00:00,  1.65s/it]


Epoch 3: Train Loss: 0.56099426, Train Acc: 1.47928994, Val Loss: 0.09865190, Val Acc: 0.44378698


Epoch 4/10:  75%|███████▌  | 3/4 [00:04<00:01,  1.58s/it]



Epoch 4/10: 100%|██████████| 4/4 [00:06<00:00,  1.57s/it]


Epoch 4: Train Loss: 0.22829280, Train Acc: 0.88757396, Val Loss: 0.09865190, Val Acc: 0.44378698


Epoch 5/10:  50%|█████     | 2/4 [00:03<00:03,  1.60s/it]



Epoch 5/10: 100%|██████████| 4/4 [00:06<00:00,  1.62s/it]


Epoch 5: Train Loss: 0.11604216, Train Acc: 0.73964497, Val Loss: 0.09865190, Val Acc: 0.44378698


Epoch 6/10:   0%|          | 0/4 [00:00<?, ?it/s]


KeyboardInterrupt: 