In [1]:
from tqdm import tqdm
import numpy as np
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, Add, Flatten, Normalization, Input, Resizing, Conv2D, MaxPooling2D

from tensorflow.keras.regularizers import l2
from tensorflow.keras import Sequential
import matplotlib.pyplot as plt

from src.const import AUDIO_PATH, MAIN_LABELS, BATCH_SIZE, VALIDATION_SPLIT, SEED

from src.preprocess import load_and_preprocess, load_augmented_data

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

### Binary task: unknown vs. known classes

In [2]:
# Load every split in a single call: features (X) and labels (y) for the
# binary task and for the main-class task, each with a train and a validation
# part. NOTE(review): plot_samples=True presumably makes the loader plot some
# example inputs -- confirm in src.preprocess.
(
    train_ds_bin_X,
    train_ds_bin_y,
    val_ds_bin_X,
    val_ds_bin_y,
    train_ds_main_X,
    train_ds_main_y,
    val_ds_main_X,
    val_ds_main_y,
) = load_and_preprocess(plot_samples=True)

Load augmented data
Found 64721 files belonging to 30 classes.
Using 51777 files for training.
Using 12944 files for validation.
Create data with only main classes
Create binary dataset
Create main dataset
Found 64721 files belonging to 30 classes.
Using 51777 files for training.
Using 12944 files for validation.
Transform to spectograms
Transform to numpy


Processing dataset: 100%|██████████| 70773/70773 [02:35<00:00, 456.06it/s] 
Processing dataset: 100%|██████████| 12944/12944 [00:15<00:00, 818.20it/s]
Processing dataset: 100%|██████████| 37992/37992 [00:44<00:00, 844.44it/s] 
Processing dataset: 100%|██████████| 12944/12944 [00:15<00:00, 834.84it/s]


In [3]:
# Replace -inf entries in the validation features with a finite floor value
# taken from the training features only.
fill_value = np.min(train_ds_bin_X)
if not np.isfinite(fill_value):
    # np.min returns -inf (or NaN) as soon as the training array contains a
    # single such entry, which would make the assignment below a silent no-op.
    # Fall back to the smallest *finite* training value; this raises a
    # ValueError if the training array has no finite value at all.
    fill_value = np.min(train_ds_bin_X[np.isfinite(train_ds_bin_X)])
val_ds_bin_X[np.isneginf(val_ds_bin_X)] = fill_value

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [188]:
# Training split: convert features and labels to float32 tensors on `device`,
# wrap them in a dataset, and batch them in shuffled order.
train_dataset = TensorDataset(
    torch.tensor(train_ds_bin_X, dtype=torch.float32).to(device),
    torch.tensor(train_ds_bin_y, dtype=torch.float32).to(device),
)
del train_ds_bin_X, train_ds_bin_y  # the dataset now owns the tensors
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=256)

# Validation split: same conversion, but batches keep their original order.
val_dataset = TensorDataset(
    torch.tensor(val_ds_bin_X, dtype=torch.float32).to(device),
    torch.tensor(val_ds_bin_y, dtype=torch.float32).to(device),
)
del val_ds_bin_X, val_ds_bin_y
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=256)

In [207]:
class BiLSTM(nn.Module):
    """Stacked bidirectional LSTM followed by a two-layer dense head.

    The LSTM outputs are summed over the sequence axis, passed through
    dropout -> linear -> dropout -> ReLU -> linear, and squashed with a
    sigmoid, so the model returns values in [0, 1] (suitable for
    ``nn.BCELoss``, not for a logits-based loss).

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        output_size: Number of output units.
        num_layers: Number of stacked LSTM layers (default 2).
        dropout: Drop probability of the dropout applied in the dense head
            (default 0.2). It is not applied between the LSTM layers.
        fc_size: Width of the hidden dense layer (default 64).
    """

    def __init__(self, input_size, hidden_size, output_size,
                 num_layers=2, dropout=0.2, fc_size=64):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(dropout)
        # The LSTM emits 2 * hidden_size features: forward and backward
        # directions are concatenated.
        self.fc1 = nn.Linear(2 * hidden_size, fc_size)
        self.fc2 = nn.Linear(fc_size, output_size)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of sequences to sigmoid outputs.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the batch axis
                comes first because the LSTM is built with batch_first=True.

        Returns:
            Tensor of shape (batch, output_size) with values in [0, 1].
        """
        out, _ = self.lstm(x)
        # Pool over time by summing the per-step outputs (dim=1 is seq_len).
        out = torch.sum(out, dim=1)
        out = self.dropout(out)
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

In [208]:
# Hyper-parameters of the binary classifier.
input_size, hidden_size, output_size = 128, 128, 1

# Build the network and move its parameters to the selected device.
model = BiLSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
model = model.to(device)

# Adam with its default settings; binary cross-entropy on the model outputs.
optimizer = optim.Adam(model.parameters())
criterion = nn.BCELoss()

In [None]:
# Train with early stopping on the validation loss. The weights that achieved
# the best validation loss are snapshotted and restored after the loop, so
# `model` does not end up with the parameters of the last (worse) epochs.
NUM_EPOCHS = 50
patience = 5              # epochs without improvement tolerated before stopping
best_val_loss = float('inf')
best_state = None         # copy of model.state_dict() at the best epoch
no_improve_count = 0

for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    model.train()
    train_loss = 0
    correct_train = 0
    total_train = 0

    # The progress bar uses the same 1-based epoch label as the summary line.
    for batch_X, batch_y in tqdm(train_loader, f"Epoch {epoch + 1}/{NUM_EPOCHS}"):
        # Labels are 1-D; add a trailing axis to match the model output.
        targets = batch_y.unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch value is a true per-sample average even with a short last batch.
        train_loss += loss.item() * batch_X.size(0)

        # Accuracy at a 0.5 decision threshold.
        predicted = (outputs > 0.5).float()
        correct_train += (predicted == targets).sum().item()
        total_train += batch_y.size(0)

    train_loss /= len(train_loader.dataset)
    train_acc = correct_train / total_train

    # ---- validation pass (no gradients, dropout disabled) ----
    model.eval()
    val_loss = 0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            targets = batch_y.unsqueeze(1)
            outputs = model(batch_X)
            loss = criterion(outputs, targets)

            val_loss += loss.item() * batch_X.size(0)

            predicted = (outputs > 0.5).float()
            correct_val += (predicted == targets).sum().item()
            total_val += batch_y.size(0)

    val_loss /= len(val_loader.dataset)
    val_acc = correct_val / total_val

    print(f'Epoch {epoch+1}/{NUM_EPOCHS}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}')

    # ---- early stopping bookkeeping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improve_count = 0
        # state_dict() returns references to the live parameters, so clone
        # them; otherwise later optimizer steps would overwrite the snapshot.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        no_improve_count += 1

    if no_improve_count >= patience:
        print('Early stopping')
        break

# Roll the model back to the best validation epoch (if any epoch improved).
if best_state is not None:
    model.load_state_dict(best_state)