In [2]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import librosa
from sklearn.model_selection import train_test_split

from transformers import AutoModelForAudioClassification, AutoFeatureExtractor

from src.const import AUDIO_PATH, MAIN_LABELS, BATCH_SIZE, VALIDATION_SPLIT, SEED
from src.preprocess import load_and_preprocess, transform_to_data_loader, get_dl_for_pretrained
from src.preprocess_utils import normalize_ds

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


### Unknown vs known task

In [2]:
train_ds_bin_X, train_ds_bin_y, val_ds_bin_X, val_ds_bin_y, train_ds_main_X, train_ds_main_y, val_ds_main_X, val_ds_main_y = load_and_preprocess(plot_samples = False, augment_specs = False)

Loading augmented data...
Found 64727 files belonging to 31 classes.
Using 51782 files for training.
Using 12945 files for validation.
Finished
Creating data with only main classes...
Finished
Creating binary dataset...
Finished
Creating main dataset...
Found 64727 files belonging to 31 classes.
Using 51782 files for training.
Using 12945 files for validation.
Finished
Transforming audio data to spectograms...
Finished
Transforming data to numpy arrays...


Processing dataset: 100%|██████████| 70776/70776 [03:43<00:00, 316.11it/s] 
Processing dataset: 100%|██████████| 12945/12945 [00:26<00:00, 481.61it/s]
Processing dataset: 100%|██████████| 37988/37988 [01:38<00:00, 385.74it/s]
Processing dataset: 100%|██████████| 12945/12945 [00:28<00:00, 454.01it/s]


Finished


In [3]:
train_ds_bin_X[train_ds_bin_X == -np.inf] = np.min(train_ds_bin_X[train_ds_bin_X != -np.inf])
val_ds_bin_X[val_ds_bin_X == -np.inf] = np.min(train_ds_bin_X[train_ds_bin_X != -np.inf])
train_ds_main_X[train_ds_main_X == -np.inf] = np.min(train_ds_main_X[train_ds_main_X != -np.inf])
val_ds_main_X[val_ds_main_X == -np.inf] = np.min(train_ds_main_X[train_ds_main_X != -np.inf])

In [4]:
train_ds_bin_X = normalize_ds(train_ds_bin_X)
val_ds_bin_X = normalize_ds(val_ds_bin_X)
train_ds_main_X = normalize_ds(train_ds_main_X)
val_ds_main_X = normalize_ds(val_ds_main_X)

Normalizing dataset:   0%|          | 0/70776 [00:00<?, ?it/s]

Normalizing dataset: 100%|██████████| 70776/70776 [00:31<00:00, 2235.83it/s]
Normalizing dataset: 100%|██████████| 12945/12945 [00:01<00:00, 7299.50it/s]
Normalizing dataset: 100%|██████████| 37988/37988 [00:05<00:00, 7263.14it/s]
Normalizing dataset: 100%|██████████| 4688/4688 [00:00<00:00, 7417.25it/s]


In [5]:
# np.save('data/arrays/train_ds_bin_X.npy', train_ds_bin_X)
# np.save('data/arrays/train_ds_bin_y.npy', train_ds_bin_y)
# np.save('data/arrays/val_ds_bin_X.npy', val_ds_bin_X)
# np.save('data/arrays/val_ds_bin_y.npy', val_ds_bin_y)
# np.save('data/arrays/train_ds_main_X.npy', train_ds_main_X)
# np.save('data/arrays/train_ds_main_y.npy', train_ds_main_y)
# np.save('data/arrays/val_ds_main_X.npy', val_ds_main_X)
# np.save('data/arrays/val_ds_main_y.npy', val_ds_main_y)

In [3]:
train_ds_bin_X = np.load('data/arrays/train_ds_bin_X.npy')
train_ds_bin_y = np.load('data/arrays/train_ds_bin_y.npy')
val_ds_bin_X = np.load('data/arrays/val_ds_bin_X.npy')
val_ds_bin_y = np.load('data/arrays/val_ds_bin_y.npy')
train_ds_main_X = np.load('data/arrays/train_ds_main_X.npy')
train_ds_main_y = np.load('data/arrays/train_ds_main_y.npy')
val_ds_main_X = np.load('data/arrays/val_ds_main_X.npy')
val_ds_main_y = np.load('data/arrays/val_ds_main_y.npy')

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [5]:
train_loader_bin = transform_to_data_loader(train_ds_bin_X, train_ds_bin_y, device=device)
val_loader_bin = transform_to_data_loader(val_ds_bin_X, val_ds_bin_y, device=device)

train_loader_main = transform_to_data_loader(train_ds_main_X, train_ds_main_y, device=device)
val_loader_main = transform_to_data_loader(val_ds_main_X, val_ds_main_y, device=device)

del train_ds_bin_X, train_ds_bin_y, val_ds_bin_X, val_ds_bin_y, train_ds_main_X, train_ds_main_y, val_ds_main_X, val_ds_main_y

In [6]:
for a, b in train_loader_bin:
    print(a.shape, b.shape)
    break


torch.Size([256, 63, 128]) torch.Size([256])


In [106]:
import torch
from torch import nn
import torch.nn.functional as f
from torch.autograd import Variable

KERNEL_SIZE = 3
PADDING = KERNEL_SIZE // 2

class ConvLSTMCell(nn.Module):
    """
    Generate a convolutional LSTM cell
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.Gates = nn.Conv2d(input_size + hidden_size, 4 * hidden_size, KERNEL_SIZE, padding=PADDING)

    def forward(self, input_, prev_state):

        # get batch and spatial sizes
        batch_size = input_.data.size()[0]
        spatial_size = input_.data.size()[2:]

        # generate empty prev_state, if None is provided
        if prev_state is None:
            state_size = [batch_size, self.hidden_size] + list(spatial_size)
            prev_state = (
                Variable(torch.zeros(state_size)).to(device),
                Variable(torch.zeros(state_size)).to(device)
            )

        prev_hidden, prev_cell = prev_state

        # data size is [batch, channel, height, width]
        stacked_inputs = torch.cat((input_, prev_hidden), 1)
        gates = self.Gates(stacked_inputs)

        # chunk across channel dimension
        in_gate, remember_gate, out_gate, cell_gate = gates.chunk(4, 1)

        # apply sigmoid non linearity
        in_gate = f.sigmoid(in_gate)
        remember_gate = f.sigmoid(remember_gate)
        out_gate = f.sigmoid(out_gate)

        # apply tanh non linearity
        cell_gate = f.tanh(cell_gate)

        # compute current cell and hidden state
        cell = (remember_gate * prev_cell) + (in_gate * cell_gate)
        hidden = out_gate * f.tanh(cell)

        return hidden, cell

In [123]:
class bin_BiLSTM(nn.Module):
    
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size//2, 
            hidden_size=hidden_size//2, 
            batch_first=True, 
            bidirectional=True, 
            num_layers=1,
            dropout=0.5,
        )
        self.convlstm = ConvLSTMCell(input_size=16, hidden_size=1)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(hidden_size, 128)  # *2 because of bidirectional
        self.fc2 = nn.Linear(128, 1)
        self.bc1 = nn.BatchNorm1d(hidden_size)
        self.bc2 = nn.BatchNorm1d(128)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        # out = self.conv(x.unsqueeze(1))#.squeeze(1)
        # out = self.pool(out)
        # out = self.dropout(out)
        # out = self.relu(out)
        # out = torch.mean(out, dim=1)
        # out, _ = self.lstm(out)

        out, _ = self.convlstm(x, None)
        
        # # out = torch.sum(out, dim=1)
        # out = self.dropout(out[:, -1, :])
        # out = self.bc1(out)
        # out = self.fc1(out)
        # out = self.dropout(out)
        # out = self.relu(out)
        # out = self.bc2(out)
        # out = self.fc2(out)
        # out = self.sigmoid(out)
        return out

In [116]:
class main_BiLSTM(nn.Module):

    def __init__(self, input_size, hidden_size, num_class):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size, 
            hidden_size=hidden_size, 
            batch_first=True, 
            bidirectional=True, 
            num_layers=2,
            dropout=0.3,
        )
        self.dropout = nn.Dropout(0.3)
        self.fc1 = nn.Linear(2*hidden_size, 128)  # *2 because of bidirectional
        self.fc2 = nn.Linear(128, num_class)
        self.bc1 = nn.BatchNorm1d(2*hidden_size)
        self.bc2 = nn.BatchNorm1d(128)
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)

    def forward(self, x):
        out = self.conv(x.unsqueeze(1)).squeeze(1)
        out = self.dropout(out)
        out = self.relu(out)
        out = torch.min(out, dim=1)
        out, _ = self.lstm(out)
        # out = torch.sum(out, dim=1)
        out = self.bc1(out[:, -1, :])
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.bc2(out)
        out = self.fc2(out)
        out = self.dropout(out)
        return out

In [117]:
class bin_Transformer(nn.Module):
    
    def __init__(self, d_model, n_head, num_layers):
        super().__init__()
        self.trans_enc_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head, batch_first=True, activation="relu").to(device)
        self.transformer_encoder = nn.TransformerEncoder(self.trans_enc_layer, num_layers=num_layers).to(device)
        self.dropout = nn.Dropout(0.2)
        self.fc1 = nn.Linear(d_model, 64)
        self.fc2 = nn.Linear(64, 1)
        self.bc1 = nn.BatchNorm1d(d_model)
        self.bc2 = nn.BatchNorm1d(64)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)

    def forward(self, x):
        out = self.conv(x.unsqueeze(1)).squeeze(1)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.transformer_encoder(out)[:, -1, :]
        out = self.dropout(out)
        out = self.bc1(out)
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.bc2(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

In [118]:
class main_Transformer(nn.Module):
    
    def __init__(self, d_model, n_head, num_layers, num_class):
        super().__init__()
        self.trans_enc_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head, batch_first=True, activation="relu").to(device)
        self.transformer_encoder = nn.TransformerEncoder(self.trans_enc_layer, num_layers=num_layers).to(device)
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(d_model, 64)
        self.fc2 = nn.Linear(64, num_class)
        self.bc1 = nn.BatchNorm1d(d_model)
        self.bc2 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        out = self.conv(x.unsqueeze(1)).squeeze(1)
        out = self.pool(out)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.transformer_encoder(out)[:, -1, :]
        out = self.dropout(out)
        out = self.bc1(out)
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.bc2(out)
        out = self.fc2(out)
        out = self.dropout(out)
        return out

In [124]:
# BILSTM for known vs. unknown classification task
input_size = 128
hidden_size = 128

bin_bilstm_model = bin_BiLSTM(input_size, hidden_size).to(device)
bin_bilstm_criterion = nn.BCELoss()
bin_bilstm_optimizer = optim.Adam(bin_bilstm_model.parameters(), weight_decay=0.005)

In [125]:
# BILSTM for main classification task
input_size = 128
hidden_size = 128
num_class = 10

main_bilstm_model = main_BiLSTM(input_size, hidden_size, num_class).to(device)
main_bilstm_criterion = nn.CrossEntropyLoss()
main_bilstm_optimizer = optim.Adam(main_bilstm_model.parameters())

In [126]:
from torchinfo import summary
summary(bin_bilstm_model, (16, 63, 128,), device="cuda")
# print(bin_bilstm_model)

RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: []

In [13]:
# Transformer for known vs. unknown task
d_model = 128
n_head = 8
num_layers = 1

bin_transformer_model = bin_Transformer(d_model, n_head, num_layers).to(device)
bin_transformer_criterion = nn.BCELoss()
bin_transformer_optimizer = optim.Adam(bin_transformer_model.parameters())

In [14]:
# Transformer for main task
d_model = 128
n_head = 8
num_class = 10
num_layers = 2

main_transformer_model = main_Transformer(d_model, n_head, num_layers, num_class).to(device)
main_transformer_criterion = nn.CrossEntropyLoss()
main_transformer_optimizer = optim.Adam(main_transformer_model.parameters())

In [33]:
def train_model(
        model, 
        criterion, 
        optimizer, 
        train_loader, 
        val_loader, 
        model_type, 
        epoch_count,
        using_pretrained=False,
        early_stopping=False,
    ):
    
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    patience = 5
    no_improve_count = 0

    # Training
    for epoch in range(epoch_count):
        model.train()
        train_loss = 0
        correct_train = 0
        total_train = 0

        for batch_X, batch_y in tqdm(train_loader, f"Epoch {epoch+1}"):
            model.zero_grad()
            outputs = model(batch_X)

            if using_pretrained:
                outputs = outputs.logits

            if model_type == "bin":
                loss = criterion(outputs, batch_y.unsqueeze(1))
            else:
                loss = criterion(outputs, batch_y.long())

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            # Loss
            train_loss += loss.item() * batch_X.size(0)

            # Accuracy
            if model_type == "bin":
                predicted = (outputs > 0.5).float()
                correct_train += (predicted == batch_y.unsqueeze(1)).sum().item()
            else:
                predicted = torch.argmax(outputs, dim=1)
                correct_train += (predicted == batch_y).sum().item()
            
            
            total_train += batch_y.size(0)

        train_loss /= len(train_loader.dataset)
        train_acc = correct_train / total_train
        train_losses.append(train_loss)
        
        model.eval()
        val_loss = 0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)

                if using_pretrained:
                    outputs = outputs.logits

                if model_type == "bin":
                    loss = criterion(outputs, batch_y.unsqueeze(1))
                else:
                    loss = criterion(outputs, batch_y.long())
                
                # Loss
                val_loss += loss.item() * batch_X.size(0)

                # Accuracy
                if model_type == "bin":
                    predicted = (outputs > 0.5).float()
                    correct_val += (predicted == batch_y.unsqueeze(1)).sum().item()
                else:
                    predicted = torch.argmax(outputs, dim=1)
                    correct_val += (predicted == batch_y).sum().item()
                total_val += batch_y.size(0)

            val_loss /= len(val_loader.dataset)
            val_acc = correct_val / total_val
            val_losses.append(val_loss)
            
        print(f'Epoch {epoch+1}/{epoch_count}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}')
        
        
        if not early_stopping:
            continue
        
        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            no_improve_count = 0
        else:
            no_improve_count += 1
        
        if no_improve_count >= patience:
            print('Early stopping')
            break
            
    return train_losses, val_losses

### Custom architectures

In [16]:
# Known vs. Unknown with transformer
train_losses, val_losses = train_model(
    model=bin_transformer_model,
    criterion=bin_transformer_criterion,
    optimizer=bin_transformer_optimizer,
    train_loader=train_loader_bin,
    val_loader=val_loader_bin, 
    model_type="bin", 
    epoch_count=50
)

  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)
Epoch 1: 100%|██████████| 277/277 [00:08<00:00, 31.16it/s]


Epoch 1/50, Train Loss: 0.6974, Train Acc: 0.52, Val Loss: 0.6816, Val Acc: 0.60


Epoch 2: 100%|██████████| 277/277 [00:07<00:00, 35.10it/s]


Epoch 2/50, Train Loss: 0.6675, Train Acc: 0.58, Val Loss: 0.6814, Val Acc: 0.61


Epoch 3: 100%|██████████| 277/277 [00:07<00:00, 34.74it/s]


Epoch 3/50, Train Loss: 0.6506, Train Acc: 0.60, Val Loss: 0.6586, Val Acc: 0.63


Epoch 4: 100%|██████████| 277/277 [00:07<00:00, 34.73it/s]


Epoch 4/50, Train Loss: 0.6393, Train Acc: 0.60, Val Loss: 0.6836, Val Acc: 0.62


Epoch 5: 100%|██████████| 277/277 [00:08<00:00, 34.55it/s]


Epoch 5/50, Train Loss: 0.6292, Train Acc: 0.62, Val Loss: 0.6858, Val Acc: 0.61


Epoch 6: 100%|██████████| 277/277 [00:07<00:00, 34.99it/s]


Epoch 6/50, Train Loss: 0.6260, Train Acc: 0.62, Val Loss: 0.6710, Val Acc: 0.62


Epoch 7: 100%|██████████| 277/277 [00:08<00:00, 34.29it/s]


Epoch 7/50, Train Loss: 0.6193, Train Acc: 0.63, Val Loss: 0.6877, Val Acc: 0.60


Epoch 8: 100%|██████████| 277/277 [00:07<00:00, 35.18it/s]


Epoch 8/50, Train Loss: 0.6165, Train Acc: 0.63, Val Loss: 0.6713, Val Acc: 0.62


Epoch 9: 100%|██████████| 277/277 [00:07<00:00, 35.06it/s]


Epoch 9/50, Train Loss: 0.6148, Train Acc: 0.63, Val Loss: 0.6721, Val Acc: 0.61


Epoch 10: 100%|██████████| 277/277 [00:07<00:00, 35.05it/s]


Epoch 10/50, Train Loss: 0.6110, Train Acc: 0.64, Val Loss: 0.6698, Val Acc: 0.61


Epoch 11: 100%|██████████| 277/277 [00:07<00:00, 34.85it/s]


Epoch 11/50, Train Loss: 0.6089, Train Acc: 0.64, Val Loss: 0.6998, Val Acc: 0.61


Epoch 12: 100%|██████████| 277/277 [00:07<00:00, 34.67it/s]


Epoch 12/50, Train Loss: 0.6075, Train Acc: 0.64, Val Loss: 0.6830, Val Acc: 0.60


Epoch 13: 100%|██████████| 277/277 [00:08<00:00, 33.30it/s]


Epoch 13/50, Train Loss: 0.6031, Train Acc: 0.64, Val Loss: 0.6862, Val Acc: 0.61


Epoch 14: 100%|██████████| 277/277 [00:08<00:00, 34.05it/s]


Epoch 14/50, Train Loss: 0.6032, Train Acc: 0.64, Val Loss: 0.6722, Val Acc: 0.61


Epoch 15: 100%|██████████| 277/277 [00:07<00:00, 34.97it/s]


Epoch 15/50, Train Loss: 0.6027, Train Acc: 0.65, Val Loss: 0.6699, Val Acc: 0.61


Epoch 16: 100%|██████████| 277/277 [00:08<00:00, 34.59it/s]


Epoch 16/50, Train Loss: 0.6024, Train Acc: 0.65, Val Loss: 0.6669, Val Acc: 0.62


Epoch 17: 100%|██████████| 277/277 [00:08<00:00, 34.11it/s]


Epoch 17/50, Train Loss: 0.5999, Train Acc: 0.65, Val Loss: 0.6731, Val Acc: 0.62


Epoch 18: 100%|██████████| 277/277 [00:07<00:00, 34.83it/s]


Epoch 18/50, Train Loss: 0.5992, Train Acc: 0.65, Val Loss: 0.6874, Val Acc: 0.60


Epoch 19: 100%|██████████| 277/277 [00:07<00:00, 34.88it/s]


Epoch 19/50, Train Loss: 0.5984, Train Acc: 0.65, Val Loss: 0.6898, Val Acc: 0.61


Epoch 20: 100%|██████████| 277/277 [00:08<00:00, 34.07it/s]


Epoch 20/50, Train Loss: 0.5978, Train Acc: 0.65, Val Loss: 0.6825, Val Acc: 0.60


Epoch 21: 100%|██████████| 277/277 [00:08<00:00, 33.49it/s]


Epoch 21/50, Train Loss: 0.5978, Train Acc: 0.65, Val Loss: 0.6709, Val Acc: 0.62


Epoch 22: 100%|██████████| 277/277 [00:08<00:00, 34.01it/s]


Epoch 22/50, Train Loss: 0.5943, Train Acc: 0.65, Val Loss: 0.7249, Val Acc: 0.60


Epoch 23: 100%|██████████| 277/277 [00:07<00:00, 34.82it/s]


Epoch 23/50, Train Loss: 0.5962, Train Acc: 0.65, Val Loss: 0.6865, Val Acc: 0.59


Epoch 24: 100%|██████████| 277/277 [00:07<00:00, 34.82it/s]


Epoch 24/50, Train Loss: 0.5944, Train Acc: 0.65, Val Loss: 0.6736, Val Acc: 0.61


Epoch 25: 100%|██████████| 277/277 [00:07<00:00, 34.88it/s]


Epoch 25/50, Train Loss: 0.5923, Train Acc: 0.66, Val Loss: 0.6770, Val Acc: 0.61


Epoch 26: 100%|██████████| 277/277 [00:08<00:00, 34.11it/s]


Epoch 26/50, Train Loss: 0.5920, Train Acc: 0.65, Val Loss: 0.6730, Val Acc: 0.62


Epoch 27: 100%|██████████| 277/277 [00:07<00:00, 34.90it/s]


Epoch 27/50, Train Loss: 0.5918, Train Acc: 0.65, Val Loss: 0.6852, Val Acc: 0.60


Epoch 28: 100%|██████████| 277/277 [00:07<00:00, 34.81it/s]


Epoch 28/50, Train Loss: 0.5916, Train Acc: 0.66, Val Loss: 0.6788, Val Acc: 0.61


Epoch 29: 100%|██████████| 277/277 [00:08<00:00, 34.09it/s]


Epoch 29/50, Train Loss: 0.5913, Train Acc: 0.66, Val Loss: 0.6871, Val Acc: 0.60


Epoch 30: 100%|██████████| 277/277 [00:07<00:00, 34.72it/s]


Epoch 30/50, Train Loss: 0.5909, Train Acc: 0.66, Val Loss: 0.6755, Val Acc: 0.61


Epoch 31: 100%|██████████| 277/277 [00:07<00:00, 34.84it/s]


Epoch 31/50, Train Loss: 0.5889, Train Acc: 0.66, Val Loss: 0.6722, Val Acc: 0.62


Epoch 32: 100%|██████████| 277/277 [00:07<00:00, 34.92it/s]


Epoch 32/50, Train Loss: 0.5879, Train Acc: 0.66, Val Loss: 0.6906, Val Acc: 0.60


Epoch 33: 100%|██████████| 277/277 [00:08<00:00, 34.13it/s]


Epoch 33/50, Train Loss: 0.5892, Train Acc: 0.66, Val Loss: 0.6861, Val Acc: 0.61


Epoch 34: 100%|██████████| 277/277 [00:08<00:00, 34.20it/s]


Epoch 34/50, Train Loss: 0.5860, Train Acc: 0.66, Val Loss: 0.6787, Val Acc: 0.60


Epoch 35: 100%|██████████| 277/277 [00:08<00:00, 34.42it/s]


Epoch 35/50, Train Loss: 0.5852, Train Acc: 0.66, Val Loss: 0.6982, Val Acc: 0.60


Epoch 36: 100%|██████████| 277/277 [00:07<00:00, 34.78it/s]


Epoch 36/50, Train Loss: 0.5855, Train Acc: 0.66, Val Loss: 0.6990, Val Acc: 0.60


Epoch 37: 100%|██████████| 277/277 [00:07<00:00, 34.87it/s]


Epoch 37/50, Train Loss: 0.5853, Train Acc: 0.66, Val Loss: 0.6782, Val Acc: 0.62


Epoch 38: 100%|██████████| 277/277 [00:07<00:00, 34.76it/s]


Epoch 38/50, Train Loss: 0.5852, Train Acc: 0.66, Val Loss: 0.6767, Val Acc: 0.61


Epoch 39: 100%|██████████| 277/277 [00:08<00:00, 34.09it/s]


Epoch 39/50, Train Loss: 0.5838, Train Acc: 0.66, Val Loss: 0.6762, Val Acc: 0.61


Epoch 40: 100%|██████████| 277/277 [00:07<00:00, 34.93it/s]


Epoch 40/50, Train Loss: 0.5826, Train Acc: 0.66, Val Loss: 0.6755, Val Acc: 0.61


Epoch 41: 100%|██████████| 277/277 [00:07<00:00, 34.78it/s]


Epoch 41/50, Train Loss: 0.5828, Train Acc: 0.66, Val Loss: 0.6774, Val Acc: 0.61


Epoch 42: 100%|██████████| 277/277 [00:08<00:00, 34.12it/s]


Epoch 42/50, Train Loss: 0.5808, Train Acc: 0.66, Val Loss: 0.6791, Val Acc: 0.61


Epoch 43: 100%|██████████| 277/277 [00:08<00:00, 33.71it/s]


Epoch 43/50, Train Loss: 0.5805, Train Acc: 0.67, Val Loss: 0.6763, Val Acc: 0.62


Epoch 44: 100%|██████████| 277/277 [00:08<00:00, 33.90it/s]


Epoch 44/50, Train Loss: 0.5802, Train Acc: 0.67, Val Loss: 0.6730, Val Acc: 0.62


Epoch 45: 100%|██████████| 277/277 [00:08<00:00, 34.10it/s]


Epoch 45/50, Train Loss: 0.5818, Train Acc: 0.66, Val Loss: 0.6749, Val Acc: 0.61


Epoch 46: 100%|██████████| 277/277 [00:08<00:00, 32.93it/s]


Epoch 46/50, Train Loss: 0.5788, Train Acc: 0.67, Val Loss: 0.6751, Val Acc: 0.61


Epoch 47: 100%|██████████| 277/277 [00:08<00:00, 33.77it/s]


Epoch 47/50, Train Loss: 0.5793, Train Acc: 0.67, Val Loss: 0.7029, Val Acc: 0.60


Epoch 48: 100%|██████████| 277/277 [00:08<00:00, 33.97it/s]


Epoch 48/50, Train Loss: 0.5819, Train Acc: 0.67, Val Loss: 0.6925, Val Acc: 0.61


Epoch 49: 100%|██████████| 277/277 [00:08<00:00, 34.37it/s]


Epoch 49/50, Train Loss: 0.5769, Train Acc: 0.67, Val Loss: 0.6878, Val Acc: 0.61


Epoch 50: 100%|██████████| 277/277 [00:07<00:00, 34.70it/s]


Epoch 50/50, Train Loss: 0.5766, Train Acc: 0.67, Val Loss: 0.6723, Val Acc: 0.62


In [None]:
# Main task with transformer
train_losses, val_losses = train_model(
    model=main_transformer_model,
    criterion=main_transformer_criterion,
    optimizer=main_transformer_optimizer,
    train_loader=train_loader_main,
    val_loader=val_loader_main, 
    model_type="main", 
    epoch_count=50
)

In [67]:
# Known vs. Unknown with BiLSTM
train_losses, val_losses = train_model(
    model=bin_bilstm_model,
    criterion=bin_bilstm_criterion,
    optimizer=bin_bilstm_optimizer,
    train_loader=train_loader_bin,
    val_loader=val_loader_bin, 
    model_type="bin", 
    epoch_count=50
)

Epoch 1: 100%|██████████| 277/277 [00:16<00:00, 16.41it/s]


Epoch 1/50, Train Loss: 0.6960, Train Acc: 0.52, Val Loss: 0.7122, Val Acc: 0.38


Epoch 2: 100%|██████████| 277/277 [00:12<00:00, 22.29it/s]


Epoch 2/50, Train Loss: 0.6859, Train Acc: 0.53, Val Loss: 0.7117, Val Acc: 0.39


Epoch 3: 100%|██████████| 277/277 [00:09<00:00, 27.76it/s]


Epoch 3/50, Train Loss: 0.6791, Train Acc: 0.54, Val Loss: 0.6929, Val Acc: 0.54


Epoch 4: 100%|██████████| 277/277 [00:09<00:00, 27.75it/s]


Epoch 4/50, Train Loss: 0.6679, Train Acc: 0.55, Val Loss: 0.6975, Val Acc: 0.53


Epoch 5: 100%|██████████| 277/277 [00:10<00:00, 27.59it/s]


Epoch 5/50, Train Loss: 0.6603, Train Acc: 0.57, Val Loss: 0.8594, Val Acc: 0.42


Epoch 6: 100%|██████████| 277/277 [00:10<00:00, 27.62it/s]


Epoch 6/50, Train Loss: 0.6565, Train Acc: 0.58, Val Loss: 0.6948, Val Acc: 0.57


Epoch 7: 100%|██████████| 277/277 [00:09<00:00, 27.74it/s]


Epoch 7/50, Train Loss: 0.6532, Train Acc: 0.58, Val Loss: 0.7271, Val Acc: 0.54


Epoch 8: 100%|██████████| 277/277 [00:09<00:00, 27.71it/s]


Epoch 8/50, Train Loss: 0.6547, Train Acc: 0.58, Val Loss: 0.7005, Val Acc: 0.55


Epoch 9: 100%|██████████| 277/277 [00:10<00:00, 27.27it/s]


Epoch 9/50, Train Loss: 0.6512, Train Acc: 0.59, Val Loss: 0.6919, Val Acc: 0.57


Epoch 10: 100%|██████████| 277/277 [00:10<00:00, 27.60it/s]


Epoch 10/50, Train Loss: 0.6501, Train Acc: 0.59, Val Loss: 0.6880, Val Acc: 0.59


Epoch 11: 100%|██████████| 277/277 [00:09<00:00, 27.72it/s]


Epoch 11/50, Train Loss: 0.6497, Train Acc: 0.59, Val Loss: 0.6811, Val Acc: 0.60


Epoch 12: 100%|██████████| 277/277 [00:09<00:00, 27.79it/s]


Epoch 12/50, Train Loss: 0.6466, Train Acc: 0.60, Val Loss: 0.6832, Val Acc: 0.60


Epoch 13: 100%|██████████| 277/277 [00:09<00:00, 27.77it/s]


Epoch 13/50, Train Loss: 0.6451, Train Acc: 0.60, Val Loss: 0.6927, Val Acc: 0.57


Epoch 14: 100%|██████████| 277/277 [00:09<00:00, 27.80it/s]


Epoch 14/50, Train Loss: 0.6410, Train Acc: 0.60, Val Loss: 0.6926, Val Acc: 0.59


Epoch 15: 100%|██████████| 277/277 [00:09<00:00, 27.77it/s]


Epoch 15/50, Train Loss: 0.6409, Train Acc: 0.61, Val Loss: 0.7251, Val Acc: 0.53


Epoch 16: 100%|██████████| 277/277 [00:09<00:00, 27.78it/s]


Epoch 16/50, Train Loss: 0.6408, Train Acc: 0.60, Val Loss: 0.6891, Val Acc: 0.58


Epoch 17: 100%|██████████| 277/277 [00:10<00:00, 27.34it/s]


Epoch 17/50, Train Loss: 0.6390, Train Acc: 0.60, Val Loss: 0.7130, Val Acc: 0.56


Epoch 18: 100%|██████████| 277/277 [00:11<00:00, 23.87it/s]


Epoch 18/50, Train Loss: 0.6372, Train Acc: 0.61, Val Loss: 0.6781, Val Acc: 0.61


Epoch 19: 100%|██████████| 277/277 [00:13<00:00, 20.92it/s]


Epoch 19/50, Train Loss: 0.6349, Train Acc: 0.61, Val Loss: 0.6800, Val Acc: 0.60


Epoch 20: 100%|██████████| 277/277 [00:12<00:00, 21.41it/s]


Epoch 20/50, Train Loss: 0.6329, Train Acc: 0.61, Val Loss: 0.7068, Val Acc: 0.56


Epoch 21: 100%|██████████| 277/277 [00:12<00:00, 21.36it/s]


Epoch 21/50, Train Loss: 0.6342, Train Acc: 0.61, Val Loss: 0.7315, Val Acc: 0.56


Epoch 22: 100%|██████████| 277/277 [00:12<00:00, 21.32it/s]


Epoch 22/50, Train Loss: 0.6323, Train Acc: 0.61, Val Loss: 0.6951, Val Acc: 0.57


Epoch 23: 100%|██████████| 277/277 [00:12<00:00, 21.32it/s]


Epoch 23/50, Train Loss: 0.6321, Train Acc: 0.61, Val Loss: 0.6932, Val Acc: 0.58


Epoch 24: 100%|██████████| 277/277 [00:12<00:00, 21.43it/s]


Epoch 24/50, Train Loss: 0.6290, Train Acc: 0.62, Val Loss: 0.6741, Val Acc: 0.63


Epoch 25: 100%|██████████| 277/277 [00:13<00:00, 21.18it/s]


Epoch 25/50, Train Loss: 0.6296, Train Acc: 0.62, Val Loss: 0.6957, Val Acc: 0.59


Epoch 26: 100%|██████████| 277/277 [00:12<00:00, 21.42it/s]


Epoch 26/50, Train Loss: 0.6281, Train Acc: 0.62, Val Loss: 0.6777, Val Acc: 0.61


Epoch 27: 100%|██████████| 277/277 [00:12<00:00, 21.35it/s]


Epoch 27/50, Train Loss: 0.6286, Train Acc: 0.62, Val Loss: 0.7011, Val Acc: 0.60


Epoch 28: 100%|██████████| 277/277 [00:12<00:00, 21.44it/s]


Epoch 28/50, Train Loss: 0.6276, Train Acc: 0.62, Val Loss: 0.6961, Val Acc: 0.58


Epoch 29:  17%|█▋        | 47/277 [00:02<00:11, 20.84it/s]


KeyboardInterrupt: 

In [None]:
# Main task with BiLSTM
train_losses, val_losses = train_model(
    model=main_bilstm_model,
    criterion=main_bilstm_criterion,
    optimizer=main_bilstm_optimizer,
    train_loader=train_loader_main,
    val_loader=val_loader_main, 
    model_type="main", 
    epoch_count=50
)

### Pretrained model

In [64]:
from transformers import AutoProcessor, WavLMModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  _torch_pytree._register_pytree_node(


In [3]:
X, y = np.load("data.npy"), np.load("labels.npy")

In [5]:
X = X[y != 10]
y = y[y != 10]

In [6]:
X.shape

(18994, 16000)

In [7]:
feature_extractor = ASTFeatureExtractor()
X = np.array(feature_extractor(X, sampling_rate=16000)["input_values"])

In [8]:
X.shape

(18994, 1024, 128)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [15]:
train_dl = transform_to_data_loader(X_train, y_train, device=device)

In [16]:
val_dl = transform_to_data_loader(X_test, y_test, device=device)

In [3]:
# feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base")
# feature_extractor = ASTFeatureExtractor()

# train_dl, val_dl = get_dl_for_pretrained(
#     feature_extractor=feature_extractor,
#     X_data_path="data.npy", 
#     y_data_path="labels.npy", 
#     device=device,
#     task_type="main"
# )

Extracting features for data with size (18994, 16000).
Splitting data with size (18994, 1024, 128).


In [25]:
# model = AutoModelForAudioClassification.from_pretrained("facebook/wav2vec2-base", num_labels=10).to(device)

configuration = ASTConfig()
model = ASTModel(configuration).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [27]:
train_losses, val_losses = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_dl,
    val_loader=val_dl, 
    model_type="main", 
    epoch_count=50,
    using_pretrained=True
)

Epoch 1:   0%|          | 0/56 [00:02<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 910.00 MiB. GPU 0 has a total capacity of 6.00 GiB of which 0 bytes is free. Of the allocated memory 12.27 GiB is allocated by PyTorch, and 497.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)