In [6]:
# -*- coding: utf-8 -*-

from matplotlib import pyplot as plt
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader
import torch
import os
from torch.optim import *
from torch import nn
from torch.nn import functional as F
from scipy import ndimage
import pandas as pd
from copy import deepcopy

# Reproducibility: seed the torch and numpy generators and ask cuDNN for
# deterministic kernels (benchmark mode is switched off alongside it).
torch.manual_seed(7)
np.random.seed(7)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Run on the GPU when one is visible to torch, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [7]:
# Folder that holds the 'openmic-2018.npz' archive.
DATA_ROOT = '/beegfs/bva212/openmic-2018'

# Keep the archive handle around and pull the four arrays used by the cells below.
# NOTE(review): presumably X = features, Y_true = label scores, Y_mask = which
# labels were annotated, sample_key = clip identifiers - confirm against the
# dataset documentation.
OPENMIC = np.load(os.path.join(DATA_ROOT, 'openmic-2018.npz'))
X, Y_true, Y_mask, sample_key = (
    OPENMIC[field] for field in ('X', 'Y_true', 'Y_mask', 'sample_key')
)

In [8]:
# Random 70% / 10% / 20% split of the row indices into train / test / validation.
len_data = Y_mask.shape[0]
idx_train = np.random.choice(len_data, int(len_data*0.7), replace=False)
remain_set = set(np.arange(len_data))-set(idx_train)
# A set has no guaranteed iteration order, so sort the remaining indices before
# sampling from them; otherwise the seeded draw depends on an implementation detail.
idx_test = np.random.choice(sorted(remain_set), int(len_data*0.1), replace=False)
# Everything not used for train or test becomes validation. Stored as a sorted
# integer array so all three index containers have the same type.
idx_val = np.array(sorted(remain_set-set(idx_test)), dtype=np.intp)

In [9]:
# Slice the mask matrix and the label matrix once per split (train, val, test).
Y_mask_train, Y_mask_val, Y_mask_test = (
    Y_mask[rows] for rows in (idx_train, idx_val, idx_test)
)

label_train, label_val, label_test = (
    Y_true[rows] for rows in (idx_train, idx_val, idx_test)
)

In [10]:
def _masked_sample_weights(mask):
    """Return (per-row weight, per-entry weight) for one split's mask matrix.

    The per-row weight is the row sum of `mask` divided by 20; the per-entry
    weight is that value broadcast across the row and multiplied by `mask`,
    so entries where the mask is zero get weight zero.
    NOTE(review): 20 presumably is the number of label columns - confirm.
    """
    row_weight = np.sum(mask, axis=1) / 20
    return row_weight, row_weight.reshape(-1, 1) * mask


weights_train, new_weights_train = _masked_sample_weights(Y_mask_train)
weights_val, new_weights_val = _masked_sample_weights(Y_mask_val)
weights_test, new_weights_test = _masked_sample_weights(Y_mask_test)

In [26]:
class ArrowOfTime(Dataset):
    """Dataset that serves one '<key>_cqt.npy' array together with its label and weight rows.

    Parameters
    ----------
    root_dir : str
        Directory containing the '<key>_cqt.npy' files. A trailing path
        separator is optional.
    files : sequence of str
        One key per sample; '<key>_cqt.npy' is loaded for sample `idx`.
    weights : array-like
        Per-sample weight rows, indexed in parallel with `files`.
    label : array-like
        Per-sample label rows, indexed in parallel with `files`.
    """

    def __init__(self, root_dir, files, weights, label):
        self.weights = weights
        # Copy of the notebook-level `device`; the class itself never reads it.
        self.device = device
        self.root_dir = root_dir
        self.files = files
        self.label = label

    def __len__(self):
        """Number of samples, i.e. the number of file keys."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load sample `idx` from disk.

        Returns a dict with keys 'logscalogram', 'label' and 'weight'; each
        value gets a new leading axis of length 1 so that samples can be
        concatenated along axis 0 by the collate function.
        """
        # os.path.join instead of raw string concatenation: the path is correct
        # whether or not root_dir ends with a separator.
        path = os.path.join(self.root_dir, self.files[idx] + '_cqt.npy')
        logscalogram = np.load(path)
        weight = self.weights[idx]
        label = self.label[idx]
        return {
            'logscalogram': logscalogram[np.newaxis, :],
            'label': label[np.newaxis, :],
            'weight': weight[np.newaxis, :],
        }

# NOTE(review): `filenames` is not referenced by any of the visible cells -
# possibly a leftover; confirm before removing.
filenames = []
# Directory passed to the datasets below as the location of the per-sample
# '<sample_key>_cqt.npy' files.
root_dir = '/beegfs/bva212/openmic-2018/cqt_full/'

# Mini-batch size shared by the train, validation and test loaders.
BATCH_SIZE = 8

def my_collate(batch):
    """Merge a list of sample dicts into three float32 tensors.

    Each sample provides 'logscalogram', 'label' and 'weight' arrays that
    already carry a leading axis of length 1. The arrays are concatenated
    along that axis; the spectrogram batch additionally gets a new axis at
    position 1.

    Returns
    -------
    list
        [spectrograms, labels, weights] as float tensors.
    """
    def gather(field):
        # Stack one field of every sample along the existing leading axis.
        return np.concatenate([sample[field] for sample in batch], axis=0)

    spectrograms = gather('logscalogram')[:, np.newaxis]
    stacked = (spectrograms, gather('label'), gather('weight'))
    return [torch.from_numpy(arr).float() for arr in stacked]

def _build_split(indices, weights, labels):
    """Create the ArrowOfTime dataset and its DataLoader for one data split.

    `indices` selects the split's entries from `sample_key`; `weights` and
    `labels` are the already-sliced arrays for the same split.
    NOTE(review): every split is shuffled, including validation and test -
    confirm that this is intended.
    """
    dataset = ArrowOfTime(root_dir, sample_key[indices], weights, labels)
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        collate_fn=my_collate,
    )
    return dataset, loader


Train_dataset, Train_loader = _build_split(idx_train, new_weights_train, label_train)
Val_dataset, Val_loader = _build_split(idx_val, new_weights_val, label_val)
Test_dataset, Test_loader = _build_split(idx_test, new_weights_test, label_test)


In [41]:
class AudioConvNet(nn.Module):
    """Four-stage 2-D CNN that maps a single-channel input to `n_out` raw scores.

    Every stage applies (3x3 convolution -> batch norm -> ReLU) twice and then
    pools. The channel widths are 64, 128, 256 and 512; only the very first
    convolution uses stride 2. Stage 1 ends with 2x2 max pooling, stages 2 and
    3 with 2x2 average pooling, and stage 4 with global average pooling. The
    resulting 512-dimensional vector goes through one linear layer. No
    activation is applied to the output.
    """

    def __init__(self, n_out):
        super().__init__()
        # Parameter-free pooling layers, reused by forward().
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.pool2 = nn.AdaptiveAvgPool2d(output_size=1)

        # Stage 1: 1 -> 64 channels (the first convolution halves the resolution).
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=2, padding=1)
        self.cnn2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.bat10 = nn.BatchNorm2d(num_features=64)
        self.bat11 = nn.BatchNorm2d(num_features=64)

        # Stage 2: 64 -> 128 channels.
        self.cnn3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.cnn4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.bat20 = nn.BatchNorm2d(num_features=128)
        self.bat21 = nn.BatchNorm2d(num_features=128)

        # Stage 3: 128 -> 256 channels.
        self.cnn5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.cnn6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.bat30 = nn.BatchNorm2d(num_features=256)
        self.bat31 = nn.BatchNorm2d(num_features=256)

        # Stage 4: 256 -> 512 channels.
        self.cnn7 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.cnn8 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.bat40 = nn.BatchNorm2d(num_features=512)
        self.bat41 = nn.BatchNorm2d(num_features=512)

        # Classifier head on the globally pooled 512-channel vector.
        self.linear_final = nn.Linear(in_features=512, out_features=n_out)

    def _stage(self, feats, conv_a, norm_a, conv_b, norm_b, pool):
        """Apply conv -> batch norm -> ReLU twice, then the given pooling layer."""
        feats = F.relu(norm_a(conv_a(feats)))
        feats = F.relu(norm_b(conv_b(feats)))
        return pool(feats)

    def forward(self, inp):
        """Run the four stages and the linear head; returns shape (batch, n_out)."""
        feats = self._stage(inp, self.cnn1, self.bat10, self.cnn2, self.bat11, self.pool)
        feats = self._stage(feats, self.cnn3, self.bat20, self.cnn4, self.bat21, self.pool1)
        feats = self._stage(feats, self.cnn5, self.bat30, self.cnn6, self.bat31, self.pool1)
        feats = self._stage(feats, self.cnn7, self.bat40, self.cnn8, self.bat41, self.pool2)

        # Flatten (batch, 512, 1, 1) to (batch, 512) for the linear layer.
        flat = feats.reshape(feats.shape[0], -1)
        return self.linear_final(flat)


# Function for testing the model
def test_model(loader, model):
    correct = 0
    total_loss = 0
    total = 0
    total_num = 0
    model.eval()
    with torch.no_grad():
        for spectrogram, target, weight in loader:
            spectrogram_batch, target_batch, weight_batch = spectrogram.to(device), target.to(device), weight.to(device)
            outputs = model(spectrogram_batch)
#             print(label_batch.shape)
            predicted = (torch.sigmoid(outputs.data)>0.5).float()
            loss = F.binary_cross_entropy_with_logits(outputs, target_batch,
                                                  weight = weight_batch,
                                                  reduction='sum')
            total_loss += loss.item()
            total += weight_batch.shape[0]

            correct += ((weight_batch != 0).float()*(predicted.eq(target_batch.view_as(predicted)).float())).sum().item()
            total_num += (weight_batch != 0).sum().item()
    return (100 * correct / total_num), (total_loss/total)

def train_model(train_loader, val_loader, model, optimizer, scheduler, num_epochs):
    """Train `model` for `num_epochs` epochs and track the best validation accuracy.

    Parameters
    ----------
    train_loader, val_loader : iterable
        Yield (spectrogram, target, weight) tensor triples.
    model : torch.nn.Module
        Network returning one logit per target entry.
    optimizer : torch.optim.Optimizer
        Optimizer over the model's parameters.
    scheduler
        Scheduler whose step() accepts a metric; it is stepped once per epoch
        with the validation accuracy.
    num_epochs : int
        Number of passes over `train_loader`.

    Returns
    -------
    (train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict)
        Per-epoch metrics as reported by test_model(), plus a deep copy of the
        state dict from the epoch with the highest validation accuracy. If no
        epoch exceeds an accuracy of 0 (or num_epochs is 0), the state dict is
        the one the model had on entry.
    """
    # Move batches to wherever the model's parameters live instead of relying
    # on a notebook-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_acc_list = []
    train_loss_list = []
    val_acc_list = []
    val_loss_list = []
    best_val_acc = 0
    # Start from a snapshot of the incoming weights so the return statement
    # never sees an unbound name.
    best_model_state_dict = deepcopy(model.state_dict())
    for epoch in range(num_epochs):
        # test_model() leaves the network in eval mode, so switch back to
        # training mode at the start of every epoch.
        model.train()
        for spectrogram, target, weight in train_loader:
            spectrogram_batch = spectrogram.to(run_device)
            target_batch = target.to(run_device)
            weight_batch = weight.to(run_device)
            optimizer.zero_grad()
            outputs = model(spectrogram_batch)
            loss = F.binary_cross_entropy_with_logits(outputs, target_batch,
                                                      weight=weight_batch,
                                                      reduction='mean')
            loss.backward()
            optimizer.step()
        train_acc, train_loss = test_model(train_loader, model)
        val_acc, val_loss = test_model(val_loader, model)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state_dict = deepcopy(model.state_dict())
        train_acc_list.append(train_acc)
        train_loss_list.append(train_loss)
        val_acc_list.append(val_acc)
        val_loss_list.append(val_loss)
        scheduler.step(val_acc)
        print("Epoch:{}, Validation Accuracy:{:.2f}, Training Acc: {:.2f}, Val Loss: {:.5f}, Train Loss: {:.5f}".format(epoch+1, val_acc, train_acc, val_loss, train_loss))
    return train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict

# Hyper-parameters for this run.
learning_rate = 0.01
num_epochs = 50 # number epoch to train

# Network with 20 outputs, placed on the selected device.
model = AudioConvNet(20).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Cut the learning rate by 10x once the monitored metric (validation accuracy,
# hence mode='max') has not improved by more than 0.03 absolute for over
# `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.1,
    patience=2,
    verbose=True,
    threshold=0.03,
    threshold_mode='abs',
    cooldown=0,
    min_lr=0,
    eps=1e-08,
)

# Train and keep the per-epoch curves plus the best validation checkpoint.
(train_acc_list,
 train_loss_list,
 val_acc_list,
 val_loss_list,
 best_model_state_dict) = train_model(Train_loader, Val_loader, model, optimizer, scheduler, num_epochs)

Epoch:1, Validation Accuracy:50.54, Training Acc: 50.24, Val Loss: 0.16454, Train Loss: 0.16415
Epoch:2, Validation Accuracy:55.04, Training Acc: 54.86, Val Loss: 0.14236, Train Loss: 0.13992
Epoch:3, Validation Accuracy:54.58, Training Acc: 54.42, Val Loss: 0.14563, Train Loss: 0.14233
Epoch:4, Validation Accuracy:56.33, Training Acc: 55.97, Val Loss: 0.13798, Train Loss: 0.13428
Epoch:5, Validation Accuracy:56.10, Training Acc: 56.61, Val Loss: 0.13824, Train Loss: 0.13367
Epoch:6, Validation Accuracy:56.50, Training Acc: 57.20, Val Loss: 0.13435, Train Loss: 0.12715
Epoch:7, Validation Accuracy:56.69, Training Acc: 57.34, Val Loss: 0.13268, Train Loss: 0.12653
Epoch:8, Validation Accuracy:57.47, Training Acc: 58.00, Val Loss: 0.13044, Train Loss: 0.12293
Epoch:9, Validation Accuracy:57.35, Training Acc: 57.91, Val Loss: 0.13462, Train Loss: 0.12544
Epoch:10, Validation Accuracy:58.09, Training Acc: 58.76, Val Loss: 0.13047, Train Loss: 0.12202
Epoch:11, Validation Accuracy:58.40, Tr