In [1]:
# -*- coding: utf-8 -*-

from matplotlib import pyplot as plt
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader
import torch
import os
from torch.optim import *
from torch import nn
from torch.nn import functional as F
from scipy import ndimage
import pandas as pd
from copy import deepcopy
import random
from sklearn.metrics import f1_score

random.seed(7)
torch.cuda.manual_seed(7)
torch.manual_seed(7)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(7)

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [2]:
DATA_ROOT = '/beegfs/bva212/openmic-2018'
OPENMIC = np.load(os.path.join(DATA_ROOT, 'openmic-2018.npz'))
X, Y_true, Y_mask, sample_key = OPENMIC['X'], OPENMIC['Y_true'], OPENMIC['Y_mask'], OPENMIC['sample_key']

In [3]:
train_samples = pd.read_csv(os.path.join(DATA_ROOT,'partitions/split01_train.csv'), names =['id']).to_numpy().squeeze()
test_samples = pd.read_csv(os.path.join(DATA_ROOT,'partitions/split01_test.csv'), names =['id']).to_numpy().squeeze()

In [4]:
len_data = len(train_samples)
idx_train = np.random.choice(len_data, int(len_data*0.8), replace=False)
remain_set = list(set(np.arange(len_data))-set(idx_train))

In [5]:
val_idx = np.isin(sample_key, train_samples[remain_set])
train_idx = np.isin(sample_key, train_samples[idx_train])
test_idx = np.isin(sample_key, test_samples)

In [6]:
class ArrowOfTime(Dataset):

    def __init__(self, root_dir, files, weights, label):
        self.weights = weights
        self.device = device
        self.root_dir = root_dir
        self.files = files
        self.label = label

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        logscalogram = (np.load(self.root_dir + self.files[idx]+'_cqt.npy') - -24.3633)/14.2659
        weight = self.weights[idx]
        label = (self.label[idx] > 0.5).astype(int)
        return {'logscalogram': logscalogram[np.newaxis, :], 'label': label[np.newaxis, :], 'weight': weight[np.newaxis, :]}

filenames = []
root_dir = '/beegfs/bva212/openmic-2018/cqt_full/'

BATCH_SIZE = 32

def my_collate(batch):
    data = np.stack([item['logscalogram'] for item in batch])
    target = np.concatenate([item['label'] for item in batch],axis=0)
    weight = np.concatenate([item['weight'] for item in batch],axis=0)
    weight_sum = weight.sum(0)
    weight_sum = np.repeat(weight_sum[np.newaxis, :], len(batch), 0)
    weight_sum[weight_sum==0]=1
    weight = weight/weight_sum
    return [torch.from_numpy(data).float(), torch.from_numpy(target).float(), torch.from_numpy(weight).float()]


In [7]:
class snet2_jigsaw(nn.Module):

    def __init__(self, output_classes = 20):
        '''
        Create the 5 Conv Layer Sound Net network architecture as per the paper - https://arxiv.org/pdf/1610.09001.pdf
        '''
        super(snet2_jigsaw, self).__init__()

        self.conv_layers = nn.Sequential(nn.Conv2d(in_channels = 1, out_channels= 16, kernel_size = 5, stride = 2, padding = 5), 
                                nn.BatchNorm2d(num_features = 16), 
                                nn.ReLU(inplace = True),

                                nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 5, stride = 2, padding = 5),
                                nn.BatchNorm2d(32),
                                nn.ReLU(inplace = True),

                                nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 5, padding = 5),
                                nn.BatchNorm2d(64),
                                nn.ReLU(inplace = True),
                                nn.AvgPool2d(kernel_size = 3),

                                nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 4, padding = 4),
                                nn.BatchNorm2d(128),
                                nn.ReLU(inplace = True),

                                nn.Conv2d(in_channels = 128, out_channels = 256, kernel_size = 4, padding = 4),
                                nn.BatchNorm2d(256),
                                nn.ReLU(inplace = True),
                                nn.AvgPool2d(kernel_size = 3),

                                nn.Conv2d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 2, padding = 3),
                                nn.BatchNorm2d(512),
                                nn.ReLU(inplace = True),

                                nn.Conv2d(in_channels = 512, out_channels = 1024, kernel_size = 3, stride = 2, padding = 3),
                                nn.BatchNorm2d(1024),
                                nn.ReLU(inplace = True),
                                nn.AdaptiveAvgPool2d(output_size = 1)
                                )
        self.mlp_layer = nn.Linear(1024, output_classes)
              
    def forward(self, input):
        out = self.conv_layers(input)
        out = self.mlp_layer(out.view(out.shape[0], -1))
        return out
    
    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in', nonlinearity='relu')
        
# Function for testing the model
def test_model(loader, model):
    correct = 0
    total_loss = 0
    total = 0
    total_num = 0
    actual_arr = []
    predicted_arr = []
    weight_array = []
    model.eval()
    with torch.no_grad():
        for spectrogram, target, weight in loader:
            spectrogram_batch, target_batch, weight_batch = spectrogram.to(device), target.to(device), weight.to(device)
            outputs = model(spectrogram_batch)
            loss = F.binary_cross_entropy_with_logits(outputs, target_batch,
                                                  weight = weight_batch,
                                                  reduction='sum')
            predicted = (torch.sigmoid(outputs.data)>0.5).float()
            
            actual_arr.extend(target.view(1,-1).squeeze().numpy().astype(int).tolist())
            predicted_arr.extend(predicted.view(1,-1).squeeze().cpu().numpy().astype(int).tolist())
            
            total_loss += loss.item()
            total += weight_batch.shape[0]
            
            correct += ((weight_batch != 0).float()*(predicted.eq(target_batch.view_as(predicted)).float())).sum().item()
            total_num += (weight_batch != 0).sum().item()
            weight_array = np.concatenate((weight_array,(weight != 0).reshape(-1).numpy().astype(int)))
        accuracy = (100 * correct / total_num)
        return accuracy, f1_score(actual_arr, predicted_arr, average='micro', sample_weight = weight_array), total_loss/total

def train_model(train_loader, val_loader, model, optimizer, scheduler, num_epochs):
    train_acc_list = []
    train_loss_list = []
    val_acc_list = []
    val_loss_list = []
    train_f1_list = []
    val_f1_list = []
    best_val_acc = 0
    for epoch in range(num_epochs):
        for spectrogram, target, weight in train_loader:
            model.train()
            spectrogram_batch, target_batch, weight_batch = spectrogram.to(device), target.to(device), weight.to(device)
            optimizer.zero_grad()
            outputs = model(spectrogram_batch)
            loss = F.binary_cross_entropy_with_logits(outputs, target_batch,
                                                  weight = weight_batch,
                                                  reduction='sum')
            loss.backward()
            optimizer.step()
        train_acc, f1_score_train, train_loss = test_model(train_loader, model)
        val_acc, f1_score_val, val_loss = test_model(val_loader, model)
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state_dict = deepcopy(model.state_dict())
        
        train_acc_list.append(train_acc)
        train_f1_list.append(f1_score_train)
        train_loss_list.append(train_loss)
        val_f1_list.append(f1_score_val)
        val_acc_list.append(val_acc)
        val_loss_list.append(val_loss)
        
        scheduler.step(val_acc)
        print("Epoch:{}".format(epoch+1))
        print("Validation Accuracy:{:.2f}, Validation F1:{:.2f}, Val Loss: {:.5f}".format(val_acc, f1_score_val, val_loss))
        print("Training Acc: {:.2f}, Training F1:{:.2f}, Train Loss: {:.5f}".format(train_acc, f1_score_train, train_loss))
    return train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict

In [8]:
import pickle
sizes = [10, 50, 250, 500, 950, 'Full']

Model_Data = pickle.load(open('Training_Data_Index.pkl', 'rb'))

Val_dataset = ArrowOfTime(root_dir, sample_key[val_idx], Y_mask[val_idx], Y_true[val_idx])
Val_loader = torch.utils.data.DataLoader(dataset = Val_dataset, 
                                              batch_size = BATCH_SIZE,
                                              shuffle = False,
                                        collate_fn = my_collate)

results_dict = {}

for i in range(len(sizes)):
    if sizes[i] != 'Full':
        sample_key_train = Model_Data[i]
        idx_train_new = np.isin(sample_key, sample_key_train)
        Y_mask_train = Y_mask[idx_train_new]
        label_train = Y_true[idx_train_new]
        Train_dataset = ArrowOfTime(root_dir, sample_key[idx_train_new], Y_mask_train, label_train)
        Train_loader = torch.utils.data.DataLoader(dataset = Train_dataset, 
                                                      batch_size = BATCH_SIZE,
                                                      shuffle = True,
                                                  collate_fn = my_collate)
    else:
        Y_mask_train = Y_mask[train_idx]
        label_train = Y_true[train_idx]
        Train_dataset = ArrowOfTime(root_dir, sample_key[train_idx], Y_mask_train, label_train)
        Train_loader = torch.utils.data.DataLoader(dataset = Train_dataset, 
                                                      batch_size = BATCH_SIZE,
                                                      shuffle = True,
                                                  collate_fn = my_collate)

    
    weights = torch.load('/beegfs/sc6957/capstone/models/20191106/snet2_jigsaw_large_best_model.pth')['modelStateDict']
    model = snet2_jigsaw(20)
    reqd_weights = {key:value for key, value in weights.items() if key in model.state_dict() and key[:9] != 'mlp_layer'}
    reqd_weights["mlp_layer.weight"] = model.mlp_layer.weight
    reqd_weights["mlp_layer.bias"] = model.mlp_layer.bias
    model.load_state_dict(reqd_weights)
    model.to(device)
    learning_rate = 10**(-2)
    num_epochs = 50 # number epoch to train

    optimizer = torch.optim.Adam([param for param in model.parameters() if param.requires_grad == True], lr=learning_rate, weight_decay = 0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True, threshold=0.03,
                                                           threshold_mode='abs', cooldown=0, min_lr=0, eps=1e-08)

    train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict = train_model(Train_loader, Val_loader, model, optimizer, scheduler, num_epochs)
    results_dict[sizes[i]] = {
        'train_acc_list':train_acc_list,
        'train_loss_list': train_loss_list,
        'val_acc_list': val_acc_list,
        'val_loss_list': val_loss_list,
        'best_model_state_dict': best_model_state_dict
    }
pickle.dump(results_dict, open('/scratch/rc3620/Capstone/Size_Exp_Jigsaw_Wt_NoLin_Results.pkl','wb'))

Epoch:1
Validation Accuracy:59.09, Validation F1:0.59, Val Loss: 54.79447
Training Acc: 62.69, Training F1:0.63, Train Loss: 56.73961
Epoch:2
Validation Accuracy:59.49, Validation F1:0.59, Val Loss: 0.39648
Training Acc: 62.88, Training F1:0.63, Train Loss: 0.41709
Epoch:3
Validation Accuracy:62.59, Validation F1:0.63, Val Loss: 0.39235
Training Acc: 64.39, Training F1:0.64, Train Loss: 0.43675
Epoch:4
Validation Accuracy:62.56, Validation F1:0.63, Val Loss: 0.39250
Training Acc: 64.02, Training F1:0.64, Train Loss: 0.42514
Epoch:5
Validation Accuracy:62.57, Validation F1:0.63, Val Loss: 0.39557
Training Acc: 64.02, Training F1:0.64, Train Loss: 0.43106
Epoch:6
Validation Accuracy:62.70, Validation F1:0.63, Val Loss: 0.40151
Training Acc: 64.39, Training F1:0.64, Train Loss: 0.42060
Epoch:7
Validation Accuracy:63.51, Validation F1:0.64, Val Loss: 0.39813
Training Acc: 69.32, Training F1:0.69, Train Loss: 0.38798
Epoch:8
Validation Accuracy:63.04, Validation F1:0.63, Val Loss: 0.39932
T

Epoch:10
Validation Accuracy:67.83, Validation F1:0.68, Val Loss: 0.34781
Training Acc: 71.43, Training F1:0.71, Train Loss: 0.34065
Epoch:11
Validation Accuracy:69.75, Validation F1:0.70, Val Loss: 0.33564
Training Acc: 72.60, Training F1:0.73, Train Loss: 0.32858
Epoch:12
Validation Accuracy:69.96, Validation F1:0.70, Val Loss: 0.33335
Training Acc: 73.06, Training F1:0.73, Train Loss: 0.31806
Epoch:13
Validation Accuracy:69.45, Validation F1:0.69, Val Loss: 0.33941
Training Acc: 73.02, Training F1:0.73, Train Loss: 0.32709
Epoch:14
Validation Accuracy:69.80, Validation F1:0.70, Val Loss: 0.33614
Training Acc: 73.53, Training F1:0.74, Train Loss: 0.32791
Epoch    14: reducing learning rate of group 0 to 1.0000e-04.
Epoch:15
Validation Accuracy:69.25, Validation F1:0.69, Val Loss: 0.33819
Training Acc: 73.30, Training F1:0.73, Train Loss: 0.31949
Epoch:16
Validation Accuracy:70.09, Validation F1:0.70, Val Loss: 0.32910
Training Acc: 74.15, Training F1:0.74, Train Loss: 0.30660
Epoch:1

Epoch:19
Validation Accuracy:71.43, Validation F1:0.71, Val Loss: 0.30063
Training Acc: 74.30, Training F1:0.74, Train Loss: 0.29514
Epoch    19: reducing learning rate of group 0 to 1.0000e-04.
Epoch:20
Validation Accuracy:71.51, Validation F1:0.72, Val Loss: 0.30368
Training Acc: 74.00, Training F1:0.74, Train Loss: 0.29803
Epoch:21
Validation Accuracy:72.19, Validation F1:0.72, Val Loss: 0.29686
Training Acc: 74.78, Training F1:0.75, Train Loss: 0.29177
Epoch:22
Validation Accuracy:72.25, Validation F1:0.72, Val Loss: 0.29637
Training Acc: 74.87, Training F1:0.75, Train Loss: 0.29065
Epoch:23
Validation Accuracy:72.33, Validation F1:0.72, Val Loss: 0.29625
Training Acc: 74.96, Training F1:0.75, Train Loss: 0.28797
Epoch:24
Validation Accuracy:72.38, Validation F1:0.72, Val Loss: 0.29691
Training Acc: 75.00, Training F1:0.75, Train Loss: 0.29145
Epoch:25
Validation Accuracy:72.48, Validation F1:0.72, Val Loss: 0.29630
Training Acc: 75.00, Training F1:0.75, Train Loss: 0.28908
Epoch:2

Epoch:28
Validation Accuracy:72.46, Validation F1:0.72, Val Loss: 0.29674
Training Acc: 74.73, Training F1:0.75, Train Loss: 0.28897
Epoch    28: reducing learning rate of group 0 to 1.0000e-05.
Epoch:29
Validation Accuracy:72.72, Validation F1:0.73, Val Loss: 0.29681
Training Acc: 74.65, Training F1:0.75, Train Loss: 0.29249
Epoch:30
Validation Accuracy:72.71, Validation F1:0.73, Val Loss: 0.29659
Training Acc: 74.51, Training F1:0.75, Train Loss: 0.29126
Epoch:31
Validation Accuracy:72.72, Validation F1:0.73, Val Loss: 0.29615
Training Acc: 74.62, Training F1:0.75, Train Loss: 0.29142
Epoch    31: reducing learning rate of group 0 to 1.0000e-06.
Epoch:32
Validation Accuracy:72.66, Validation F1:0.73, Val Loss: 0.29702
Training Acc: 74.77, Training F1:0.75, Train Loss: 0.29185
Epoch:33
Validation Accuracy:72.64, Validation F1:0.73, Val Loss: 0.29654
Training Acc: 74.72, Training F1:0.75, Train Loss: 0.29283
Epoch:34
Validation Accuracy:72.71, Validation F1:0.73, Val Loss: 0.29686
Trai

Epoch:36
Validation Accuracy:73.56, Validation F1:0.74, Val Loss: 0.28829
Training Acc: 75.41, Training F1:0.75, Train Loss: 0.28432
Epoch:37
Validation Accuracy:73.69, Validation F1:0.74, Val Loss: 0.28782
Training Acc: 75.32, Training F1:0.75, Train Loss: 0.28256
Epoch    37: reducing learning rate of group 0 to 1.0000e-07.
Epoch:38
Validation Accuracy:73.42, Validation F1:0.73, Val Loss: 0.28810
Training Acc: 75.28, Training F1:0.75, Train Loss: 0.28518
Epoch:39
Validation Accuracy:73.46, Validation F1:0.73, Val Loss: 0.28826
Training Acc: 75.36, Training F1:0.75, Train Loss: 0.28504
Epoch:40
Validation Accuracy:73.46, Validation F1:0.73, Val Loss: 0.28786
Training Acc: 75.29, Training F1:0.75, Train Loss: 0.28285
Epoch    40: reducing learning rate of group 0 to 1.0000e-08.
Epoch:41
Validation Accuracy:73.54, Validation F1:0.74, Val Loss: 0.28861
Training Acc: 75.48, Training F1:0.75, Train Loss: 0.28372
Epoch:42
Validation Accuracy:73.72, Validation F1:0.74, Val Loss: 0.28821
Trai

Epoch:45
Validation Accuracy:71.75, Validation F1:0.72, Val Loss: 0.31381
Training Acc: 71.81, Training F1:0.72, Train Loss: 0.31874
Epoch    45: reducing learning rate of group 0 to 1.0000e-08.
Epoch:46
Validation Accuracy:71.71, Validation F1:0.72, Val Loss: 0.31339
Training Acc: 71.84, Training F1:0.72, Train Loss: 0.31923
Epoch:47
Validation Accuracy:71.50, Validation F1:0.71, Val Loss: 0.31376
Training Acc: 71.79, Training F1:0.72, Train Loss: 0.31883
Epoch:48
Validation Accuracy:71.67, Validation F1:0.72, Val Loss: 0.31334
Training Acc: 71.81, Training F1:0.72, Train Loss: 0.31766
Epoch:49
Validation Accuracy:71.59, Validation F1:0.72, Val Loss: 0.31413
Training Acc: 71.72, Training F1:0.72, Train Loss: 0.31931
Epoch:50
Validation Accuracy:71.56, Validation F1:0.72, Val Loss: 0.31359
Training Acc: 71.76, Training F1:0.72, Train Loss: 0.31741


In [12]:
mean = 0.
std = 0.
for images, _, _ in Train_loader:
    batch_samples = images.size(0) # batch size (the last batch can have smaller size!)
    images1 = images.reshape(batch_samples, images.size(1), -1)
    mean += images1.mean(2).sum(0)
    std += images1.std(2).sum(0)
    
mean /= len(Train_loader.dataset)
std /= len(Train_loader.dataset)

In [13]:
print(mean, std)

tensor([-24.3633]) tensor([14.2659])
