In [1]:
# -*- coding: utf-8 -*-

from matplotlib import pyplot as plt
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader
import torch
import os
from torch.optim import *
from torch import nn
from torch.nn import functional as F
from scipy import ndimage
import pandas as pd
from copy import deepcopy
import random
from sklearn.metrics import f1_score

random.seed(7)
torch.cuda.manual_seed(7)
torch.manual_seed(7)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(7)

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [2]:
DATA_ROOT = '/beegfs/bva212/openmic-2018'
OPENMIC = np.load(os.path.join(DATA_ROOT, 'openmic-2018.npz'))
X, Y_true, Y_mask, sample_key = OPENMIC['X'], OPENMIC['Y_true'], OPENMIC['Y_mask'], OPENMIC['sample_key']

In [3]:
len_data = Y_mask.shape[0]
idx_train = np.random.choice(len_data, int(len_data*0.7), replace=False)
remain_set = set(np.arange(len_data))-set(idx_train)
idx_test = np.random.choice(list(remain_set), int(len_data*0.1), replace=False)
idx_val = list(remain_set-set(idx_test))

In [4]:
Y_mask_val = Y_mask[idx_val]
Y_mask_test = Y_mask[idx_test]

label_val = Y_true[idx_val]
label_test = Y_true[idx_test]

In [5]:
class ArrowOfTime(Dataset):

    def __init__(self, root_dir, files, weights, label):
        self.weights = weights
        self.device = device
        self.root_dir = root_dir
        self.files = files
        self.label = label

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        logscalogram = (np.load(self.root_dir + self.files[idx]+'_cqt.npy') - -24.3633)/14.2659
        weight = self.weights[idx]
        label = (self.label[idx] > 0.5).astype(int)
        return {'logscalogram': logscalogram[np.newaxis, :], 'label': label[np.newaxis, :], 'weight': weight[np.newaxis, :]}

filenames = []
root_dir = '/beegfs/bva212/openmic-2018/cqt_full/'

BATCH_SIZE = 32

def my_collate(batch):
    data = np.stack([item['logscalogram'] for item in batch])
    target = np.concatenate([item['label'] for item in batch],axis=0)
    weight = np.concatenate([item['weight'] for item in batch],axis=0)
    weight_sum = weight.sum(0)
    weight_sum = np.repeat(weight_sum[np.newaxis, :], len(batch), 0)
    weight_sum[weight_sum==0]=1
    weight = weight/weight_sum
    return [torch.from_numpy(data).float(), torch.from_numpy(target).float(), torch.from_numpy(weight).float()]


In [6]:
class snet2_jigsaw(nn.Module):

    def __init__(self, output_classes = 20):
        '''
        Create the 5 Conv Layer Sound Net network architecture as per the paper - https://arxiv.org/pdf/1610.09001.pdf
        '''
        super(snet2_jigsaw, self).__init__()

        self.conv_layers = nn.Sequential(nn.Conv2d(in_channels = 1, out_channels= 16, kernel_size = 5, stride = 2, padding = 5), 
                                nn.BatchNorm2d(num_features = 16), 
                                nn.ReLU(inplace = True),

                                nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 5, stride = 2, padding = 5),
                                nn.BatchNorm2d(32),
                                nn.ReLU(inplace = True),

                                nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 5, padding = 5),
                                nn.BatchNorm2d(64),
                                nn.ReLU(inplace = True),
                                nn.AvgPool2d(kernel_size = 3),

                                nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 4, padding = 4),
                                nn.BatchNorm2d(128),
                                nn.ReLU(inplace = True),

                                nn.Conv2d(in_channels = 128, out_channels = 256, kernel_size = 4, padding = 4),
                                nn.BatchNorm2d(256),
                                nn.ReLU(inplace = True),
                                nn.AvgPool2d(kernel_size = 3),

                                nn.Conv2d(in_channels = 256, out_channels = 512, kernel_size = 3, stride = 2, padding = 3),
                                nn.BatchNorm2d(512),
                                nn.ReLU(inplace = True),

                                nn.Conv2d(in_channels = 512, out_channels = 1024, kernel_size = 3, stride = 2, padding = 3),
                                nn.BatchNorm2d(1024),
                                nn.ReLU(inplace = True),
                                nn.AdaptiveAvgPool2d(output_size = 1)
                                )
        self.mlp_layer = nn.Linear(1024, output_classes)
              
    def forward(self, input):
        out = self.conv_layers(input)
        out = self.mlp_layer(out.view(out.shape[0], -1))
        return out
    
    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in', nonlinearity='relu')
        
# Function for testing the model
def test_model(loader, model):
    correct = 0
    total_loss = 0
    total = 0
    total_num = 0
    actual_arr = []
    predicted_arr = []
    weight_array = []
    model.eval()
    with torch.no_grad():
        for spectrogram, target, weight in loader:
            spectrogram_batch, target_batch, weight_batch = spectrogram.to(device), target.to(device), weight.to(device)
            outputs = model(spectrogram_batch)
            loss = F.binary_cross_entropy_with_logits(outputs, target_batch,
                                                  weight = weight_batch,
                                                  reduction='sum')
            predicted = (torch.sigmoid(outputs.data)>0.5).float()
            
            actual_arr.extend(target.view(1,-1).squeeze().numpy().astype(int).tolist())
            predicted_arr.extend(predicted.view(1,-1).squeeze().cpu().numpy().astype(int).tolist())
            
            total_loss += loss.item()
            total += weight_batch.shape[0]
            
            correct += ((weight_batch != 0).float()*(predicted.eq(target_batch.view_as(predicted)).float())).sum().item()
            total_num += (weight_batch != 0).sum().item()
            weight_array = np.concatenate((weight_array,(weight != 0).reshape(-1).numpy().astype(int)))
        accuracy = (100 * correct / total_num)
        return accuracy, f1_score(actual_arr, predicted_arr, average='micro', sample_weight = weight_array), total_loss/total

def train_model(train_loader, val_loader, model, optimizer, scheduler, num_epochs):
    train_acc_list = []
    train_loss_list = []
    val_acc_list = []
    val_loss_list = []
    train_f1_list = []
    val_f1_list = []
    best_val_acc = 0
    for epoch in range(num_epochs):
        for spectrogram, target, weight in train_loader:
            model.train()
            spectrogram_batch, target_batch, weight_batch = spectrogram.to(device), target.to(device), weight.to(device)
            optimizer.zero_grad()
            outputs = model(spectrogram_batch)
            loss = F.binary_cross_entropy_with_logits(outputs, target_batch,
                                                  weight = weight_batch,
                                                  reduction='sum')
            loss.backward()
            optimizer.step()
        train_acc, f1_score_train, train_loss = test_model(train_loader, model)
        val_acc, f1_score_val, val_loss = test_model(val_loader, model)
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state_dict = deepcopy(model.state_dict())
        
        train_acc_list.append(train_acc)
        train_f1_list.append(f1_score_train)
        train_loss_list.append(train_loss)
        val_f1_list.append(f1_score_val)
        val_acc_list.append(val_acc)
        val_loss_list.append(val_loss)
        
        scheduler.step(val_acc)
        print("Epoch:{}".format(epoch+1))
        print("Validation Accuracy:{:.2f}, Validation F1:{:.2f}, Val Loss: {:.5f}".format(val_acc, f1_score_val, val_loss))
        print("Training Acc: {:.2f}, Training F1:{:.2f}, Train Loss: {:.5f}".format(train_acc, f1_score_train, train_loss))
    return train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict

In [7]:
import pickle
sizes = [10, 50, 250, 500, 1000, 1500]

Model_Data = pickle.load(open('List_indices_data_size_exp.pkl', 'rb'))

Val_dataset = ArrowOfTime(root_dir, sample_key[idx_val], Y_mask_val, label_val)
Val_loader = torch.utils.data.DataLoader(dataset = Val_dataset, 
                                              batch_size = BATCH_SIZE,
                                              shuffle = True,
                                        collate_fn = my_collate)

Test_dataset = ArrowOfTime(root_dir, sample_key[idx_test], Y_mask_test, label_test)
Test_loader = torch.utils.data.DataLoader(dataset = Test_dataset, 
                                              batch_size = BATCH_SIZE,
                                              shuffle = True,
                                        collate_fn = my_collate)

results_dict = {}

for i in range(len(sizes)):
    if sizes[i] != 1500:
        idx_train_new = Model_Data[i]
        Y_mask_train = Y_mask[idx_train_new]
        label_train = Y_true[idx_train_new]
        Train_dataset = ArrowOfTime(root_dir, sample_key[idx_train_new], Y_mask_train, label_train)
        Train_loader = torch.utils.data.DataLoader(dataset = Train_dataset, 
                                                      batch_size = BATCH_SIZE,
                                                      shuffle = True,
                                                  collate_fn = my_collate)
    else:
        Y_mask_train = Y_mask[idx_train]
        label_train = Y_true[idx_train]
        Train_dataset = ArrowOfTime(root_dir, sample_key[idx_train], Y_mask_train, label_train)
        Train_loader = torch.utils.data.DataLoader(dataset = Train_dataset, 
                                                      batch_size = BATCH_SIZE,
                                                      shuffle = True,
                                                  collate_fn = my_collate)

    
    weights = torch.load('/beegfs/sc6957/capstone/models/20191106/snet2_jigsaw_large_best_model.pth')['modelStateDict']
    model = snet2_jigsaw(20)
    reqd_weights = {key:value for key, value in weights.items() if key in model.state_dict() and key[:9] != 'mlp_layer'}
    reqd_weights["mlp_layer.weight"] = model.mlp_layer.weight
    reqd_weights["mlp_layer.bias"] = model.mlp_layer.bias
    model.load_state_dict(reqd_weights)
    model.to(device)
    learning_rate = 10**(-2)
    num_epochs = 50 # number epoch to train

    optimizer = torch.optim.Adam([param for param in model.parameters() if param.requires_grad == True], lr=learning_rate, weight_decay = 0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True, threshold=0.03,
                                                           threshold_mode='abs', cooldown=0, min_lr=0, eps=1e-08)

    train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict = train_model(Train_loader, Val_loader, model, optimizer, scheduler, num_epochs)
    results_dict[sizes[i]] = {
        'train_acc_list':train_acc_list,
        'train_loss_list': train_loss_list,
        'val_acc_list': val_acc_list,
        'val_loss_list': val_loss_list,
        'best_model_state_dict': best_model_state_dict
    }
pickle.dump(results_dict, open('Size_Exp_Jigsaw_Wt_NoLin_Results.pkl'.format(sizes[i]),'wb'))

Epoch:1
Validation Accuracy:57.69, Validation F1:0.58, Val Loss: 13.09279
Training Acc: 60.81, Training F1:0.61, Train Loss: 11.64351
Epoch:2
Validation Accuracy:61.29, Validation F1:0.61, Val Loss: 0.40483
Training Acc: 62.39, Training F1:0.62, Train Loss: 0.42143
Epoch:3
Validation Accuracy:58.38, Validation F1:0.58, Val Loss: 0.40425
Training Acc: 62.61, Training F1:0.63, Train Loss: 0.42197
Epoch:4
Validation Accuracy:60.79, Validation F1:0.61, Val Loss: 0.40131
Training Acc: 63.06, Training F1:0.63, Train Loss: 0.41892
Epoch:5
Validation Accuracy:62.63, Validation F1:0.63, Val Loss: 0.39506
Training Acc: 63.29, Training F1:0.63, Train Loss: 0.39489
Epoch:6
Validation Accuracy:60.69, Validation F1:0.61, Val Loss: 0.39412
Training Acc: 63.74, Training F1:0.64, Train Loss: 0.39535
Epoch:7
Validation Accuracy:60.25, Validation F1:0.60, Val Loss: 0.40000
Training Acc: 63.74, Training F1:0.64, Train Loss: 0.42032
Epoch     7: reducing learning rate of group 0 to 1.0000e-03.
Epoch:8
Vali

Epoch:10
Validation Accuracy:70.61, Validation F1:0.71, Val Loss: 0.33101
Training Acc: 71.35, Training F1:0.71, Train Loss: 0.32461
Epoch:11
Validation Accuracy:67.62, Validation F1:0.68, Val Loss: 0.35268
Training Acc: 70.24, Training F1:0.70, Train Loss: 0.33036
Epoch:12
Validation Accuracy:70.93, Validation F1:0.71, Val Loss: 0.33711
Training Acc: 72.37, Training F1:0.72, Train Loss: 0.31942
Epoch:13
Validation Accuracy:70.21, Validation F1:0.70, Val Loss: 0.33037
Training Acc: 73.24, Training F1:0.73, Train Loss: 0.31312
Epoch:14
Validation Accuracy:70.25, Validation F1:0.70, Val Loss: 0.33143
Training Acc: 72.80, Training F1:0.73, Train Loss: 0.31751
Epoch:15
Validation Accuracy:71.81, Validation F1:0.72, Val Loss: 0.32311
Training Acc: 74.69, Training F1:0.75, Train Loss: 0.29367
Epoch:16
Validation Accuracy:71.48, Validation F1:0.71, Val Loss: 0.32717
Training Acc: 75.65, Training F1:0.76, Train Loss: 0.29659
Epoch:17
Validation Accuracy:71.36, Validation F1:0.71, Val Loss: 0.3

Epoch:19
Validation Accuracy:73.48, Validation F1:0.73, Val Loss: 0.30811
Training Acc: 73.47, Training F1:0.73, Train Loss: 0.30365
Epoch:20
Validation Accuracy:73.56, Validation F1:0.74, Val Loss: 0.30675
Training Acc: 74.01, Training F1:0.74, Train Loss: 0.30206
Epoch:21
Validation Accuracy:73.45, Validation F1:0.73, Val Loss: 0.30467
Training Acc: 73.39, Training F1:0.73, Train Loss: 0.29747
Epoch:22
Validation Accuracy:72.71, Validation F1:0.73, Val Loss: 0.30782
Training Acc: 72.93, Training F1:0.73, Train Loss: 0.30362
Epoch    22: reducing learning rate of group 0 to 1.0000e-04.
Epoch:23
Validation Accuracy:73.42, Validation F1:0.73, Val Loss: 0.30345
Training Acc: 73.83, Training F1:0.74, Train Loss: 0.30212
Epoch:24
Validation Accuracy:73.86, Validation F1:0.74, Val Loss: 0.30635
Training Acc: 74.01, Training F1:0.74, Train Loss: 0.29907
Epoch:25
Validation Accuracy:74.05, Validation F1:0.74, Val Loss: 0.30234
Training Acc: 74.05, Training F1:0.74, Train Loss: 0.29606
Epoch:2

Epoch:28
Validation Accuracy:75.14, Validation F1:0.75, Val Loss: 0.28739
Training Acc: 76.28, Training F1:0.76, Train Loss: 0.27515
Epoch:29
Validation Accuracy:75.24, Validation F1:0.75, Val Loss: 0.28815
Training Acc: 76.27, Training F1:0.76, Train Loss: 0.27499
Epoch:30
Validation Accuracy:75.24, Validation F1:0.75, Val Loss: 0.29051
Training Acc: 76.40, Training F1:0.76, Train Loss: 0.27268
Epoch:31
Validation Accuracy:75.28, Validation F1:0.75, Val Loss: 0.28983
Training Acc: 76.44, Training F1:0.76, Train Loss: 0.27539
Epoch:32
Validation Accuracy:75.51, Validation F1:0.76, Val Loss: 0.28622
Training Acc: 76.38, Training F1:0.76, Train Loss: 0.27384
Epoch:33
Validation Accuracy:75.29, Validation F1:0.75, Val Loss: 0.28754
Training Acc: 76.45, Training F1:0.76, Train Loss: 0.27559
Epoch:34
Validation Accuracy:75.30, Validation F1:0.75, Val Loss: 0.28742
Training Acc: 76.52, Training F1:0.77, Train Loss: 0.27420
Epoch    34: reducing learning rate of group 0 to 1.0000e-05.
Epoch:3

Epoch    36: reducing learning rate of group 0 to 1.0000e-06.
Epoch:37
Validation Accuracy:74.60, Validation F1:0.75, Val Loss: 0.29065
Training Acc: 75.39, Training F1:0.75, Train Loss: 0.28655
Epoch:38
Validation Accuracy:74.73, Validation F1:0.75, Val Loss: 0.28729
Training Acc: 75.44, Training F1:0.75, Train Loss: 0.28627
Epoch:39
Validation Accuracy:74.56, Validation F1:0.75, Val Loss: 0.29109
Training Acc: 75.30, Training F1:0.75, Train Loss: 0.28713
Epoch    39: reducing learning rate of group 0 to 1.0000e-07.
Epoch:40
Validation Accuracy:74.71, Validation F1:0.75, Val Loss: 0.29474
Training Acc: 75.55, Training F1:0.76, Train Loss: 0.28711
Epoch:41
Validation Accuracy:74.62, Validation F1:0.75, Val Loss: 0.29148
Training Acc: 75.42, Training F1:0.75, Train Loss: 0.28762
Epoch:42
Validation Accuracy:74.54, Validation F1:0.75, Val Loss: 0.28835
Training Acc: 75.48, Training F1:0.75, Train Loss: 0.28545
Epoch    42: reducing learning rate of group 0 to 1.0000e-08.
Epoch:43
Validat

Epoch:45
Validation Accuracy:73.66, Validation F1:0.74, Val Loss: 0.30196
Training Acc: 73.65, Training F1:0.74, Train Loss: 0.29841
Epoch:46
Validation Accuracy:73.64, Validation F1:0.74, Val Loss: 0.30247
Training Acc: 73.68, Training F1:0.74, Train Loss: 0.29578
Epoch:47
Validation Accuracy:73.80, Validation F1:0.74, Val Loss: 0.30626
Training Acc: 73.79, Training F1:0.74, Train Loss: 0.29847
Epoch:48
Validation Accuracy:73.72, Validation F1:0.74, Val Loss: 0.30340
Training Acc: 73.69, Training F1:0.74, Train Loss: 0.29903
Epoch:49
Validation Accuracy:73.66, Validation F1:0.74, Val Loss: 0.30194
Training Acc: 73.71, Training F1:0.74, Train Loss: 0.29807
Epoch:50
Validation Accuracy:73.72, Validation F1:0.74, Val Loss: 0.30410
Training Acc: 73.75, Training F1:0.74, Train Loss: 0.29779


In [None]:
results_dict = {}
model1 = snet2_jigsaw(20).to(device)
model = snet2_jigsaw(20).to(device)

for i in range(len(sizes)):
    if sizes[i] != 1500:
        idx_train_new = Model_Data[i]
        Y_mask_train = Y_mask[idx_train_new]
        label_train = Y_true[idx_train_new]
        Train_dataset = ArrowOfTime(root_dir, sample_key[idx_train_new], Y_mask_train, label_train)
        Train_loader = torch.utils.data.DataLoader(dataset = Train_dataset, 
                                                      batch_size = BATCH_SIZE,
                                                      shuffle = True,
                                                  collate_fn = my_collate)
    else:
        Y_mask_train = Y_mask[idx_train]
        label_train = Y_true[idx_train]
        Train_dataset = ArrowOfTime(root_dir, sample_key[idx_train], Y_mask_train, label_train)
        Train_loader = torch.utils.data.DataLoader(dataset = Train_dataset, 
                                                      batch_size = BATCH_SIZE,
                                                      shuffle = True,
                                                  collate_fn = my_collate)

    
    model.load_state_dict(model1.state_dict())
    learning_rate = 10**(-5)
    num_epochs = 50 # number epoch to train

    optimizer = torch.optim.Adam([param for param in model.parameters() if param.requires_grad == True], lr=learning_rate, weight_decay = 0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True, threshold=0.03,
                                                           threshold_mode='abs', cooldown=0, min_lr=0, eps=1e-08)

    train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict = train_model(Train_loader, Val_loader, model, optimizer, scheduler, num_epochs)
    results_dict[sizes[i]] = {
        'train_acc_list':train_acc_list,
        'train_loss_list': train_loss_list,
        'val_acc_list': val_acc_list,
        'val_loss_list': val_loss_list,
        'best_model_state_dict': best_model_state_dict
    }
pickle.dump(results_dict, open('Size_Exp_Random_Wt_NoLinear_Results_size_{}.pkl'.format(sizes[i]),'wb'))

In [12]:
mean = 0.
std = 0.
for images, _, _ in Train_loader:
    batch_samples = images.size(0) # batch size (the last batch can have smaller size!)
    images1 = images.reshape(batch_samples, images.size(1), -1)
    mean += images1.mean(2).sum(0)
    std += images1.std(2).sum(0)
    
mean /= len(Train_loader.dataset)
std /= len(Train_loader.dataset)

In [13]:
print(mean, std)

tensor([-24.3633]) tensor([14.2659])


In [65]:

model = conv_net().to(device)
model.load_state_dict(reqd_weights)

<All keys matched successfully>

In [9]:
pickle.dump(results_dict, open('Size_Exp_Jigsaw_Wt_Results.pkl'.format(sizes[i]),'wb'))

In [10]:
results_dict.keys()

dict_keys([10, 50, 250, 500])

In [19]:
import torchvision.models as models
resnet18 = models.resnet18().to(device)
alexnet = models.alexnet().to(device)
vgg16 = models.vgg16().to(device)
squeezenet = models.squeezenet1_0().to(device)
densenet = models.densenet161().to(device)
inception = models.inception_v3().to(device)


In [8]:
import numpy as np
a = np.array([1,2])
a[:1]

array([1])