In [1]:
# -*- coding: utf-8 -*-

from matplotlib import pyplot as plt
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader
import torch
import os
from torch.optim import *
from torch import nn
from torch.nn import functional as F
from scipy import ndimage
import pandas as pd
from copy import deepcopy
import random
from sklearn.metrics import f1_score

# Seed every random number source used in this notebook with the same value (7).
np.random.seed(7)
random.seed(7)
torch.manual_seed(7)
torch.cuda.manual_seed(7)

# Ask cuDNN for deterministic algorithms and switch off its benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [2]:
# Directory that holds the 'openmic-2018.npz' archive.
DATA_ROOT = '/beegfs/bva212/openmic-2018'
OPENMIC = np.load(os.path.join(DATA_ROOT, 'openmic-2018.npz'))

# Pull the four arrays used by the rest of the notebook out of the archive,
# one name per archive key.
X = OPENMIC['X']
Y_true = OPENMIC['Y_true']
Y_mask = OPENMIC['Y_mask']
sample_key = OPENMIC['sample_key']

In [3]:
# Randomly partition the row indices of Y_mask into train / test / validation sets.
len_data = Y_mask.shape[0]
# 70% of all indices, drawn without replacement.
idx_train = np.random.choice(len_data, int(len_data*0.7), replace=False)
# Every index that was not drawn for training.
remain_set = set(np.arange(len_data))-set(idx_train)
# 10% of the full dataset size, drawn from the remainder. The candidates are passed as
# list(remain_set), so the outcome depends on the set's iteration order as well as on
# the NumPy seed -- keep the set arithmetic above unchanged if the split must be reproduced.
idx_test = np.random.choice(list(remain_set), int(len_data*0.1), replace=False)
# Whatever is left once the test indices are removed (roughly the last 20%).
idx_val = list(remain_set-set(idx_test))

In [4]:
# Rows of Y_mask that belong to the validation and test splits.
Y_mask_val, Y_mask_test = Y_mask[idx_val], Y_mask[idx_test]

# Rows of Y_true for the same two splits, used as labels further down.
label_val, label_test = Y_true[idx_val], Y_true[idx_test]

In [5]:
# Per-row scale factor: the sum of the mask row divided by 20.
weights_val = Y_mask_val.sum(axis=1) / 20
# Spread that factor over the row; entries where the mask is zero stay zero.
new_weights_val = weights_val.reshape(-1, 1) * Y_mask_val

# Same construction for the test split.
weights_test = Y_mask_test.sum(axis=1) / 20
new_weights_test = weights_test.reshape(-1, 1) * Y_mask_test

In [6]:
class ArrowOfTime(Dataset):
    """Dataset that serves one stored CQT array per item, cut into three strips.

    Each item is read from ``root_dir + files[idx] + '_cqt.npy'``, standardised
    with the fixed constants below, and split along axis 1 into ``N_STRIPS``
    equal-width pieces that are stacked on a new leading axis.

    Args:
        root_dir: Path prefix. It is string-concatenated with the file stem, so
            it has to end with a path separator.
        files: Indexable collection of file stems (without the '_cqt.npy' suffix).
        weights: Indexable collection holding one weight row per item.
        label: Indexable collection holding one label row per item.
    """

    # Constants used to standardise every loaded array: (x - CQT_MEAN) / CQT_STD.
    CQT_MEAN = -24.3633
    CQT_STD = 14.2659
    # Number of equal-width strips each array is cut into along axis 1.
    N_STRIPS = 3

    def __init__(self, root_dir, files, weights, label):
        self.weights = weights
        # `device` is the module-level torch device chosen at the top of the notebook.
        self.device = device
        self.root_dir = root_dir
        self.files = files
        self.label = label

    def __len__(self):
        """Return the number of items, i.e. the number of file stems."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load, standardise and split item ``idx``.

        Returns:
            dict with keys
            ``'logscalogram'``: array of shape (N_STRIPS, rows, width // N_STRIPS),
            ``'label'``: the label row with a new leading axis,
            ``'weight'``: the weight row with a new leading axis.
        """
        logscalogram = (np.load(self.root_dir + self.files[idx]+'_cqt.npy') - self.CQT_MEAN)/self.CQT_STD
        len_cqt = logscalogram.shape[1]
        # Keep the widest prefix whose width is a multiple of N_STRIPS. The end index
        # is computed explicitly: slicing with `[:, :-rem]` would turn into `[:, :0]`
        # when the width is already a multiple (rem == 0) and drop every column.
        usable = len_cqt - (len_cqt % self.N_STRIPS)
        logscalograms = np.stack(np.split(logscalogram[:, :usable], self.N_STRIPS, axis = 1))
        weight = self.weights[idx]
        label = self.label[idx]
        return {'logscalogram': logscalograms, 'label': label[np.newaxis, :], 'weight': weight[np.newaxis, :]}

# Mini-batch size passed to every DataLoader built in this notebook.
BATCH_SIZE = 8

# Path prefix of the '<key>_cqt.npy' files read by the dataset class.
root_dir = '/beegfs/bva212/openmic-2018/cqt_full/'

# Empty placeholder; not referenced by any other visible cell.
filenames = []

def my_collate(batch):
    """Merge a list of dataset items into three float tensors.

    Args:
        batch: list of dicts with the keys 'logscalogram', 'label' and 'weight',
            each holding a NumPy array.

    Returns:
        ``[data, target, weight]`` where ``data`` stacks the 'logscalogram' arrays
        on a new leading axis, and ``target`` / ``weight`` concatenate the 'label' /
        'weight' arrays along their existing first axis. All three are float32.
    """
    def _to_float_tensor(array):
        return torch.from_numpy(array).float()

    spectrograms = np.stack([sample['logscalogram'] for sample in batch])
    labels = np.concatenate([sample['label'] for sample in batch], axis=0)
    weights = np.concatenate([sample['weight'] for sample in batch], axis=0)
    return [_to_float_tensor(spectrograms), _to_float_tensor(labels), _to_float_tensor(weights)]


In [7]:
class snet2_jigsaw(nn.Module):
    """Strip-wise convolutional network with a shared trunk and an MLP head.

    The original docstring cites the SoundNet paper
    (https://arxiv.org/pdf/1610.09001.pdf) as the source of the architecture;
    the trunk defined here stacks seven Conv2d -> BatchNorm2d -> ReLU units.

    ``forward`` runs every strip ``input[:, k]`` through the same trunk as a
    single-channel image, concatenates the 1024-dimensional results and feeds
    them to the MLP. The first Linear layer takes 3072 = 3 * 1024 inputs, so the
    network expects exactly three strips along dim 1.

    Args:
        output_classes: width of the final linear layer.
    """

    def __init__(self, output_classes = 20):
        super().__init__()

        def conv_unit(c_in, c_out, kernel, pad, stride = 1):
            # Conv -> BatchNorm -> ReLU, the unit repeated throughout the trunk.
            return [nn.Conv2d(in_channels = c_in, out_channels = c_out,
                              kernel_size = kernel, stride = stride, padding = pad),
                    nn.BatchNorm2d(num_features = c_out),
                    nn.ReLU(inplace = True)]

        def dense_unit(n_in, n_out):
            # Linear -> BatchNorm -> ReLU, the unit repeated in the MLP.
            return [nn.Linear(n_in, n_out),
                    nn.BatchNorm1d(num_features = n_out),
                    nn.ReLU(inplace = True)]

        # The layer order (and therefore every index inside the Sequential) is kept
        # fixed so that state_dict keys stay compatible with saved checkpoints.
        trunk = []
        trunk += conv_unit(1, 16, kernel = 5, pad = 5, stride = 2)
        trunk += conv_unit(16, 32, kernel = 5, pad = 5, stride = 2)
        trunk += conv_unit(32, 64, kernel = 5, pad = 5)
        trunk.append(nn.AvgPool2d(kernel_size = 3))
        trunk += conv_unit(64, 128, kernel = 4, pad = 4)
        trunk += conv_unit(128, 256, kernel = 4, pad = 4)
        trunk.append(nn.AvgPool2d(kernel_size = 3))
        trunk += conv_unit(256, 512, kernel = 3, pad = 3, stride = 2)
        trunk += conv_unit(512, 1024, kernel = 3, pad = 3, stride = 2)
        trunk.append(nn.AdaptiveAvgPool2d(output_size = 1))
        self.conv_layers = nn.Sequential(*trunk)

        self.concat_mlp_layer = nn.Sequential(*dense_unit(3072, 2048),
                                              *dense_unit(2048, 1024),
                                              *dense_unit(1024, 256))
        self.mlp_layer = nn.Linear(256, output_classes)

    def forward(self, input):
        """Return the logits for a batch of strip stacks (strips along dim 1)."""
        # Each strip gets a singleton channel axis and goes through the shared trunk.
        strip_features = [self.conv_layers(input[:, k].unsqueeze(1))
                          for k in range(input.shape[1])]
        concat_out = torch.cat(strip_features, 1)
        hidden = self.concat_mlp_layer(concat_out.view(concat_out.shape[0], -1))
        return self.mlp_layer(hidden.view(hidden.shape[0], -1))

    def init_weights(self):
        """Re-initialise every Conv2d weight with Kaiming-normal (fan_in, ReLU)."""
        conv_modules = [m for m in self.modules() if isinstance(m, nn.Conv2d)]
        for conv in conv_modules:
            nn.init.kaiming_normal_(conv.weight, a=0, mode='fan_in', nonlinearity='relu')
        
# Function for testing the model
def test_model(loader, model):
    """Evaluate ``model`` on every batch of ``loader`` without tracking gradients.

    Args:
        loader: iterable yielding ``(spectrogram, target, weight)`` tensor triples.
        model: network mapping the spectrogram batch to one logit per target entry.

    Returns:
        ``(accuracy, f1, mean_loss)`` where
        ``accuracy`` is the percentage of entries with non-zero weight whose
        thresholded prediction equals the target,
        ``f1`` is the micro-averaged F1 over *all* entries (zero-weight ones included),
        ``mean_loss`` is the summed weighted BCE divided by the number of samples.

    NOTE(review): targets are compared with exact equality and truncated with
    ``astype(int)``; if the label array holds values strictly between 0 and 1,
    those entries count as mismatches / zeros -- confirm the labels are hard 0/1.
    """
    hits_observed = 0
    n_observed = 0
    loss_sum = 0
    n_samples = 0
    y_true = []
    y_pred = []
    model.eval()
    with torch.no_grad():
        for spectrogram, target, weight in loader:
            spectrogram_batch = spectrogram.to(device)
            target_batch = target.to(device)
            weight_batch = weight.to(device)

            logits = model(spectrogram_batch)
            batch_loss = F.binary_cross_entropy_with_logits(logits, target_batch,
                                                            weight = weight_batch,
                                                            reduction='sum')
            # Hard decision at probability 0.5.
            predicted = (torch.sigmoid(logits.data)>0.5).float()

            # Flatten targets and predictions of the batch into the running lists.
            y_true.extend(target.view(1,-1).squeeze().numpy().astype(int).tolist())
            y_pred.extend(predicted.view(1,-1).squeeze().cpu().numpy().astype(int).tolist())

            loss_sum += batch_loss.item()
            n_samples += weight_batch.shape[0]

            # Only entries with a non-zero weight take part in the accuracy.
            observed = (weight_batch != 0)
            matches = predicted.eq(target_batch.view_as(predicted)).float()
            hits_observed += (observed.float()*matches).sum().item()
            n_observed += observed.sum().item()
        accuracy = (100 * hits_observed / n_observed)
    return accuracy, f1_score(y_true, y_pred, average='micro'), loss_sum/n_samples

def train_model(train_loader, val_loader, model, optimizer, scheduler, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and track metrics after each one.

    After every epoch the model is evaluated with ``test_model`` on both loaders,
    the metrics are printed, and ``scheduler.step`` is called with the validation
    accuracy. The weights with the highest validation accuracy are kept.

    Args:
        train_loader: iterable of ``(spectrogram, target, weight)`` training batches.
        val_loader: iterable of validation batches in the same format.
        model: network to optimise (modified in place).
        optimizer: optimiser over the model's parameters.
        scheduler: scheduler whose ``step`` accepts the validation accuracy.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        ``(train_acc_list, train_loss_list, val_acc_list, val_loss_list,
        best_model_state_dict)``. The per-epoch F1 scores are collected and printed
        but are not part of the return value.
    """
    train_acc_list = []
    train_loss_list = []
    val_acc_list = []
    val_loss_list = []
    train_f1_list = []
    val_f1_list = []
    best_val_acc = 0
    # Start from a snapshot of the incoming weights so that the function always has a
    # state dict to return, even when no epoch runs or the validation accuracy never
    # rises above 0 (the name would otherwise be unbound at the return statement).
    best_model_state_dict = deepcopy(model.state_dict())
    for epoch in range(num_epochs):
        # test_model() leaves the network in eval mode, so switch back once per epoch.
        model.train()
        for spectrogram, target, weight in train_loader:
            spectrogram_batch, target_batch, weight_batch = spectrogram.to(device), target.to(device), weight.to(device)
            optimizer.zero_grad()
            outputs = model(spectrogram_batch)
            loss = F.binary_cross_entropy_with_logits(outputs, target_batch,
                                                  weight = weight_batch,
                                                  reduction='sum')
            loss.backward()
            optimizer.step()
        train_acc, f1_score_train, train_loss = test_model(train_loader, model)
        val_acc, f1_score_val, val_loss = test_model(val_loader, model)

        # Keep a detached copy of the best weights seen so far.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state_dict = deepcopy(model.state_dict())

        train_acc_list.append(train_acc)
        train_f1_list.append(f1_score_train)
        train_loss_list.append(train_loss)
        val_f1_list.append(f1_score_val)
        val_acc_list.append(val_acc)
        val_loss_list.append(val_loss)

        scheduler.step(val_acc)
        print("Epoch:{}".format(epoch+1))
        print("Validation Accuracy:{:.2f}, Validation F1:{:.2f}, Val Loss: {:.5f}".format(val_acc, f1_score_val, val_loss))
        print("Training Acc: {:.2f}, Training F1:{:.2f}, Train Loss: {:.5f}".format(train_acc, f1_score_train, train_loss))
    return train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict

In [None]:
import pickle

# Size experiment, pretrained initialisation: for every training-set size, start from the
# saved jigsaw checkpoint, swap in a fresh 20-way output layer and fine-tune.
sizes = [10, 50, 250, 500, 1000]
learning_rate = 10**(-5)
num_epochs = 50 # number epoch to train

# Model_Data[i] is used below as the index array for the i-th entry of `sizes`.
# NOTE: pickle can execute arbitrary code while loading -- only open trusted files.
with open('List_indices_data_size_exp.pkl', 'rb') as fh:
    Model_Data = pickle.load(fh)

Val_dataset = ArrowOfTime(root_dir, sample_key[idx_val], new_weights_val, label_val)
Val_loader = torch.utils.data.DataLoader(dataset = Val_dataset,
                                         batch_size = BATCH_SIZE,
                                         shuffle = True,
                                         collate_fn = my_collate)

Test_dataset = ArrowOfTime(root_dir, sample_key[idx_test], new_weights_test, label_test)
Test_loader = torch.utils.data.DataLoader(dataset = Test_dataset,
                                          batch_size = BATCH_SIZE,
                                          shuffle = True,
                                          collate_fn = my_collate)

# The checkpoint does not change between runs, so read it once. map_location lets the
# file load on a machine whose device differs from the one it was saved on.
weights = torch.load('/beegfs/sc6957/capstone/models/20191106/snet2_jigsaw_large_best_model.pth',
                     map_location = device)['modelStateDict']

# Created here so the cell does not depend on a dict left over from an earlier kernel state.
results_dict = {}

for i, size in enumerate(sizes):
    idx_train = Model_Data[i]
    Y_mask_train = Y_mask[idx_train]
    label_train = Y_true[idx_train]
    # Same weighting scheme as for the validation and test splits.
    weights_train = np.sum(Y_mask_train, axis= 1)/20
    new_weights_train = weights_train.reshape(-1,1)*Y_mask_train
    Train_dataset = ArrowOfTime(root_dir, sample_key[idx_train], new_weights_train, label_train)
    Train_loader = torch.utils.data.DataLoader(dataset = Train_dataset,
                                               batch_size = BATCH_SIZE,
                                               shuffle = True,
                                               collate_fn = my_collate)

    # Build the network with the 2-way head the checkpoint was saved with, load the
    # weights, then replace the head with a new 20-way layer before moving to `device`.
    model = snet2_jigsaw(2)
    model.load_state_dict(weights)
    model.mlp_layer = nn.Linear(256, 20)
    model.to(device)

    optimizer = torch.optim.Adam([param for param in model.parameters() if param.requires_grad == True], lr=learning_rate, weight_decay = 0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True, threshold=0.03,
                                                           threshold_mode='abs', cooldown=0, min_lr=0, eps=1e-08)

    train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict = train_model(Train_loader, Val_loader, model, optimizer, scheduler, num_epochs)
    results_dict[size] = {
        'train_acc_list':train_acc_list,
        'train_loss_list': train_loss_list,
        'val_acc_list': val_acc_list,
        'val_loss_list': val_loss_list,
        'best_model_state_dict': best_model_state_dict
    }

# One file for the whole experiment; the name carries no per-size placeholder.
with open('Size_Exp_Jigsaw_Wt_Results.pkl', 'wb') as fh:
    pickle.dump(results_dict, fh)

In [None]:
# Size experiment, random initialisation: `model1` holds one fixed random initialisation
# that `model` is reset to before every run, so all sizes start from the same weights.
model1 = snet2_jigsaw(20).to(device)
model = snet2_jigsaw(20).to(device)
sizes = [10, 50, 250, 500, 1000]
learning_rate = 10**(-5)
num_epochs = 50 # number epoch to train

# Start from an empty dict so the files written below contain only results of this
# experiment and no entries left behind by a previous one.
results_dict = {}

for i, size in enumerate(sizes):
    idx_train = Model_Data[i]
    Y_mask_train = Y_mask[idx_train]
    label_train = Y_true[idx_train]
    # Same weighting scheme as for the validation and test splits.
    weights_train = np.sum(Y_mask_train, axis= 1)/20
    new_weights_train = weights_train.reshape(-1,1)*Y_mask_train
    Train_dataset = ArrowOfTime(root_dir, sample_key[idx_train], new_weights_train, label_train)
    Train_loader = torch.utils.data.DataLoader(dataset = Train_dataset,
                                               batch_size = BATCH_SIZE,
                                               shuffle = True,
                                               collate_fn = my_collate)

    # Reset to the shared random initialisation.
    model.load_state_dict(model1.state_dict())

    optimizer = torch.optim.Adam([param for param in model.parameters() if param.requires_grad == True], lr=learning_rate, weight_decay = 0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True, threshold=0.03,
                                                           threshold_mode='abs', cooldown=0, min_lr=0, eps=1e-08)

    train_acc_list, train_loss_list, val_acc_list, val_loss_list, best_model_state_dict = train_model(Train_loader, Val_loader, model, optimizer, scheduler, num_epochs)
    results_dict[size] = {
        'train_acc_list':train_acc_list,
        'train_loss_list': train_loss_list,
        'val_acc_list': val_acc_list,
        'val_loss_list': val_loss_list,
        'best_model_state_dict': best_model_state_dict
    }
    # Checkpoint after every size: each file holds the results gathered up to that size.
    with open('Size_Exp_Random_Wt_Results_size_{}.pkl'.format(size), 'wb') as fh:
        pickle.dump(results_dict, fh)

Epoch:1
Validation Accuracy:34.82, Validation F1:0.56, Val Loss: 0.19598
Training Acc: 36.71, Training F1:0.56, Train Loss: 0.21569
Epoch:2
Validation Accuracy:38.21, Validation F1:0.51, Val Loss: 0.19323
Training Acc: 55.63, Training F1:0.54, Train Loss: 0.19107
Epoch:3
Validation Accuracy:40.62, Validation F1:0.51, Val Loss: 0.19150
Training Acc: 62.39, Training F1:0.53, Train Loss: 0.14832
Epoch:4
Validation Accuracy:40.61, Validation F1:0.49, Val Loss: 0.19145
Training Acc: 64.19, Training F1:0.51, Train Loss: 0.13654
Epoch:5
Validation Accuracy:41.90, Validation F1:0.51, Val Loss: 0.19135
Training Acc: 64.64, Training F1:0.53, Train Loss: 0.12195
Epoch:6
Validation Accuracy:42.24, Validation F1:0.52, Val Loss: 0.19032
Training Acc: 65.54, Training F1:0.53, Train Loss: 0.11523
Epoch:7
Validation Accuracy:42.09, Validation F1:0.51, Val Loss: 0.18926
Training Acc: 66.22, Training F1:0.53, Train Loss: 0.10914
Epoch:8
Validation Accuracy:41.57, Validation F1:0.47, Val Loss: 0.19079
Tra

Epoch    11: reducing learning rate of group 0 to 1.0000e-06.
Epoch:12
Validation Accuracy:50.30, Validation F1:0.62, Val Loss: 0.16599
Training Acc: 69.42, Training F1:0.62, Train Loss: 0.07756
Epoch:13
Validation Accuracy:51.15, Validation F1:0.63, Val Loss: 0.16397
Training Acc: 69.57, Training F1:0.63, Train Loss: 0.07648
Epoch:14
Validation Accuracy:51.24, Validation F1:0.63, Val Loss: 0.16301
Training Acc: 69.66, Training F1:0.63, Train Loss: 0.07471
Epoch    14: reducing learning rate of group 0 to 1.0000e-07.
Epoch:15
Validation Accuracy:51.03, Validation F1:0.63, Val Loss: 0.16364
Training Acc: 69.57, Training F1:0.63, Train Loss: 0.07423
Epoch:16
Validation Accuracy:50.55, Validation F1:0.59, Val Loss: 0.16611
Training Acc: 69.61, Training F1:0.60, Train Loss: 0.07684
Epoch:17
Validation Accuracy:50.78, Validation F1:0.61, Val Loss: 0.16458
Training Acc: 69.66, Training F1:0.61, Train Loss: 0.07290
Epoch    17: reducing learning rate of group 0 to 1.0000e-08.
Epoch:18
Validat

Epoch:22
Validation Accuracy:57.50, Validation F1:0.71, Val Loss: 0.13033
Training Acc: 69.39, Training F1:0.72, Train Loss: 0.05326
Epoch:23
Validation Accuracy:57.52, Validation F1:0.71, Val Loss: 0.12947
Training Acc: 69.39, Training F1:0.73, Train Loss: 0.05307
Epoch:24
Validation Accuracy:57.64, Validation F1:0.73, Val Loss: 0.12943
Training Acc: 69.39, Training F1:0.74, Train Loss: 0.05289
Epoch:25
Validation Accuracy:57.58, Validation F1:0.73, Val Loss: 0.12791
Training Acc: 69.39, Training F1:0.74, Train Loss: 0.05264
Epoch:26
Validation Accuracy:57.57, Validation F1:0.73, Val Loss: 0.12868
Training Acc: 69.37, Training F1:0.74, Train Loss: 0.05258
Epoch:27
Validation Accuracy:57.15, Validation F1:0.71, Val Loss: 0.13303
Training Acc: 69.39, Training F1:0.72, Train Loss: 0.05505
Epoch:28
Validation Accuracy:57.26, Validation F1:0.72, Val Loss: 0.13195
Training Acc: 69.38, Training F1:0.73, Train Loss: 0.05438
Epoch:29
Validation Accuracy:57.82, Validation F1:0.73, Val Loss: 0.1

Epoch:33
Validation Accuracy:60.76, Validation F1:0.76, Val Loss: 0.11024
Training Acc: 69.02, Training F1:0.77, Train Loss: 0.05137
Epoch:34
Validation Accuracy:60.82, Validation F1:0.76, Val Loss: 0.11019
Training Acc: 69.01, Training F1:0.77, Train Loss: 0.05137
Epoch:35
Validation Accuracy:60.45, Validation F1:0.73, Val Loss: 0.11148
Training Acc: 69.02, Training F1:0.75, Train Loss: 0.05156
Epoch:36
Validation Accuracy:60.65, Validation F1:0.74, Val Loss: 0.11121
Training Acc: 69.02, Training F1:0.75, Train Loss: 0.05222
Epoch:37
Validation Accuracy:61.07, Validation F1:0.76, Val Loss: 0.10893
Training Acc: 69.05, Training F1:0.78, Train Loss: 0.05006
Epoch:38
Validation Accuracy:60.79, Validation F1:0.75, Val Loss: 0.11046
Training Acc: 68.99, Training F1:0.77, Train Loss: 0.05194
Epoch:39
Validation Accuracy:60.89, Validation F1:0.75, Val Loss: 0.11109
Training Acc: 69.03, Training F1:0.76, Train Loss: 0.05199
Epoch:40
Validation Accuracy:60.96, Validation F1:0.76, Val Loss: 0.1

In [12]:
# For every index along dim 1, average the per-sample mean and the per-sample standard
# deviation over all items served by Train_loader.
mean = 0.
std = 0.
for images, _, _ in Train_loader:
    n_in_batch = images.size(0)  # the final batch may be smaller than the others
    # Collapse everything after dim 1 so that statistics are taken per sample.
    flat = images.reshape(n_in_batch, images.size(1), -1)
    mean += flat.mean(2).sum(0)
    std += flat.std(2).sum(0)

n_items = len(Train_loader.dataset)
mean /= n_items
std /= n_items

In [13]:
# Show the averaged mean and standard deviation accumulated in the previous cell.
print(mean, std)

tensor([-24.3633]) tensor([14.2659])


In [65]:

# Rebinds `model` to a new conv_net instance on `device` and loads `reqd_weights` into it.
# NOTE(review): neither `conv_net` nor `reqd_weights` is defined in any visible cell, so
# this presumably relied on kernel state from cells that are no longer in the notebook --
# confirm where they come from before re-running.
model = conv_net().to(device)
model.load_state_dict(reqd_weights)

<All keys matched successfully>

In [9]:
# Write the current contents of results_dict to disk and close the file afterwards.
# The file name has no placeholder, so no per-size formatting is applied to it.
# NOTE(review): this targets the same file as the dump at the end of the fine-tuning
# cell and stores whatever results_dict holds at the time -- confirm it is still wanted.
with open('Size_Exp_Jigsaw_Wt_Results.pkl', 'wb') as fh:
    pickle.dump(results_dict, fh)

In [10]:
# List the training-set sizes that currently have an entry in results_dict.
results_dict.keys()

dict_keys([10, 50, 250, 500])

In [8]:
import torchvision.models as models

# Instantiate each torchvision architecture with its default arguments, in the order
# listed, and move it to `device`. None of these models is used by another visible cell.
resnet18, alexnet, vgg16, squeezenet, densenet, inception = (
    build().to(device)
    for build in (models.resnet18,
                  models.alexnet,
                  models.vgg16,
                  models.squeezenet1_0,
                  models.densenet161,
                  models.inception_v3)
)
