In [1]:
import os
import sys

# Model/Training related libraries
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Logging
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

# Dataloader libraries
from torch.utils.data import DataLoader, Dataset, SequentialSampler
from torchvision import transforms

# Custom models/loss/dataloaders/utilities
from models import SiameseNet_OntologicalLayer
from loss import Ontological_Loss
from dataloaders import AudioSet_Siamese, AudioSet_Siamese_Eval
from dataloaders_msos import MSoSSiamese, MSoSSiameseEval
from evaluate import evaluate_model_stats
from sklearn import metrics


# GPU
device = torch.device("cuda:0")

In [2]:
# Load data
data_dir = 'data/'
sounds_data = np.load(data_dir + 'msos_train_data.npy', allow_pickle=True)
class1_index = np.load(data_dir + 'msos_train_labels_1.npy', allow_pickle=True)
class2_index = np.load(data_dir + 'msos_train_labels_2.npy', allow_pickle=True)

# clip_lengths = [clip.shape[0] for clip in sounds_data]
# print(clip_lengths)

# Dataloader
train_data = MSoSSiamese(sounds_data, class1_index, class2_index, 97, 5)
train_args = dict(shuffle = True, batch_size = 256, num_workers=8, pin_memory=True)
train_loader = DataLoader(train_data, **train_args)

sounds_data = np.load(data_dir + 'msos_val_data.npy', allow_pickle=True)
class1_index = np.load(data_dir + 'msos_val_labels_1.npy', allow_pickle=True)
class2_index = np.load(data_dir + 'msos_val_labels_2.npy', allow_pickle=True)

val_data = MSoSSiamese(sounds_data, class1_index, class2_index, 97, 5)
val_args = dict(shuffle = False, batch_size = 256, num_workers=8, pin_memory=True)
val_loader = DataLoader(val_data, **val_args)

eval_data = MSoSSiameseEval(sounds_data, class1_index, class2_index, 97, 5)
eval_args = dict(shuffle = False, batch_size = 256, num_workers=8, pin_memory=True, sampler=SequentialSampler(eval_data))
eval_loader = DataLoader(eval_data, **eval_args)


#### Useful Constants

In [12]:
# Ontology Layer
# M = np.asarray([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
#                  [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
#                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
#                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0],
#                  [0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1],
#                  [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
#                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

M = np.load(data_dir + 'msos_M.npy', allow_pickle=True)
num_label_2, num_label_1 = M.shape
M = M / np.sum(M, axis=1).reshape(-1, 1)

# For MSoS dataset
data_len = sounds_data.shape[1]

# Siamese Net Model
in_feature_dim = train_data[0][0].shape[0]
model = SiameseNet_OntologicalLayer(5, 97, in_feature_dim, M)
model.to(device)

# Define Loss function
lambda1 = 1.5
lambda2 = 1
lambda3 = 0.25
criterion = Ontological_Loss(lambda1, lambda2, lambda3)

# Define Optimizer
learningRate = 1e-3
weightDecay = 1e-4

optimizer = torch.optim.Adam(model.parameters(), lr=learningRate, weight_decay=weightDecay)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)



In [11]:
### Train ###
def train_model(train_loader, model, epoch, writer):
    
    training_loss = 0
    training_acc_1 = 0
    training_acc_2 = 0
    
    # Set model in 'Training mode'
    model.train()
    
    # enumerate mini batches
    with tqdm(train_loader, ) as t_epoch:
        for i, (input1, input2, target1_1, target1_2, target2_1, target2_2, pair_type) in enumerate(t_epoch):
            t_epoch.set_description(f"Epoch {epoch}")
            
            # Move to GPU
            input1 = input1.to(device).float()
            target1_1 = target1_1.to(device)
            target1_2 = target1_2.to(device)
            
            input2 = input2.to(device).float()
            target2_1 = target2_1.to(device)
            target2_2 = target2_2.to(device)
            
            pair_type = pair_type.to(device)

            model.zero_grad()
            optimizer.zero_grad()
            
            # Model output
            outputs = model.forward(input1, input2) 
            targets = (target1_1, target1_2, target2_1, target2_2)
            
            # Loss/Backprop
            loss = criterion(outputs, targets, pair_type)                           
            loss.backward()
            optimizer.step()     

            training_loss += loss.item()
            t_epoch.set_postfix(loss=loss.item())
            
            torch.cuda.empty_cache()
            del input1, input2
            del target1_1, target1_2, target2_1, target2_2, pair_type
            del loss
    
    training_loss /= len(train_loader)
    writer.add_scalar("Loss/train", training_loss, epoch)  
    
    return training_loss


# Validation
def evaluate_model(val_loader, model, epoch, writer):
        
    val_loss = 0

    # Set model in validation mode
    model.eval()
    
    for i, (input1, input2, target1_1, target1_2, target2_1, target2_2, pair_type) in enumerate(val_loader):
        with torch.no_grad():
            
            # Move to GPU
            input1 = input1.to(device).float()
            target1_1 = target1_1.to(device)
            target1_2 = target1_2.to(device)
            
            input2 = input2.to(device).float()
            target2_1 = target2_1.to(device)
            target2_2 = target2_2.to(device)
            
            pair_type = pair_type.to(device)
            
            # Model Output
            outputs = model.forward(input1, input2) # model output
            targets = (target1_1, target1_2, target2_1, target2_2)
            
            # Val loss
            loss = criterion(outputs, targets, pair_type)            
            val_loss += loss.item()
    
    writer.add_scalar("Loss/val", val_loss / len(val_loader), epoch)  

    return loss.item()

In [13]:
def evaluate_model_stats(data_loader, model, reduction='weighted'):
    
    model.eval()
    device = torch.device("cuda:0")
    
    complete_outputs_1 = []
    complete_targets_1 = []
    
    complete_outputs_2 = []
    complete_targets_2 = []
    
    # Evaluate test set in batches
    for i, (input1, target1_1, target1_2) in enumerate(data_loader):
        with torch.no_grad():
            
            # Move to GPU
            input1 = input1.to(device).float()
            target1_1 = target1_1.to(device)
            target1_2 = target1_2.to(device)
            
            batch_size = int(input1.shape[0]/2)
            
            # Model Output
            _, _, out1_1, out1_2, out2_1, out2_2 = model.forward(input1[0:batch_size], input1[batch_size::]) # model output
            #targets = (target1_1[0:batch_size], target1_1[batch_size::], target1_2[0:batch_size], target1_2[batch_size::])
            
            sigmoid = torch.nn.Sigmoid()
            out1_1 = sigmoid(out1_1)
            out2_1 = sigmoid(out2_1)
            # print(out1_1)

            complete_outputs_1.append(torch.cat((out1_1, out2_1)))
            complete_targets_1.append(target1_1)
            
            complete_outputs_2.append(torch.cat((out1_2, out2_2)))
            complete_targets_2.append(target1_2)
    
    
    # Concat batch results 
    complete_outputs_1 = torch.cat(complete_outputs_1, 0)
    complete_targets_1 = torch.cat(complete_targets_1, 0)
    
    complete_outputs_2 = torch.cat(complete_outputs_2, 0)
    complete_targets_2 = torch.cat(complete_targets_2, 0)
    
    num_classes_1 = complete_outputs_1.shape[-1]
    num_classes_2 = complete_outputs_2.shape[-1]
    
    # Move to CPU
    complete_targets_1 = complete_targets_1.detach().cpu().numpy()
    complete_outputs_1 = complete_outputs_1.detach().cpu().numpy()
    
    complete_targets_2 = complete_targets_2.detach().cpu().numpy()
    complete_outputs_2 = complete_outputs_2.detach().cpu().numpy()
    
    # Average outputs over entire audio clip
    output_1_avg = np.zeros(
        (int(complete_outputs_1.shape[0]/data_len), complete_outputs_1.shape[1]))
    output_2_avg = np.zeros(
        (int(complete_outputs_2.shape[0]/data_len), complete_outputs_2.shape[1]))
    for i in range(int(complete_outputs_1.shape[0]/data_len)):
        output_1_avg[i] = np.mean(
            complete_outputs_1[data_len*i:data_len*(i+1)], axis=0)
        output_2_avg[i] = np.mean(
            complete_outputs_2[data_len*i:data_len*(i+1)], axis=0)
    
    tot1 = np.sum(complete_targets_1[0::data_len])
    weights_1 = np.sum(complete_targets_1[0::data_len], axis=0)/tot1
    tot2 = np.sum(complete_targets_2[0::data_len])
    weights_2 = np.sum(complete_targets_2[0::data_len], axis=0)/tot2
            
    # Level 1 Average precision, AUC metrics
    average_precision_1 = np.zeros((num_classes_1, ))
    auc_1 = np.zeros((num_classes_1, ))
    for i in range(num_classes_1):
        if weights_1[i] != 0 : # not present in val unbal set
            average_precision_1[i] = metrics.average_precision_score(
                complete_targets_1[0::data_len, i], output_1_avg[:, i])
            auc_1[i] = metrics.roc_auc_score(
                complete_targets_1[0::data_len, i], output_1_avg[:, i], average=None)

    # Level 2 Average precision, AUC metrics
    average_precision_2 = np.zeros((num_classes_2, ))
    auc_2 = np.zeros((num_classes_2, ))
    for i in range(num_classes_2):
        average_precision_2[i] = metrics.average_precision_score(
            complete_targets_2[0::data_len, i], output_2_avg[:, i])
        auc_2[i] = metrics.roc_auc_score(
            complete_targets_2[0::data_len, i], output_2_avg[:, i], average=None)
        
    if(reduction=='average'):
        mAP_1 = np.mean(average_precision_1)
        mauc_1 = np.mean(auc_1)

        mAP_2 = np.mean(average_precision_2)
        mauc_2 = np.mean(auc_2)
        
    elif(reduction=='weighted'):
        mAP_1 = np.sum(weights_1*average_precision_1)
        mauc_1 = np.sum(weights_1*auc_1)
        
        mAP_2 = np.sum(weights_2*average_precision_2)
        mauc_2 = np.sum(weights_2*auc_2)
        
    elif(reduction=='none'):
        return average_precision_1, auc_1, average_precision_2, auc_2
    
    return mAP_1, mauc_1, mAP_2, mauc_2




In [14]:
val_mAP_1, val_auc_1, val_mAP_2, val_auc_2 = evaluate_model_stats(eval_loader, model)

print(val_mAP_1)
print(val_auc_1)
print(val_mAP_2)
print(val_auc_2)

0.04317443510527517
0.5014502042779638
0.22432129631080147
0.49725704878441307


In [16]:
# Save dirs
model_num = 'siamese_lambda1_' + str(lambda1) + '_lambda2_' + str(lambda2) + '_lambda3_' + str(lambda3) + '/'
base_dir = './'
model_dir = base_dir + 'models/' + model_num
runs_dir = base_dir + 'runs/' + model_num

if not os.path.exists(model_dir):
    os.makedirs(model_dir)
if not os.path.exists(runs_dir):
    os.makedirs(runs_dir)

# Tensorboard logging
writer = SummaryWriter(runs_dir)
torch.backends.cudnn.benchmark = True

# Training loop
epochs = 40
for epoch in range(epochs):
    
    # Train + Validation
    training_loss = train_model(train_loader, model, epoch, writer)
    val_loss = evaluate_model(val_loader, model, epoch, writer)
    
    # Val Stats
    val_mAP_1, val_auc_1, val_mAP_2, val_auc_2 = evaluate_model_stats(eval_loader, model)

    scheduler.step()
    
    # Print log of accuracy and loss
    print("Epoch: "+str(epoch)+", Training loss: "+str(training_loss)+", Validation loss: "+str(val_loss)+", Validation mAP_1/AUC_1: "+str(val_mAP_1)+"/"+str(val_auc_1)+
             ", Validation mAP_2/AUC_2: "+str(val_mAP_2)+"/"+str(val_auc_2))
    
    writer.add_scalar("mAP_1/val", val_mAP_1, epoch)
    writer.add_scalar("AUC_1/val", val_auc_1, epoch)
    writer.add_scalar("mAP_2/val", val_mAP_2, epoch)
    writer.add_scalar("AUC_2/val", val_auc_2, epoch)
    
    # Save model checkpoint
    model_filename = model_dir + 'epoch' + str(epoch) + '.pt'
    if(epoch == 0):
        torch.save(model, model_filename)
    else:
        torch.save({'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': training_loss,}, model_filename)

2021-11-25 15:00:10.520536: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
Epoch 0: 100%|██████████| 870755/870755 [3:45:25<00:00, 64.38it/s, loss=2.04]


Epoch: 0, Training loss: 2.059190511247466, Validation loss: 2.712580680847168, Validation mAP_1/AUC_1: 0.12037549633770311/0.7297119998817448, Validation mAP_2/AUC_2: 0.46782137726502177/0.739521803130285


Epoch 1: 100%|██████████| 870755/870755 [3:38:35<00:00, 66.39it/s, loss=2.06]


Epoch: 1, Training loss: 2.0326146808515477, Validation loss: 2.944230079650879, Validation mAP_1/AUC_1: 0.11326904092456805/0.7329313824715791, Validation mAP_2/AUC_2: 0.45049994064566434/0.7264390410620798


Epoch 2:  70%|███████   | 611689/870755 [2:27:28<1:02:27, 69.13it/s, loss=2.01]


KeyboardInterrupt: 

In [None]:
# Save dirs
model_num = 'siamese_lambda1_' + str(lambda1) + '_lambda2_' + str(lambda2) + '_lambda3_' + str(lambda3) + '/'
base_dir = './'
model_dir = base_dir + 'models/' + model_num
runs_dir = base_dir + 'runs/' + model_num

if not os.path.exists(model_dir):
    os.makedirs(model_dir)
if not os.path.exists(runs_dir):
    os.makedirs(runs_dir)

# Tensorboard logging
writer = SummaryWriter(runs_dir)
torch.backends.cudnn.benchmark = True

# Load model
model_save_dir = model_dir + 'epoch0.pt'
model = torch.load(model_save_dir) 
model.to(device)

# Load saved weights
weights_dir = model_dir + 'epoch39.pt'
state = torch.load(weights_dir)
model.load_state_dict(state['model_state_dict'])

# Training loop
epochs = 40
for epoch in range(40, 60):
    
    # Train + Validation
    training_loss = train_model(train_loader, model, epoch, writer)
    val_loss = evaluate_model(val_loader, model, epoch, writer)
    
    # Val Stats
    val_mAP_1, val_auc_1, val_mAP_2, val_auc_2 = evaluate_model_stats(eval_loader, model)

    scheduler.step()
    
    # Print log of accuracy and loss
    print("Epoch: "+str(epoch)+", Training loss: "+str(training_loss)+", Validation loss: "+str(val_loss)+", Validation mAP_1/AUC_1: "+str(val_mAP_1)+"/"+str(val_auc_1)+
             ", Validation mAP_2/AUC_2: "+str(val_mAP_2)+"/"+str(val_auc_2))
    
    writer.add_scalar("mAP_1/val", val_mAP_1, epoch)
    writer.add_scalar("AUC_1/val", val_auc_1, epoch)
    writer.add_scalar("mAP_2/val", val_mAP_2, epoch)
    writer.add_scalar("AUC_2/val", val_auc_2, epoch)
    
    # Save model checkpoint
    model_filename = model_dir + 'epoch' + str(epoch) + '.pt'
    if(epoch == 0):
        torch.save(model, model_filename)
    else:
        torch.save({'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': training_loss,}, model_filename)

In [None]:
# Test Data
data_dir = './'

sounds_data = np.load(data_dir + 'audioset_test_data.npy', allow_pickle=True)
class1_index = np.load(data_dir + 'audioset_test_labels_1.npy', allow_pickle=True)
class2_index = np.load(data_dir + 'audioset_test_labels_2.npy', allow_pickle=True)

eval_data = AudioSet_Siamese_Eval(sounds_data, class1_index, class2_index, num_label_1, num_label_2)
eval_args = dict(shuffle = False, batch_size = 256, num_workers=8, pin_memory=True)
eval_loader = DataLoader(eval_data, **eval_args)

# Load model
model_save_dir = 'models/siamese_lambda1_1_lambda2_0.5_lambda3_0.25/'
model_num = 'epoch0.pt'
model = torch.load(model_save_dir + model_num) 
model.to(device)

# Load saved weights
weights_dir = 'epoch39.pt'
state = torch.load(model_save_dir + weights_dir)
model.load_state_dict(state['model_state_dict'])
model.eval()

test_mAP_1, test_AUC_1, test_mAP_2, test_AUC_2 = evaluate_model_stats(eval_loader, model, reduction='weighted')

print(test_mAP_1)
print(test_AUC_1)
print(test_mAP_2)
print(test_AUC_2)
