# Get dataset from another Gdrive account

In [None]:
def folder_download(folder_id):
  """Download a Google Drive folder into the current directory and return its name.

  Intended to run inside Google Colab / IPython: it relies on
  ``google.colab.auth`` and on an IPython ``!`` shell magic.

  Args:
    folder_id: Drive ID of the folder to fetch.

  Returns:
    The value of the ``name`` field that the Drive API reports for the folder.
  """
  # Authenticate the current Colab user (interactive).
  from google.colab import auth
  auth.authenticate_user()
  # Build a Drive v3 client and look up the folder's display name.
  from googleapiclient.discovery import build
  service = build('drive', 'v3')
  folder_name = service.files().get(fileId=folder_id).execute()['name']
  # Fetch the third-party downloader script (wget: -q quiet, -nc keep an
  # already-downloaded copy) and import it from the working directory.
  # NOTE(review): this executes unpinned code from a GitHub master branch;
  # consider pinning to a commit hash.
  !wget -qnc https://github.com/segnolin/google-drive-folder-downloader/raw/master/download.py
  from download import download_folder
  # presumably writes the folder contents under ./<folder_name> — confirm
  # against download.py
  download_folder(service, folder_id, './', folder_name)
  return folder_name

# Pass the Drive folder ID (the identifying part of the folder's share URL).
folder_download('1Kmlx7fGMGWo76VacEc4PDPDsVkEMe1Zf') # COMPLETE DATASET


# Mount my Gdrive



In [None]:
import sys
from google.colab import drive

# Expose Google Drive under /content/drive, remounting even if it is
# already mounted.
drive.mount('/content/drive', force_remount=True)

# Put the project folder first on the module search path
# (presumably where SELDNet.py and utility_functions.py live — confirm).
sys.path.insert(0, '/content/drive/MyDrive/Colab Notebooks/IEEE3D/')


# Install requirements


In [None]:
# Install the project's dependencies from the requirements file on the mounted Drive.
# NOTE(review): the path is relative while the Drive is mounted at /content/drive,
# so this assumes the working directory is /content — confirm.
!pip install -r 'drive/MyDrive/Colab Notebooks/IEEE3D/requirements.txt'

'''
Train our baseline model for the task2 of the L3DAS21 challenge.
This script saves the best model checkpoint, as well as a dict containing
the results (loss and history). To evaluate the performance of the trained model
according to the challenge metrics, please use evaluate_baseline_task2.py.
The variables in the 'Define "args"' cell (originally command line arguments)
define the model parameters, the dataset to use and
where to save the obtained results.
'''


# Import libs


In [None]:
import sys, os
import time
import json
import pickle
import argparse
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.utils.data as utils
from SELDNet import Seldnet, Seldnet_augmented
from utility_functions import load_model, save_model

# Define "args"

In [None]:
# Notebook replacement for the original script's command line arguments:
# every option is a plain module-level variable read by the cells below.

# --- output locations ---
results_path = 'RESULTS/task2'    # folder to write results dicts into
checkpoint_dir = 'RESULTS/task2'  # folder to write checkpoints into

# --- pre-processed dataset pickles ---
training_predictors_path = 'processed/task2_predictors_train.pkl'
training_target_path = 'processed/task2_target_train.pkl'
# The four paths below were left as un-runnable argparse fragments; they are
# now ordinary assignments carrying the former default values.
validation_predictors_path = 'processed/task2_predictors_validation.pkl'
validation_target_path = 'processed/task2_target_validation.pkl'
test_predictors_path = 'processed/task2_predictors_test.pkl'
test_target_path = 'processed/task2_target_test.pkl'

# --- training parameters ---
gpu_id = 0
# NOTE(review): the three flags below are strings, so even 'False' is truthy
# when tested with `if`; kept as strings for compatibility with existing code.
use_cuda = 'True'
early_stopping = 'True'
fixed_seed = 'False'
# Path of a previously trained model (whole task model) to resume from, or None.
# NOTE(review): this rebinds the name `load_model`, hiding the function of the
# same name imported from utility_functions earlier in the file.
load_model = None
lr = 0.00001
batch_size = 20   # batch size
sr = 32000        # sampling rate
patience = 3      # patience for early stopping on validation set
loss = "L2"       # "L1" or "L2"

# --- model parameters ---
architecture = 'seldnet'  # can be 'seldnet' or 'seldnet_augmented'
time_dim = 600
freq_dim = 256
input_channels = 4
output_classes = 14
pool_size = [[8, 2], [8, 2], [2, 2]]
pool_time = False
n_cnn_filters = 64
rnn_size = 128
n_rnn = 1
fc_size = 128
dropout_perc = 0.0
verbose = True

## Running environment 

In [None]:
# Use the first GPU when one is visible; otherwise fall back to the CPU so the
# notebook does not crash later when the model and batches are moved to `device`.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
    print('CUDA is not available: running on CPU')

# Set to True to seed numpy and torch and make cuDNN deterministic.
fixed_seed = False
if fixed_seed:
    seed = 1
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Evaluate network (validation)

In [None]:
def evaluate(model, device, criterion, dataloader):
    """Return the running-mean loss of ``model`` over ``dataloader`` (no backprop).

    Args:
        model: network called as ``model(x)`` and returning a
            ``(outputs_sed, outputs_doa)`` pair; only the second is scored.
        device: device the batches are moved to before the forward pass.
        criterion: loss callable applied to ``(outputs_doa, target[:, :, :126])``.
        dataloader: iterable of ``(x, target)`` batches that supports ``len()``.

    Returns:
        The mean of the per-batch losses. This is whatever type ``criterion``
        yields (a tensor for torch losses); an empty ``dataloader`` gives the
        float ``0.``.
    """
    model.eval()
    test_loss = 0.
    # len(dataloader) is already the number of batches, so it is used directly
    # as the progress-bar total (no further division by the batch size).
    with tqdm(total=len(dataloader)) as pbar, torch.no_grad():
        for example_num, (x, target) in enumerate(dataloader):
            target = target.to(device)
            x = x.to(device)
            outputs_sed, outputs_doa = model(x)
            # Only the first 126 entries of the target's last axis are scored
            # (presumably the localisation part of the label — confirm).
            loss = criterion(outputs_doa, target[:, :, :126])
            # Incremental mean: mean_k = mean_{k-1} + (loss_k - mean_{k-1}) / k
            test_loss += (1. / float(example_num + 1)) * (loss - test_loss)
            pbar.set_description("Current loss: {:.4f}".format(test_loss))
            pbar.update(1)
    return test_loss

# Load Dataset


In [None]:

print ('\nLoading dataset')


def _unpickle_as_array(pkl_path):
    """Read one pickle file and return its content as a numpy array."""
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # open dataset files from a trusted source.
    with open(pkl_path, 'rb') as handle:
        return np.array(pickle.load(handle))


training_predictors = _unpickle_as_array(training_predictors_path)
training_target = _unpickle_as_array(training_target_path)
validation_predictors = _unpickle_as_array(validation_predictors_path)
validation_target = _unpickle_as_array(validation_target_path)
test_predictors = _unpickle_as_array(test_predictors_path)
test_target = _unpickle_as_array(test_target_path)

print ('\nShapes:')
print ('Training predictors: ', training_predictors.shape)
print ('Validation predictors: ', validation_predictors.shape)
print ('Test predictors: ', test_predictors.shape)

# numpy arrays -> float tensors
training_predictors = torch.tensor(training_predictors).float()
training_target = torch.tensor(training_target).float()
validation_predictors = torch.tensor(validation_predictors).float()
validation_target = torch.tensor(validation_target).float()
test_predictors = torch.tensor(test_predictors).float()
test_target = torch.tensor(test_target).float()

# one (predictors, target) dataset per split
tr_dataset = utils.TensorDataset(training_predictors, training_target)
val_dataset = utils.TensorDataset(validation_predictors, validation_target)
test_dataset = utils.TensorDataset(test_predictors, test_target)

# batched loaders; only the training split is shuffled
tr_data = utils.DataLoader(tr_dataset, batch_size=batch_size,
                           shuffle=True, pin_memory=True)
val_data = utils.DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False, pin_memory=True)
test_data = utils.DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, pin_memory=True)

# Load MODEL


In [None]:
architecture = 'seldnet' # WANDB (test)

# Instantiate the requested network.
if architecture == 'seldnet':
    model = Seldnet(time_dim=time_dim, freq_dim=freq_dim,
                    input_channels=input_channels, output_classes=output_classes,
                    pool_size=pool_size, pool_time=pool_time,
                    n_cnn_filters=n_cnn_filters, rnn_size=rnn_size,
                    n_rnn=n_rnn, fc_size=fc_size, dropout_perc=dropout_perc,
                    verbose=verbose)
elif architecture == 'seldnet_augmented':
    model = Seldnet_augmented()
else:
    # Fail here with a clear message instead of a NameError on `model` below.
    raise ValueError("Unknown architecture: " + str(architecture))


print("Moving model to gpu")
model = model.to(device)

#compute number of parameters
model_params = sum(p.numel() for p in model.parameters())
print ('Total paramters: ' + str(model_params))

#set up the loss function
if loss == "L1":
    criterion = nn.L1Loss()
elif loss == "L2":
    criterion = nn.MSELoss()
else:
    raise NotImplementedError("Couldn't find this loss!")

#set up optimizer
optimizer = Adam(params=model.parameters(), lr=lr)

#set up training state dict that will also be saved into checkpoints
# (np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0)
state = {"step" : 0,
         "worse_epochs" : 0,
         "epochs" : 0,
         "best_loss" : np.inf}

#load model checkpoint if desired
if load_model is not None:
    # In this notebook the name `load_model` holds the checkpoint path (it is
    # rebound in the "args" cell), so the loader function of the same name is
    # re-imported under a different name before it is called.
    from utility_functions import load_model as load_checkpoint
    print("Continuing training full model from checkpoint " + str(load_model))
    state = load_checkpoint(model, optimizer, load_model, use_cuda)

# Train model


In [None]:
print('TRAINING START')

# Make sure the checkpoint folder exists before the first save.
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_path = os.path.join(checkpoint_dir, "checkpoint")

train_loss_hist = []
val_loss_hist = []
# Train epoch by epoch until the validation loss has failed to improve for
# `patience` consecutive epochs.
while state["worse_epochs"] < patience:
    print("Training one epoch from iteration " + str(state["step"]))
    avg_time = 0.
    model.train()
    train_loss = 0.
    # len(tr_data) is the exact number of batches, including a final partial one.
    with tqdm(total=len(tr_data)) as pbar:
        for example_num, (x, target) in enumerate(tr_data):
            target = target.to(device)
            x = x.to(device)
            t = time.time()

            optimizer.zero_grad()
            outputs_sed, outputs_doa = model(x)
            # Only the first 126 entries of the target's last axis are scored
            # (presumably the localisation part of the label — confirm).
            f_loss = criterion(outputs_doa, target[:, :, :126])
            f_loss.backward()

            # Running mean of the batch losses. detach() keeps the statistic
            # from holding on to every iteration's autograd history.
            train_loss += (1. / float(example_num + 1)) * (f_loss.detach() - train_loss)
            optimizer.step()
            state["step"] += 1
            t = time.time() - t
            avg_time += (1. / float(example_num + 1)) * (t - avg_time)

            pbar.update(1)

    #PASS VALIDATION DATA
    val_loss = evaluate(model, device, criterion, val_data)
    print("VALIDATION FINISHED: LOSS: " + str(val_loss))

    # EARLY STOPPING CHECK
    # Written as "not strictly better" so a NaN validation loss counts as a
    # worse epoch instead of registering as an improvement forever.
    if not (val_loss < state["best_loss"]):
        state["worse_epochs"] += 1
    else:
        print("MODEL IMPROVED ON VALIDATION SET!")
        state["worse_epochs"] = 0
        state["best_loss"] = val_loss
        state["best_checkpoint"] = checkpoint_path

        # CHECKPOINT
        print("Saving model...")
        save_model(model, optimizer, state, checkpoint_path)

    state["epochs"] += 1
    #state["worse_epochs"] = 200
    train_loss_hist.append(train_loss.cpu().detach().numpy())
    val_loss_hist.append(val_loss.cpu().detach().numpy())

# Load best model and compute loss for all sets


In [None]:
print("TESTING")
# The "args" cell rebinds the name `load_model` to the checkpoint path (None by
# default), so the loader function is re-imported under a different name.
from utility_functions import load_model as load_checkpoint

# Load best model based on validation loss
state = load_checkpoint(model, None, state["best_checkpoint"], use_cuda)
#compute loss on all sets
train_loss = evaluate(model, device, criterion, tr_data)
val_loss = evaluate(model, device, criterion, val_data)
test_loss = evaluate(model, device, criterion, test_data)

#PRINT AND SAVE RESULTS
results = {'train_loss': train_loss.cpu().detach().numpy(),
           'val_loss': val_loss.cpu().detach().numpy(),
           'test_loss': test_loss.cpu().detach().numpy(),
           'train_loss_hist': train_loss_hist,
           'val_loss_hist': val_loss_hist}

print ('RESULTS')
for i in results:
    if 'hist' not in i:
        print (i, results[i])

# Create the results folder if needed so the save below cannot fail on a
# missing directory.
if results_path:
    os.makedirs(results_path, exist_ok=True)
out_path = os.path.join(results_path, 'results_dict.json')
# NOTE: np.save pickles the dict and appends ".npy" to the name, so the file on
# disk is "results_dict.json.npy" (read it back with
# np.load(..., allow_pickle=True)); despite the name it is not JSON.
np.save(out_path, results)