In [None]:
#!pip install -U pytorch_metric_learning[with-hooks]==0.9.99
#!pip install umap-learn==0.5.1

In [None]:
from utils import *
from dnn_models import MLP, flip
from dnn_models import SincNet as CNN 
from torch.autograd import Variable

In [None]:
%matplotlib inline
from pytorch_metric_learning import losses, miners, samplers, trainers, testers
from pytorch_metric_learning.utils import common_functions
import pytorch_metric_learning.utils.logging_presets as logging_presets
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator
import numpy as np
import pandas as pd
import torchvision
from torchvision import datasets, transforms
import torch
import torch.nn as nn
from PIL import Image
import logging
import matplotlib.pyplot as plt
import umap
from cycler import cycler
from tqdm import tqdm
import record_keeper
import pytorch_metric_learning

In [None]:
# Let INFO-level records through the root logger, then record which
# pytorch_metric_learning version this run is using.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
logging.info("VERSION %s", pytorch_metric_learning.__version__)

### Load configs

In [None]:
# Parse the experiment configuration; every setting below is read from `options`.
config_path = 'SincNet_TradeDesk.cfg'
options = read_conf(config_path)

In [None]:
def _parse_list(raw, cast):
    """Split a comma-separated config string and convert each item with ``cast``."""
    return [cast(item) for item in raw.split(',')]


# File lists, checkpoint, label dictionary and folders
tr_lst = options.tr_lst
te_lst = options.te_lst
pt_file = options.pt_file
class_dict_file = options.lab_dict
data_folder = options.data_folder + '/'
output_folder = options.output_folder

#[windowing]
fs = int(options.fs)
cw_len = int(options.cw_len)
cw_shift = int(options.cw_shift)

#[cnn]
cnn_N_filt = _parse_list(options.cnn_N_filt, int)
cnn_len_filt = _parse_list(options.cnn_len_filt, int)
cnn_max_pool_len = _parse_list(options.cnn_max_pool_len, int)
cnn_use_laynorm_inp = str_to_bool(options.cnn_use_laynorm_inp)
cnn_use_batchnorm_inp = str_to_bool(options.cnn_use_batchnorm_inp)
cnn_use_laynorm = _parse_list(options.cnn_use_laynorm, str_to_bool)
cnn_use_batchnorm = _parse_list(options.cnn_use_batchnorm, str_to_bool)
cnn_act = _parse_list(options.cnn_act, str)
cnn_drop = _parse_list(options.cnn_drop, float)

#[dnn]
fc_lay = _parse_list(options.fc_lay, int)
fc_drop = _parse_list(options.fc_drop, float)
fc_use_laynorm_inp = str_to_bool(options.fc_use_laynorm_inp)
fc_use_batchnorm_inp = str_to_bool(options.fc_use_batchnorm_inp)
fc_use_batchnorm = _parse_list(options.fc_use_batchnorm, str_to_bool)
fc_use_laynorm = _parse_list(options.fc_use_laynorm, str_to_bool)
fc_act = _parse_list(options.fc_act, str)

#[class]
class_lay = _parse_list(options.class_lay, int)
class_drop = _parse_list(options.class_drop, float)
class_use_laynorm_inp = str_to_bool(options.class_use_laynorm_inp)
class_use_batchnorm_inp = str_to_bool(options.class_use_batchnorm_inp)
class_use_batchnorm = _parse_list(options.class_use_batchnorm, str_to_bool)
class_use_laynorm = _parse_list(options.class_use_laynorm, str_to_bool)
class_act = _parse_list(options.class_act, str)

#[optimization]
lr = float(options.lr)
batch_size = int(options.batch_size)
N_epochs = int(options.N_epochs)
N_batches = int(options.N_batches)
N_eval_epoch = int(options.N_eval_epoch)
seed = int(options.seed)

# sample rate
# The notebook pins the sample rate regardless of the config. Keep that
# behaviour, but make the override visible instead of silently discarding
# the configured value.
PINNED_FS = 16000
if fs != PINNED_FS:
    logging.warning("Config fs=%d is overridden by the pinned sample rate %d", fs, PINNED_FS)
fs = PINNED_FS

In [None]:
# Convert the context-window length and shift into sample counts.
# The division by 1000 implies cw_len / cw_shift are given in milliseconds.
wlen, wshift = (int(fs * span_ms / 1000.00) for span_ms in (cw_len, cw_shift))

### Initialize models, optimizers

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

In [None]:
# Feature extractor CNN (SincNet, imported as CNN): architecture options
trunk_arch = dict(
    input_dim=wlen,
    fs=fs,
    cnn_N_filt=cnn_N_filt,
    cnn_len_filt=cnn_len_filt,
    cnn_max_pool_len=cnn_max_pool_len,
    cnn_use_laynorm_inp=cnn_use_laynorm_inp,
    cnn_use_batchnorm_inp=cnn_use_batchnorm_inp,
    cnn_use_laynorm=cnn_use_laynorm,
    cnn_use_batchnorm=cnn_use_batchnorm,
    cnn_act=cnn_act,
    cnn_drop=cnn_drop,
)

trunk = CNN(trunk_arch).to(device)
# NOTE(review): trunk2 is not referenced anywhere else in the visible notebook;
# it looks like a leftover that only costs device memory - confirm before removing.
trunk2 = CNN(trunk_arch).to(device)

# Read out_dim before wrapping: DataParallel does not forward custom attributes.
trunk_out_dim = trunk.out_dim
print(trunk.input_dim, '>>', trunk_out_dim)
trunk = torch.nn.DataParallel(trunk)

In [None]:
# Embedding MLP: maps the trunk output (trunk_out_dim) to fc_lay[-1] dimensions.
embedder_arch = dict(
    input_dim=trunk_out_dim,
    fc_lay=fc_lay,
    fc_drop=fc_drop,
    fc_use_batchnorm=fc_use_batchnorm,
    fc_use_laynorm=fc_use_laynorm,
    fc_use_laynorm_inp=fc_use_laynorm_inp,
    fc_use_batchnorm_inp=fc_use_batchnorm_inp,
    fc_act=fc_act,
)

print(trunk_out_dim, '>>', fc_lay[-1])
embedder_mlp = MLP(embedder_arch).to(device)
embedder = torch.nn.DataParallel(embedder_mlp)

In [None]:
# Classification MLP stacked on the embedding (input size fc_lay[-1]).
# It reuses the MLP class, so the class_* options are passed under the fc_* keys.
classifier_arch = dict(
    input_dim=fc_lay[-1],
    fc_lay=class_lay,
    fc_drop=class_drop,
    fc_use_batchnorm=class_use_batchnorm,
    fc_use_laynorm=class_use_laynorm,
    fc_use_laynorm_inp=class_use_laynorm_inp,
    fc_use_batchnorm_inp=class_use_batchnorm_inp,
    fc_act=class_act,
)

print(fc_lay[-1], '>>', class_lay[0])
classifier_mlp = MLP(classifier_arch).to(device)
classifier = torch.nn.DataParallel(classifier_mlp)

In [None]:
# if pt_file!='none':
#     print('checkpoint_load')
#     checkpoint_load = torch.load(pt_file)
#     CNN_net.load_state_dict(checkpoint_load['trunk_model_par'])
#     DNN1_net.load_state_dict(checkpoint_load['embedder_model_par'])
#     DNN2_net.load_state_dict(checkpoint_load['classifier_model_par'])

In [None]:
# Set optimizers
# NOTE(review): the learning rates are hard-coded here; the `lr` value parsed
# from the config is not used by these optimizers - confirm this is intended.
def _make_adam(model, learning_rate):
    """Build an Adam optimizer over ``model``'s parameters with the shared weight decay."""
    return torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0001)


# The trunk is trained with a 10x smaller step than the two MLP heads.
trunk_optimizer = _make_adam(trunk, 0.00001)
embedder_optimizer = _make_adam(embedder, 0.0001)
classifier_optimizer = _make_adam(classifier, 0.0001)

# trunk_optimizer = torch.optim.RMSprop(trunk.parameters(), lr=lr,alpha=0.95, eps=1e-8) 
# embedder_optimizer = torch.optim.RMSprop(embedder.parameters(), lr=lr,alpha=0.95, eps=1e-8) 
# classifier_optimizer = torch.optim.RMSprop(classifier.parameters(), lr=lr,alpha=0.95, eps=1e-8) 

### Load datasets

In [None]:
# Loading label dictionary (a pickled Python object stored inside a NumPy file).
# NOTE: allow_pickle=True executes pickle deserialization - only load label
# files that come from a trusted source.
label_archive = np.load(class_dict_file, allow_pickle=True)
lab_dict = label_archive.item()

In [None]:
# Training list: entries read from the file named by tr_lst
# (presumably one audio path per entry - TODO confirm against ReadList).
wav_lst_tr = ReadList(tr_lst)
# Number of entries in the training list.
snt_tr = len(wav_lst_tr)

In [None]:
# Test list: entries read from the file named by te_lst
# (presumably one audio path per entry - TODO confirm against ReadList).
wav_lst_te = ReadList(te_lst)
# Number of entries in the test list.
snt_te = len(wav_lst_te)

In [None]:
# Dataset serving one randomly positioned chunk per listed audio file.
class ClassAudioChunkRnd(torch.utils.data.Dataset):
    """Dataset that returns a random chunk of each audio file together with its label.

    Args:
        original_dataset: iterable of audio file names, relative to ``data_folder``.
        lab_dict: mapping from file name to class label. Files that are not in
            the mapping get the label ``-1``.
        wlen: chunk length handed to ``get_rnd_chunk`` (presumably in samples).
        data_folder: folder the file names are joined onto.
        transform: optional callable applied to the chunk. When ``None`` the
            chunk is instead scaled by a random gain drawn from [0.8, 1.2).
    """

    def __init__(self, original_dataset, lab_dict, wlen=3200, data_folder='data', transform=None):
        self.data = list(original_dataset)
        self.targets = [lab_dict.get(item, -1) for item in self.data]
        # Keep the constructor arguments on the instance so that __getitem__
        # honours them instead of reading same-named notebook globals.
        self.wlen = wlen
        self.data_folder = data_folder
        self.transform = transform

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(chunk_tensor, label)`` for the file at ``index``.

        The chunk is a float tensor; the label is a ``np.int64`` scalar.
        """
        wav_name, target = self.data[index], self.targets[index]
        wav = get_rnd_chunk(os.path.join(self.data_folder, wav_name), self.wlen)
        if self.transform is not None:
            wav = self.transform(wav)
        else:
            # Default augmentation: random amplitude scaling.
            wav *= np.random.uniform(0.8, 1.2)
        # torch.autograd.Variable is a deprecated no-op wrapper; a plain tensor is equivalent.
        wav = torch.from_numpy(wav).float().contiguous()
        return wav, np.int64(target)

In [None]:
# Dataset serving every audio file as consecutive, non-overlapping chunks.
class ClassAudioChunk(torch.utils.data.Dataset):
    """Dataset that cuts each audio file into back-to-back chunks of ``wlen`` samples.

    At construction time every file is read once to learn its length, and one
    ``[file_name, begin, end]`` entry is stored per chunk. Samples after the
    last full chunk are dropped. A file shorter than ``wlen`` still yields a
    single, shorter chunk covering the whole file.

    Args:
        original_dataset: iterable of audio file names, relative to ``data_folder``.
        lab_dict: mapping from file name to class label. Files that are not in
            the mapping get the label ``-1``.
        wlen: chunk length in samples; must be positive.
        data_folder: folder the file names are joined onto, used both for
            indexing here and for reading in ``__getitem__``.
        transform: optional callable applied to the chunk. When ``None`` the
            chunk is instead scaled by a random gain drawn from [0.8, 1.2).

    Raises:
        ValueError: if ``wlen`` is not positive (the chunking loop would never end).
    """

    def __init__(self, original_dataset, lab_dict, wlen=3200, data_folder='data', transform=None):
        if wlen <= 0:
            raise ValueError("wlen must be a positive number of samples, got {}".format(wlen))
        self.wlen = wlen
        # Stored so that __getitem__ reads from the same folder that was indexed here.
        self.data_folder = data_folder
        self.transform = transform
        self.data, self.targets = [], []

        for name in original_dataset:
            target = lab_dict.get(name, -1)
            signal, _ = sf.read(os.path.join(data_folder, name))
            n_samples = len(signal)
            beg = 0
            # `beg == 0` guarantees at least one chunk for files shorter than wlen.
            while beg + wlen <= n_samples or beg == 0:
                end = min(beg + wlen, n_samples)
                self.data.append([name, beg, end])
                self.targets.append(target)
                beg += wlen

    def __len__(self):
        """Return the total number of chunks across all files."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(chunk_tensor, label)`` for the chunk at ``index``.

        The chunk is a float tensor; the label is a ``np.int64`` scalar.
        """
        name, beg, end = self.data[index]
        target = self.targets[index]
        signal, _ = sf.read(os.path.join(self.data_folder, name))
        wav = signal[beg:end]
        if self.transform is not None:
            wav = self.transform(wav)
        else:
            # Default augmentation: random amplitude scaling.
            wav *= np.random.uniform(0.8, 1.2)
        # torch.autograd.Variable is a deprecated no-op wrapper; a plain tensor is equivalent.
        wav = torch.from_numpy(wav).float().contiguous()
        return wav, np.int64(target)

In [None]:
# Pass the configured data folder explicitly so that chunk indexing does not
# fall back to the constructor's default 'data' folder.
train_dataset = ClassAudioChunk(wav_lst_tr, lab_dict, wlen, data_folder=data_folder)
val_dataset = ClassAudioChunk(wav_lst_te, lab_dict, wlen, data_folder=data_folder)

In [None]:
# Number of training chunks (one dataset item per chunk, not per file).
len(train_dataset)

In [None]:
# Number of validation chunks (one dataset item per chunk, not per file).
len(val_dataset)

In [None]:
# Number of distinct labels in the validation set
# (files missing from lab_dict all share the placeholder label -1).
len(set(val_dataset.targets))

In [None]:
# x = np.zeros([2, wlen])
# x[0] = train_dataset[0][0]
# x[1] = train_dataset[1][0]
# x = Variable(torch.from_numpy(x).float().contiguous())
# classifier(embedder(trunk(x)))[0]

### Create the loss, miner, sampler, and package them into dictionaries

In [None]:
# Set the loss function
loss_fn = losses.TripletMarginLoss(margin=0.1)
# loss = losses.CrossMemory(loss_fn, 1024)
loss = loss_fn

# Set the classification loss:
classification_loss = torch.nn.CrossEntropyLoss()

# Set the mining function
miner = miners.MultiSimilarityMiner(epsilon=0.1)

# Set the dataloader sampler
# NOTE(review): with m=1 each pass over the classes contributes a single sample
# per class, so a batch presumably contains few or no same-class (positive)
# pairs for the triplet loss unless there are fewer classes than batch_size -
# confirm that m=1 is intended.
sampler = samplers.MPerClassSampler(train_dataset.targets, m=1, length_before_new_iter=len(train_dataset))

# Set other training parameters
# The batch size is pinned here and replaces the value read from the config.
# Keep that behaviour, but make the override visible instead of silent.
PINNED_BATCH_SIZE = 128
if batch_size != PINNED_BATCH_SIZE:
    logging.warning("Config batch_size=%d is overridden by the pinned batch size %d",
                    batch_size, PINNED_BATCH_SIZE)
batch_size = PINNED_BATCH_SIZE

# Package the above stuff into dictionaries.
models = {"trunk": trunk, "embedder": embedder, "classifier": classifier}
optimizers = {
    "trunk_optimizer": trunk_optimizer,
    "embedder_optimizer": embedder_optimizer,
    "classifier_optimizer": classifier_optimizer,
}
loss_funcs = {"metric_loss": loss, "classifier_loss": classification_loss}
mining_funcs = {"tuple_miner": miner}

# We can specify loss weights if we want to. This is optional
loss_weights = {"metric_loss": 1, "classifier_loss": 0.5}

In [None]:
# # Remove logs if you want to train with new parameters
# !rm -rf example_logs/ example_saved_models/ example_tensorboard/

### Create the training and testing hooks

In [None]:
# Worker count passed as dataloader_num_workers to both the tester and the trainer.
num_workers = 0

In [None]:
# Logging / checkpoint locations used by the hooks below.
log_folder, tensorboard_folder = "example_logs", "example_tensorboard"
model_folder = "example_saved_models"

# NOTE(review): this rebinds `record_keeper`, which the import cell bound to the
# record_keeper module; nothing visible here needs the module afterwards.
record_keeper, _, _ = logging_presets.get_record_keeper(log_folder, tensorboard_folder)
hooks = logging_presets.get_hook_container(record_keeper)

# Splits evaluated by the tester, keyed by split name.
dataset_dict = dict(val=val_dataset)

def visualizer_hook(umapper, umap_embeddings, labels, split_name, keyname, *args):
    """Scatter-plot 2-D UMAP embeddings, one colour per distinct label.

    Passed to the tester as ``visualizer_hook``.

    Args:
        umapper: the visualizer object (unused here).
        umap_embeddings: array indexed as ``[sample, dim]``; columns 0 and 1 are plotted.
        labels: array of labels aligned with the rows of ``umap_embeddings``.
        split_name: name of the evaluated split, used in the log line and title.
        keyname: name of the label set, used in the log line and title.
        *args: extra positional arguments, ignored.
    """
    logging.info("UMAP plot for the {} split and label set {}".format(split_name, keyname))
    label_set = np.unique(labels)
    num_classes = len(label_set)

    fig, ax = plt.subplots(figsize=(20, 15))
    # Spread the class colours evenly over the nipy_spectral colormap.
    ax.set_prop_cycle(cycler("color", [plt.cm.nipy_spectral(i) for i in np.linspace(0, 0.9, num_classes)]))
    for label in label_set:
        idx = labels == label
        ax.plot(umap_embeddings[idx, 0], umap_embeddings[idx, 1], ".", markersize=1)
    ax.set_title("UMAP embeddings: {} split, label set {}".format(split_name, keyname))
    plt.show()
    # This hook runs at every test; release the figure explicitly so figures
    # do not accumulate on backends that keep them open after show().
    plt.close(fig)

# Create the tester
tester_options = dict(
    end_of_testing_hook=hooks.end_of_testing_hook,
    visualizer=umap.UMAP(),
    visualizer_hook=visualizer_hook,
    dataloader_num_workers=num_workers,
    accuracy_calculator=AccuracyCalculator(k="max_bin_count"),
)
tester = testers.GlobalEmbeddingSpaceTester(**tester_options)

# End-of-epoch hook: runs the tester on dataset_dict and saves models to model_folder.
# NOTE(review): test_interval=1 / patience=1 presumably mean "evaluate every epoch,
# stop after one epoch without improvement" - confirm against the library docs.
end_of_epoch_hook = hooks.end_of_epoch_hook(
    tester,
    dataset_dict,
    model_folder,
    test_interval=1,
    patience=1,
)

### Create trainer

In [None]:
# Trainer that optimises the metric loss and the classifier loss together
# (see loss_funcs / loss_weights).
trainer_options = dict(
    sampler=sampler,
    dataloader_num_workers=num_workers,
    loss_weights=loss_weights,
    end_of_iteration_hook=hooks.end_of_iteration_hook,
    end_of_epoch_hook=end_of_epoch_hook,
)
trainer = trainers.TrainWithClassifier(
    models,
    optimizers,
    batch_size,
    loss_funcs,
    mining_funcs,
    train_dataset,
    **trainer_options
)

In [None]:
# Show the batch size that was handed to the trainer.
batch_size

In [None]:
# setting seed
# torch.manual_seed(seed)
# np.random.seed(seed)

### Start Tensorboard

In [None]:
%load_ext tensorboard
%tensorboard --logdir example_tensorboard

In [None]:
# Number of epochs passed to trainer.train (the N_epochs value parsed from the config is not used here).
num_epochs = 400

In [None]:
%%time
# Start training; the end-of-epoch hook created earlier is invoked by the trainer.
trainer.train(num_epochs=num_epochs)