In [6]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

['data']


In [7]:
import copy
import gzip
import torch
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset
from torch.utils.data import SubsetRandomSampler, DataLoader

def torch_train_val_split(
        dataset, batch_train, batch_eval,
        val_size=.2, test_size=.3, shuffle=True, seed=42):
    # Creating data indices for training and validation splits:
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    val_split = int(np.floor(val_size * dataset_size))
    test_split = int(np.floor(test_size * val_split))
    if shuffle:
        np.random.seed(seed)
        np.random.shuffle(indices)
    
    train_indices = indices[val_split:]
    val_indices = indices[:val_split]
    test_indices = val_indices[:test_split]
    val_indices = val_indices[test_split:]
    
    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)
    test_sampler = SubsetRandomSampler(test_indices)

    train_loader = DataLoader(dataset,
                              batch_size=batch_train,
                              sampler=train_sampler)
    val_loader = DataLoader(dataset,
                            batch_size=batch_eval,
                            sampler=val_sampler)
    test_loader = DataLoader(dataset,
                            batch_size=batch_eval,
                            sampler=test_sampler)
    
    return train_loader, val_loader, test_loader


def read_spectrogram(spectrogram_file, chroma=True):
    with gzip.GzipFile(spectrogram_file, 'r') as f:
        spectrograms = np.load(f)
    # spectrograms contains a fused mel spectrogram and chromagram
    # Decompose as follows
    return spectrograms.T


class LabelTransformer(LabelEncoder):
    def inverse(self, y):
        try:
            return super(LabelTransformer, self).inverse_transform(y)
        except:
            return super(LabelTransformer, self).inverse_transform([y])

    def transform(self, y):
        try:
            return super(LabelTransformer, self).transform(y)
        except:
            return super(LabelTransformer, self).transform([y])

        
class PaddingTransform(object):
    def __init__(self, max_length, padding_value=0):
        self.max_length = max_length
        self.padding_value = padding_value

    def __call__(self, s):
        if len(s) == self.max_length:
            return s

        if len(s) > self.max_length:
            return s[:self.max_length]

        if len(s) < self.max_length:
            s1 = copy.deepcopy(s)
            pad = np.zeros((self.max_length - s.shape[0], s.shape[1]), dtype=np.float32)
            s1 = np.vstack((s1, pad))
            return s1

        
class SpectrogramDataset(Dataset):
    def __init__(self, path, class_mapping=None, train=True, max_length=-1):
        t = 'train' if train else 'test'
        p = os.path.join(path, t)
        self.index = os.path.join(path, "{}_labels.txt".format(t))
        self.files, self.labels = self.get_files_labels(self.index, class_mapping)
        self.feats = [read_spectrogram(os.path.join(p, f)) for f in self.files]
        self.feat_dim = self.feats[0].shape[1]
        self.lengths = [len(i) for i in self.feats]
        self.max_length = max(self.lengths) if max_length <= 0 else max_length
        self.zero_pad_and_stack = PaddingTransform(self.max_length)
        self.label_transformer = LabelTransformer()
        self.labels = np.array(self.labels)

    def get_files_labels(self, txt, class_mapping):
        with open(txt, 'r') as fd:
            lines = [l.rstrip().split(',') for l in fd.readlines()[1:]]
        files, labels = [], []
        for l in lines:
            label = [float(l[1]), float(l[2]), float(l[3])]
            files.append(l[0] + '.fused.full.npy.gz')
            labels.append(label)
        return files, labels

    def __getitem__(self, item):
        l = min(self.lengths[item], self.max_length)
        return self.zero_pad_and_stack(self.feats[item]), self.labels[item], l

    def __len__(self):
        return len(self.files)
      
if __name__ == '__main__':
    specs = SpectrogramDataset('../input/data/data/multitask_dataset_beat', train=True, max_length=-1)
    train_loader, val_loader, test_loader = torch_train_val_split(specs, 32 ,32, val_size=.4)

 ## *Οι συγγραφείς του [7](https://arxiv.org/pdf/1706.05137.pdf) προτείνουν μια καθολική μηχανή εκμάθησης για διάφορα tasks (classification στο ImageNet, translation, WSJ speech recognition etc). Η προτεινόμενη αρχιτεκτονική αποτελείται κυρίως από κάποια modality nets, υπέυθυνα για την εξαγωγή κοινής αναπαράστασης για τα inputs των διάφορων tasks, έναν encoder, έναν mixer και έναν auto-regressive decoder. Τα αποτελέσματα φαίνεται να δείχνουν πως η εκμάθηση πολλών tasks ταυτόχρονα ενισχύει τη δυνατότητα ακρίβειας και γενίκευσης του μοντέλου.*

In [14]:
import torch.nn as nn
torch.set_default_tensor_type(torch.DoubleTensor)

class UniversalCNN(nn.Module):
    def __init__(self, input_dim, output_dim, channels=[12, 24, 48, 96], kernels=[3, 3, 3, 2]):
        super(UniversalCNN, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.channels = channels
        
        self.layer1 = nn.Sequential(nn.Conv2d(in_channels=self.input_dim, out_channels=channels[0], kernel_size=kernels[0]),
                                    nn.BatchNorm2d(channels[0]),
                                    nn.ReLU(inplace=True),
                                    nn.MaxPool2d(kernel_size=kernels[0]))
        self.layer2 = nn.Sequential(nn.Conv2d(in_channels=channels[0], out_channels=channels[1], kernel_size=kernels[1]),
                                    nn.BatchNorm2d(channels[1]),
                                    nn.ReLU(inplace=True),
                                    nn.MaxPool2d(kernel_size=kernels[1]))
        self.layer3 = nn.Sequential(nn.Conv2d(in_channels=channels[1], out_channels=channels[2], kernel_size=kernels[2]),
                                    nn.BatchNorm2d(channels[2]),
                                    nn.ReLU(inplace=True),
                                    nn.MaxPool2d(kernel_size=kernels[2]))
        self.layer4 = nn.Sequential(nn.Conv2d(in_channels=channels[2], out_channels=channels[3], kernel_size=kernels[3]),
                                    nn.BatchNorm2d(channels[3]),
                                    nn.ReLU(inplace=True),
                                    nn.MaxPool2d(kernel_size=kernels[3]))
        self.linear = nn.Sequential(nn.Linear(channels[3], self.output_dim),
                                    nn.Sigmoid())
    
    def forward(self, x):
        x = x.view(x.shape[0], self.input_dim, x.shape[2], x.shape[1])
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.shape[0], -1)#, self.channels[3]) #<- Shmantikh (?) allagh
        x = self.linear(x)
        
        return x

In [15]:
import scipy.stats as scp

def evalCNN(model, loss_function, data_loader, weight, axis=[0, 1, 2]):
    model.eval()
    with torch.no_grad():
        valscores = [[], [], []]
        gold = [[], [], []]
        metric = [[], [], []]
        for i in axis:
            valscores[i] = torch.tensor([])  
            gold[i] = torch.tensor([])
        ValLoss = 0
        for feats, labels, lens in data_loader:
            scores = model(feats)
            for i in axis:
                valscores[i] = torch.cat((valscores[i], scores.view(-1, 3)[:,i]))
                gold[i] = torch.cat((gold[i], labels[:,i]))
                ValLoss += weight[i]*loss_function(scores.view(-1, 3)[:,i], labels[:,i])
        for i in axis:
            metric[i], _ = scp.spearmanr(valscores[i].detach().numpy(), gold[i])

    return sum(metric)/3, ValLoss
            
def trainCNN(model, loss_function, optimizer, epochs, axis=[0, 1, 2], weight=[1/3, 1/2, 1]):
    best_metric = 0
    for epoch in range(epochs):
        TrainLoss = 0
        model.train()
        for feats, labels, lens in (train_loader):
            # Step 1. Remember that Pytorch accumulates gradients.
            # We need to clear them out before each instance
            model.zero_grad()
            optimizer.zero_grad()

            # Step 3. Run our forward pass.
            pred_labels = model(feats)
            #print(pred_labels.shape)
            
            # Step 4. Compute the loss, gradients, and update the parameters by
            #  calling optimizer.step()
            for i in axis:
                loss = weight[i]*loss_function(pred_labels.view(-1, 3)[:,i], labels[:,i])
                TrainLoss += loss
            loss.backward()
            optimizer.step()
        
        metric, ValLoss = evalCNN(model, loss_function, val_loader, weight, axis)
        print("Epoch: " + str(epoch) + " || train loss: " + str(TrainLoss) + " & val loss: " + str(ValLoss))
        print("Metric is: " + str(metric))
        if metric > best_metric:
            best_metric = metric
            best_model = copy.deepcopy(model)
            
    return best_model

In [18]:
model = UniversalCNN(input_dim=1, output_dim=3, kernels=[(2,4), (1,3), 2, (13,2)])
loss_function = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

model1 = trainCNN(model, loss_function, optimizer, 10, weight=[1/8, 1/2, 1]) # arbitrarily chosen

m, _ = evalCNN(model1, loss_function, val_loader, [1/8, 1/2, 1])

print("Multitask accuracy (on validation set) is: " + str(m))

m, _ = evalCNN(model1,loss_function, test_loader, [1/8 , 1/2, 1])
               
print("Multitask accuracy (on test set) is: " + str(m))

Epoch: 0 || train loss: tensor(1.1357, grad_fn=<AddBackward0>) & val loss: tensor(0.4948)
Metric is: 0.02690643100801258
Epoch: 1 || train loss: tensor(0.9430, grad_fn=<AddBackward0>) & val loss: tensor(0.4431)
Metric is: 0.23781810885598395
Epoch: 2 || train loss: tensor(0.9481, grad_fn=<AddBackward0>) & val loss: tensor(0.4719)
Metric is: 0.28059616650054164
Epoch: 3 || train loss: tensor(1.0143, grad_fn=<AddBackward0>) & val loss: tensor(0.5030)
Metric is: 0.25847264211340143
Epoch: 4 || train loss: tensor(0.9758, grad_fn=<AddBackward0>) & val loss: tensor(0.4961)
Metric is: 0.27100194781714
Epoch: 5 || train loss: tensor(0.9728, grad_fn=<AddBackward0>) & val loss: tensor(0.4855)
Metric is: 0.27247804635461415
Epoch: 6 || train loss: tensor(0.9517, grad_fn=<AddBackward0>) & val loss: tensor(0.4989)
Metric is: 0.252608104181997
Epoch: 7 || train loss: tensor(0.9626, grad_fn=<AddBackward0>) & val loss: tensor(0.4945)
Metric is: 0.27645406076051965
Epoch: 8 || train loss: tensor(0.9589

## Το μοντέλο μας παράγει χειρότερα scores, απ' τις προηγούμενες μεθόδους που εξετάσαμε, ωστόσο δεν πειραματιστήκαμε ιδιαίτερα με τις υπερπαραμέτρους. Φυσικά, η επίδραση των βαρών με τα οποία πολ/νταί τα επιμέρους losses παίζει σημαντικό ρόλο στην ακρίβεια του μοντέλου μας. 