In [None]:
# Import Libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import librosa.display as display
import matplotlib.pyplot as plt
import os
import copy

import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from scipy.stats import spearmanr

import seaborn as sns
sns.set_style("darkgrid", {"axes.facecolor": ".9"})
import warnings 
warnings.filterwarnings('ignore')
from collections import Counter

# Torch
import torch
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from torch.utils.data import Dataset
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import joblib

In [None]:
# To add the competition data Click File->Add or Upload data-> Search by URL -> https://www.kaggle.com/geoparslp/patreco3-multitask-affective-music

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# Walk the same '../input' root that the dataset paths in this notebook use;
# './kaggle/input' is relative to the working directory and does not match it.
for dirname, _, filenames in os.walk('../input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Pick the compute device once; later cells move models and batches onto `device`.
is_cuda = torch.cuda.is_available()  # True only when a CUDA GPU is usable

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if is_cuda else "cpu")

## Χρήσιμες Συναρτήσεις Προπαρασκευής

In [None]:
##################################################################################
# Genre mapping applied while reading the genre label file.
# Keys are the raw genre names; values are the target class they are merged into.
# A value of None drops the genre: SpectrogramDataset.get_files_labels skips every
# row whose mapped label is falsy. The non-None values form 10 distinct classes.
CLASS_MAPPING = {
    "Rock": "Rock",
    "Psych-Rock": "Rock",
    "Indie-Rock": None,
    "Post-Rock": "Rock",
    "Psych-Folk": "Folk",
    "Folk": "Folk",
    "Metal": "Metal",
    "Punk": "Metal",
    "Post-Punk": None,
    "Trip-Hop": "Trip-Hop",
    "Pop": "Pop",
    "Electronic": "Electronic",
    "Hip-Hop": "Hip-Hop",
    "Classical": "Classical",
    "Blues": "Blues",
    "Chiptune": "Electronic",
    "Jazz": "Jazz",
    "Soundtrack": None,
    "International": None,
    "Old-Time": None,
}


def torch_train_val_split(
    dataset, batch_train, batch_eval, val_size=0.2, shuffle=True, seed=420
):
    """Split one dataset into a training and a validation DataLoader.

    Args:
        dataset: any object supporting ``len()`` and integer indexing.
        batch_train: batch size of the training loader.
        batch_eval: batch size of the validation loader.
        val_size: fraction of the samples assigned to validation
            (``floor(val_size * len(dataset))`` samples).
        shuffle: shuffle the indices before splitting.
        seed: seed of the shuffle, so the split is reproducible.

    Returns:
        ``(train_loader, val_loader)``; both draw from ``dataset`` through a
        ``SubsetRandomSampler`` restricted to their own indices.
    """
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    val_split = int(np.floor(val_size * dataset_size))
    if shuffle:
        # A private RandomState produces the same permutation as seeding the
        # global generator, but does not reset NumPy's global RNG state for
        # everything that runs after this call.
        np.random.RandomState(seed).shuffle(indices)
    train_indices = indices[val_split:]
    val_indices = indices[:val_split]

    # Samplers restrict each loader to its own share of the indices.
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    train_loader = DataLoader(dataset, batch_size=batch_train, sampler=train_sampler)
    val_loader = DataLoader(dataset, batch_size=batch_eval, sampler=val_sampler)
    return train_loader, val_loader


def read_spectrogram(spectrogram_file, mode = 'fused'):
    """Load a saved spectrogram array and return it transposed.

    Args:
        spectrogram_file: path of a ``.npy`` file.
        mode: ``'mel_spec'`` keeps the first 128 rows (mel spectrogram only),
            ``'chroma'`` keeps the rows from 128 onwards (chromagram only),
            any other value keeps the whole fused array.

    Returns:
        The selected rows, transposed.
    """
    data = np.load(spectrogram_file)
    if mode == 'mel_spec':
        data = data[:128]
    elif mode == 'chroma':
        data = data[128:]
    return data.T


class LabelTransformer(LabelEncoder):
    """LabelEncoder that also accepts a single (scalar) label.

    A scalar is wrapped in a one-element list before it reaches the encoder.
    Sequences are passed through untouched, so a genuine encoder error (for
    example an unseen label) surfaces as raised instead of being retried.
    """

    @staticmethod
    def _as_sequence(y):
        # np.ndim is 0 for plain scalars and strings, >= 1 for lists/arrays.
        return [y] if np.ndim(y) == 0 else y

    def inverse(self, y):
        """Map encoded id(s) back to the original label(s)."""
        return super(LabelTransformer, self).inverse_transform(self._as_sequence(y))

    def transform(self, y):
        """Map label(s) to their encoded integer id(s)."""
        return super(LabelTransformer, self).transform(self._as_sequence(y))


class PaddingTransform(object):
    """Crop or pad a 2-D array along its first axis to exactly ``max_length`` rows.

    Args:
        max_length: number of rows every returned array has.
        padding_value: value written into the appended rows (default 0).
    """

    def __init__(self, max_length, padding_value=0):
        self.max_length = max_length
        self.padding_value = padding_value

    def __call__(self, s):
        """Return ``s`` unchanged, cropped, or padded at the end with ``padding_value``."""
        n_rows = len(s)
        if n_rows == self.max_length:
            return s

        if n_rows > self.max_length:
            return s[: self.max_length]

        # np.vstack builds a new array, so the input is never modified.
        pad = np.full(
            (self.max_length - n_rows, s.shape[1]), self.padding_value, dtype=np.float32
        )
        return np.vstack((s, pad))


class SpectrogramDataset(Dataset):
    """Dataset of spectrograms loaded fully into memory, with padded features.

    Reads ``<path>/<split>_labels.txt`` and loads the matching ``.npy`` files
    from ``<path>/<split>/`` (split is ``train`` or ``test``).

    Args:
        path: dataset root directory.
        class_mapping: optional dict from raw label to target label; rows whose
            mapped label is falsy are dropped. Ignored in regression mode.
        train: choose the ``train`` or the ``test`` split.
        max_length: rows each item is cropped/padded to; ``<= 0`` uses the
            longest loaded sequence.
        regression: falsy for classification; otherwise the column index of the
            comma-separated label file used as the float target.
        mode: forwarded to ``read_spectrogram``.

    Items are ``(padded_features, label, length)`` tuples.
    """

    def __init__(
        self, path, class_mapping=None, train=True, max_length=-1, regression=None, mode='mel_spec'
    ):
        split = "train" if train else "test"
        split_dir = os.path.join(path, split)
        # Must be set before get_files_labels runs: that method branches on it.
        self.regression = regression

        self.index = os.path.join(path, "{}_labels.txt".format(split))
        self.files, labels = self.get_files_labels(self.index, class_mapping)

        loaded = []
        for fname in self.files:
            loaded.append(read_spectrogram(os.path.join(split_dir, fname), mode))
        self.feats = loaded
        self.feat_dim = self.feats[0].shape[1]
        self.lengths = [len(feat) for feat in self.feats]
        if max_length <= 0:
            self.max_length = max(self.lengths)
        else:
            self.max_length = max_length
        self.zero_pad_and_stack = PaddingTransform(self.max_length)
        self.label_transformer = LabelTransformer()

        if not isinstance(labels, (list, tuple)):
            return
        if regression:
            # Regression targets are the raw strings parsed as floats.
            self.labels = np.array(labels).astype("float64")
        else:
            # Classification targets are integer ids fitted on the kept labels.
            encoded = self.label_transformer.fit_transform(labels)
            self.labels = np.array(encoded).astype("int64")

    def get_files_labels(self, txt, class_mapping):
        """Parse the label file (header line skipped) into file names and labels."""
        with open(txt, "r") as fd:
            rows = [row.rstrip().split("\t") for row in fd.readlines()[1:]]

        files, labels = [], []
        for row in rows:
            if self.regression:
                # Regression files are comma-separated: id, then the target columns.
                fields = row[0].split(",")
                files.append(fields[0] + ".fused.full.npy")
                labels.append(fields[self.regression])
            else:
                label = class_mapping[row[1]] if class_mapping else row[1]
                if label:
                    # Keep only the id in front of the first '.' of the listed name.
                    files.append('{}.fused.full.npy'.format(row[0].split('.')[0]))
                    labels.append(label)
        return files, labels

    def __getitem__(self, item):
        """Return ``(padded_features, label, length)`` with length capped at max_length."""
        padded = self.zero_pad_and_stack(self.feats[item])
        valid_length = min(self.lengths[item], self.max_length)
        return padded, self.labels[item], valid_length

    def __len__(self):
        """Number of samples, i.e. number of labels."""
        return len(self.labels)

##################################################################################

In [None]:
##### Load mel spectrograms (not beat_synced) #####

# Train dataset: mel rows only, genres merged/dropped through CLASS_MAPPING,
# every item padded to the longest training sequence (max_length=-1).
mel_train_set = SpectrogramDataset('../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/', train=True,
                                     class_mapping=CLASS_MAPPING, max_length=-1,
                                     mode = 'mel_spec')
# Train and validation loaders - batch size 32, 33% of the samples held out for validation
train_loader_mel, val_loader_mel = torch_train_val_split(mel_train_set, 32, 32, val_size=.33)

# Test dataset. NOTE(review): max_length=-1 pads to the longest *test* sequence,
# which may differ from the training pad length - confirm both match the CNN input size.
mel_test_set = SpectrogramDataset('../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/', train=False,
                                             class_mapping=CLASS_MAPPING, max_length=-1,
                                             mode = 'mel_spec')
# Test loader - batch size 16, sequential order (no sampler, no shuffling)
test_loader_mel = DataLoader(mel_test_set, batch_size=16)

In [None]:
# Lab 2 Basic LSTM Class
class BasicLSTM(nn.Module):
    """(Bi)LSTM over padded sequences followed by a linear layer.

    The linear layer is applied to the LSTM output at the last valid timestep
    of every sequence (see ``last_timestep``).

    Args:
        input_dim: number of features per timestep.
        hidden_size: LSTM hidden size per direction.
        output_dim: size of the linear output.
        num_layers: number of stacked LSTM layers.
        bidirectional: use a bidirectional LSTM.
        dropout: dropout between stacked LSTM layers (passed to ``nn.LSTM``).
    """

    def __init__(self, input_dim, hidden_size, output_dim, num_layers, bidirectional=False, dropout = 0):
        super(BasicLSTM, self).__init__()
        self.bidirectional = bidirectional
        self.feature_size = hidden_size * 2 if self.bidirectional else hidden_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: inputs and outputs are (batch, sequence, feature).
        self.lstm = nn.LSTM(input_dim, hidden_size, num_layers, bidirectional=self.bidirectional, batch_first=True, dropout=dropout)
        self.linear = nn.Linear(self.feature_size, output_dim)

    def forward(self, x, lengths):
        """Run the network.

        Args:
            x: tensor of shape N x L x D (batch, sequence, feature).
            lengths: N valid sequence lengths (before padding).

        Returns:
            Tensor of shape N x output_dim; the batch axis is kept even for N == 1.
        """
        num_states = self.num_layers * (2 if self.bidirectional else 1)

        # Create the initial states on the input's device and dtype, so the
        # module works wherever it was moved and in float32 as well as float64.
        h0 = torch.zeros(num_states, x.size(0), self.hidden_size, device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        # NOTE(review): the padded batch is fed without packing, so the backward
        # direction also processes the padding rows before reaching real data.
        outputs, _ = self.lstm(x, (h0, c0))
        # Final linear layer on the last valid timestep of each sequence.
        return self.linear(self.last_timestep(outputs, lengths, self.bidirectional))

    def last_timestep(self, outputs, lengths, bidirectional=False):
        """
            Returns the last output of the LSTM taking into account the zero padding
        """
        if bidirectional:
            forward, backward = self.split_directions(outputs)
            last_forward = self.last_by_index(forward, lengths)
            last_backward = backward[:, 0, :]
            # Concatenate forward (at length-1) and backward (at index 0) features.
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)

    @staticmethod
    def split_directions(outputs):
        """Split the last axis into its forward and backward halves."""
        direction_size = int(outputs.size(-1) / 2)
        forward = outputs[:, :, :direction_size]
        backward = outputs[:, :, direction_size:]
        return forward, backward

    @staticmethod
    def last_by_index(outputs, lengths):
        """Pick ``outputs[i, lengths[i] - 1, :]`` for every sequence ``i``."""
        # gather needs int64 indices on the same device as `outputs`.
        last = lengths.to(outputs.device).long() - 1
        idx = last.view(-1, 1).expand(outputs.size(0), outputs.size(2)).unsqueeze(1)
        # squeeze(1) removes only the gathered time axis; a bare squeeze() would
        # also drop the batch axis for a one-sample batch.
        return outputs.gather(1, idx).squeeze(1)

In [None]:
# Evaluates a network (CNN or LSTM) on every batch of a dataloader
def eval_NN(dataloader, model, criterion, regression = False, is_CNN = True):
    """Compute the mean batch loss and collect predictions without gradients.

    Args:
        dataloader: iterable of ``(inputs, labels, lengths)`` batches.
        model: called as ``model(inputs)`` when ``is_CNN`` else ``model(inputs, lengths)``.
        criterion: loss called as ``criterion(predictions, labels)``.
        regression: float targets and raw outputs as predictions when True;
            integer class targets and argmax predictions when False.
        is_CNN: inputs are cast to float32 when True, float64 otherwise.

    Returns:
        ``(mean_loss, (y_gold, y_pred))`` where the lists hold one NumPy array
        per batch. ``mean_loss`` is NaN when the dataloader yields no batch.
    """
    model.eval()  # switch to evaluation mode (left in this mode on return)

    # Move batches to wherever the model lives; fall back to the notebook-level
    # `device` only for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device
    float_dtype = torch.float32 if is_CNN else torch.float64

    total_loss = 0.0
    num_batches = 0
    y_gold = []
    y_pred = []

    with torch.no_grad():  # don't keep gradients
        for inputs, labels, lengths in dataloader:
            inputs = inputs.to(target_device).to(float_dtype)
            lengths = lengths.to(target_device)
            if regression:
                labels = labels.to(target_device).to(float_dtype)
            else:
                # Class indices stay integer for CNN and LSTM alike.
                labels = labels.to(target_device).long()

            # Forward pass
            y_preds = model(inputs) if is_CNN else model(inputs, lengths)

            if regression:
                prediction = y_preds
                loss_input = y_preds
                # An (N, 1) output against (N,) targets would broadcast to
                # (N, N) inside the loss; drop the singleton axis first.
                if y_preds.dim() == labels.dim() + 1 and y_preds.size(-1) == 1:
                    loss_input = y_preds.squeeze(-1)
            else:
                # Prediction: argmax over the class scores
                prediction = torch.argmax(y_preds, dim=1)
                loss_input = y_preds

            loss = criterion(loss_input, labels)

            total_loss += loss.item()
            num_batches += 1
            y_pred.append(prediction.cpu().numpy())
            y_gold.append(labels.cpu().numpy())

    if num_batches == 0:
        return float("nan"), (y_gold, y_pred)
    return total_loss / num_batches, (y_gold, y_pred)

In [None]:
def train_LSTM(train_loader, val_loader, epochs, save_dir, lr = 0.001, 
              weight_decay = 0.0001, early_stopping=True):
    """Train a 4-layer bidirectional LSTM regressor and checkpoint the best epoch.

    The model (128 inputs, hidden size 128, one output, dropout 0.3) is trained
    with MSE loss and Adam. After every epoch it is evaluated on ``val_loader``;
    the state dict is written to ``save_dir`` whenever the validation loss improves.

    Args:
        train_loader: loader of ``(inputs, labels, lengths)`` training batches.
        val_loader: loader used for the per-epoch validation loss.
        epochs: maximum number of epochs.
        save_dir: file path the best state dict is written to.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        early_stopping: stop after 10 consecutive epochs without improvement.

    Side effects: prints one line per epoch, shows a loss plot, writes ``save_dir``.
    """
    model_lstm = BasicLSTM(input_dim = 128, hidden_size = 128, 
                           output_dim = 1, num_layers = 4,
                           bidirectional=True, dropout = 0.3).double().to(device)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model_lstm.parameters(), lr=lr, weight_decay=weight_decay)

    loss_values_train = []
    loss_values_val = []
    counter = 0
    best_val_loss = float("inf")
    max_increases = 10
    checkpoint_saved = False

    for i in range(epochs):
        # eval_NN leaves the model in eval mode, so re-enable training mode here.
        model_lstm.train()
        train_losses = []

        for inputs, labels, lengths in train_loader:
            inputs, labels, lengths = inputs.to(device), labels.to(device), lengths.to(device)

            model_lstm.zero_grad()
            output = model_lstm(inputs.double(), lengths)
            # Flatten both sides so a one-sample batch cannot change the shapes.
            loss = criterion(output.reshape(-1), labels.double().reshape(-1))
            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        val_loss, _ = eval_NN(val_loader, model_lstm, criterion,
                              regression=True, is_CNN = False)

        # Collect losses; report the epoch mean, not just the last batch.
        mean_train_loss = np.mean(train_losses)
        loss_values_val.append(val_loss)
        loss_values_train.append(mean_train_loss)

        print("Epoch: {}/{}:".format(i+1, epochs),
              "Train Loss: {:.6f} - ".format(mean_train_loss),
              "Val Loss: {:.6f}".format(val_loss))

        # Checkpoint on improvement, otherwise count towards early stopping.
        if val_loss < best_val_loss:
            torch.save(model_lstm.state_dict(), save_dir)
            checkpoint_saved = True
            best_val_loss = val_loss
            counter = 0
        else:
            counter += 1

        if early_stopping and counter >= max_increases:
            print('Early Stopping')
            break

    plt.figure()
    plt.plot(range(len(loss_values_train)),loss_values_train, label = 'Train Loss')
    plt.plot(range(len(loss_values_val)), loss_values_val, label = 'Validation Loss')
    plt.xlabel('Epochs')
    plt.legend()
    plt.show()

    # The best checkpoint is already on disk and must not be overwritten with
    # the last epoch's weights. Only save here if no epoch ever checkpointed.
    if not checkpoint_saved:
        torch.save(model_lstm.state_dict(), save_dir)

## Βήμα 7: 2D CNNs

Υλοποιούμε ένα 2D CNN με 4 επίπεδα (layers) που θα επεξεργάζεται το φασματογράφημα σαν
μονοκάναλη εικόνα. Το εκπαιδεύουμε στο train + validation set και αναφέρουμε τα αποτελέσματα
στο test set. Κάθε επίπεδο θα πραγματοποιεί τις εξής λειτουργίες (operations) με αυτή τη σειρά:
1) 2D convolution <br/>
2) Batch normalization <br/>
3) ReLU activation <br/>
4) Max pooling

In [None]:
class CNN_2D(nn.Module):
    """Four-block 2D CNN that treats a spectrogram as a single-channel image.

    Each block is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> MaxPool2d,
    with 32/64/128/256 channels and pooling sizes 2/2/4/4. The flattened
    feature map is fed to ReLU -> Dropout -> Linear.

    Args:
        output_dim: size of the final linear output.
        flat_features: flattened size of the last feature map, i.e.
            256 * (H // 64) * (W // 64) for an H x W input when each pooling
            stage divides evenly. The default 10240 (256 * 2 * 20) matches
            128 feature rows and roughly 1290 time frames - TODO confirm the
            actual padded length used in this notebook.
    """

    def __init__(self, output_dim = 10, flat_features = 10240):
        super(CNN_2D,self).__init__()

        self.main_body = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4),

        )

        # Fully connected head; layer order is kept so saved state dicts still load.
        self.fc = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(in_features=flat_features, out_features=output_dim)
        )

    def forward(self, x):
        """Map a 3-D batch (batch, dim1, dim2) to (batch, output_dim)."""
        # Swap the two data axes and add a channel axis: (batch, 1, dim2, dim1).
        x = x.transpose(1, 2)
        x = torch.unsqueeze(x, 1)

        x = self.main_body(x)
        x = x.view(x.size(0), -1)  # flatten everything but the batch axis
        return self.fc(x)

In [None]:
# Main Pipeline that Trains on Train Set and Evaluates on Validation Set
def train_CNN(data_loader, val_loader, epochs, batch_size, save_dir, lr = 0.001, 
              weight_decay = 0.0001, regression=False, early_stopping = True, load_model = False):
    """Train a CNN_2D model and checkpoint the epoch with the best validation loss.

    Args:
        data_loader: loader of ``(inputs, labels, lengths)`` training batches.
        val_loader: loader used for the per-epoch validation loss.
        epochs: maximum number of epochs.
        batch_size: kept for backward compatibility; batching is decided by
            the loaders, so this value has no effect.
        save_dir: file path the best state dict is written to (replaced by
            ``"CNN_transfer"`` when ``load_model`` is True).
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        regression: one output with MSE loss when True, 10 outputs with
            cross-entropy when False.
        early_stopping: stop after 10 consecutive epochs without improvement.
        load_model: transfer learning - load the ``"CNN_step7"`` weights,
            freeze them and train only a fresh one-output head.

    Side effects: prints one line per epoch, shows a loss plot, writes the checkpoint.
    """
    if not regression:
        model_cnn = CNN_2D(output_dim = 10)
        criterion = nn.CrossEntropyLoss()
    else:
        model_cnn = CNN_2D(output_dim = 1)
        criterion = nn.MSELoss()

    if load_model: # load model for transfer learning

        # Load the CNN trained on the fma spectrograms in step 7.
        model_cnn = CNN_2D(output_dim=10).to(device).float()
        model_cnn.load_state_dict(torch.load("CNN_step7", map_location=device))

        # Freeze every pretrained parameter ...
        for weight in model_cnn.parameters():
            weight.requires_grad = False

        # ... and attach a new, trainable one-output head.
        model_cnn.fc = nn.Sequential(
                    nn.ReLU(),
                    nn.Dropout(),
                    nn.Linear(in_features=10240, out_features=1) #10240
                ).to(device)
        save_dir = "CNN_transfer"

    model_cnn.to(device).float()
    # Only trainable parameters go to the optimizer (frozen ones never get
    # gradients); honor the weight_decay argument instead of a fixed constant.
    trainable_params = [p for p in model_cnn.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable_params, lr=lr, weight_decay=weight_decay)

    loss_values_train = []
    loss_values_val = []

    # Early stopping state
    counter = 0
    best_val_loss = float("inf")
    max_increases = 10
    checkpoint_saved = False

    for i in range(epochs):
        # eval_NN switches the model to eval mode after every epoch; without
        # this call all epochs after the first would train with dropout off
        # and batch-norm statistics frozen.
        model_cnn.train()
        if load_model:
            # Keep the frozen body's batch-norm running statistics fixed.
            model_cnn.main_body.eval()

        train_losses = []
        for inputs, labels, lengths in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            model_cnn.zero_grad()
            output = model_cnn(inputs.float())

            if not regression:
                # Keep the (N, classes) shape even for a one-sample batch.
                loss = criterion(output, labels.long())
            else:
                loss = criterion(output.reshape(-1), labels.float().reshape(-1))

            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        val_loss, _ = eval_NN(val_loader, model_cnn, criterion, regression)
        loss_values_val.append(val_loss)

        # Report the epoch mean, not just the last batch.
        mean_train_loss = np.mean(train_losses)
        print("Epoch: {}/{}:".format(i+1, epochs),
              "Train Loss: {:.6f} - ".format(mean_train_loss),
              "Val Loss: {:.6f}".format(val_loss))

        loss_values_train.append(mean_train_loss)

        # Checkpoint on improvement, otherwise count towards early stopping.
        if val_loss < best_val_loss:
            torch.save(model_cnn.state_dict(), save_dir)
            checkpoint_saved = True
            best_val_loss = val_loss
            counter = 0
        else:
            counter += 1

        if early_stopping and counter >= max_increases:
            print('Early Stopping')
            break

    plt.figure()
    plt.plot(range(len(loss_values_train)),loss_values_train, label = 'Train Loss')
    plt.plot(range(len(loss_values_val)), loss_values_val, label = 'Validation Loss')
    plt.xlabel('Epochs')
    plt.legend()
    plt.show()

    # The best checkpoint is already on disk and must not be overwritten with
    # the last epoch's weights. Only save here if no epoch ever checkpointed.
    if not checkpoint_saved:
        torch.save(model_cnn.state_dict(), save_dir)

In [None]:
# Train the 10-class genre CNN for up to 30 epochs (early stopping is on by default);
# the weights are written to 'CNN_step7'. The loaders already batch by 32.
train_CNN(train_loader_mel, val_loader_mel, epochs = 30, batch_size = 32, save_dir = 'CNN_step7', regression=False)

In [None]:
# Classification Report on Test
def test_report_CNN(model, test_loader, model_dir, criterion, regression = False):
    """Load saved weights into ``model`` and return a classification report.

    Args:
        model: network whose architecture matches the saved state dict.
        test_loader: loader of ``(inputs, labels, lengths)`` batches.
        model_dir: path of the state dict file.
        criterion: loss forwarded to ``eval_NN``.
        regression: forwarded to ``eval_NN``.

    Returns:
        The text produced by ``classification_report`` over all batches.
    """
    saved_state = torch.load(model_dir)
    model.load_state_dict(saved_state)
    _, (gold_batches, pred_batches) = eval_NN(test_loader, model, criterion, regression)
    # eval_NN returns one array per batch; join them into flat label vectors.
    gold = np.concatenate(gold_batches)
    pred = np.concatenate(pred_batches)
    return classification_report(gold, pred)

In [None]:
# Fresh 10-class CNN; test_report_CNN loads the 'CNN_step7' weights into it
# and prints the per-class report on the genre test loader.
cnn_step7 = CNN_2D(10).to(device).float()
criterion = nn.CrossEntropyLoss()
print(test_report_CNN(cnn_step7, test_loader_mel, 'CNN_step7', criterion, regression = False))

## Βήμα 8: Εκτίμηση συναισθήματος - συμπεριφοράς με παλινδρόμηση

In [None]:
# Load the multitask data once per target; `regression` is the label-file column
# used as target: valence (1), energy (2), danceability (3).
# NOTE(review): each SpectrogramDataset loads its own copy of the spectrograms,
# so the features are held in memory three times.

valence_dataset = SpectrogramDataset('../input/patreco3-multitask-affective-music/data/multitask_dataset/',
                            train=True,
                            class_mapping=None, max_length=-1,
                            regression=1)

energy_dataset = SpectrogramDataset('../input/patreco3-multitask-affective-music/data/multitask_dataset/',
                            train=True,
                            class_mapping=None, max_length=-1,
                            regression=2)

danceability_dataset = SpectrogramDataset('../input/patreco3-multitask-affective-music/data/multitask_dataset/',
                            train=True,
                            class_mapping=None, max_length=-1,
                            regression=3)

# Train/validation split (80/20, batch size 32) for each target. The default seed
# is used every time, so equally sized datasets get the same index split.
train_loader_valence, val_loader_valence = torch_train_val_split(valence_dataset, batch_train=32, batch_eval=32, val_size=.2)
train_loader_energy, val_loader_energy = torch_train_val_split(energy_dataset, batch_train=32, batch_eval=32, val_size=.2)
train_loader_danceability, val_loader_danceability = torch_train_val_split(danceability_dataset, batch_train=32, batch_eval=32, val_size=.2)

In [None]:
def test_lstm_multitask(test_loader,model_dir):
    """Score a saved LSTM regressor with the Spearman correlation.

    Rebuilds the architecture used by ``train_LSTM``, loads the state dict from
    ``model_dir``, predicts every batch of ``test_loader``, shows a scatter plot
    of predictions against targets and prints the correlation.

    Args:
        test_loader: loader of ``(inputs, labels, lengths)`` batches.
        model_dir: path of the saved state dict.

    Returns:
        The Spearman correlation between targets and predictions.
    """
    model_lstm = BasicLSTM(input_dim = 128, hidden_size = 128,  bidirectional = True,
                       output_dim = 1, num_layers = 4, dropout = 0.3)
    model_lstm.to(device).double()
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    model_lstm.load_state_dict(torch.load(model_dir, map_location=device))
    model_lstm.eval()

    y_pred_test = []
    y_true = []
    with torch.no_grad():  # inference only
        for inputs, labels, lengths in test_loader:
            inputs, lengths = inputs.to(device), lengths.to(device)
            output = model_lstm(inputs.double(), lengths)
            # Store flat per-batch arrays; batches may differ in size, so they
            # are concatenated rather than stacked.
            y_pred_test.append(output.reshape(-1).cpu().numpy())
            y_true.append(labels.reshape(-1).cpu().numpy())

    true = np.concatenate(y_true)
    pred = np.concatenate(y_pred_test)

    # Scatter plot of predictions against gold labels, with the y = x line
    plt.scatter(true, pred)
    plt.xlabel('y_true')
    plt.ylabel('y_pred')
    golden_line = np.linspace(0,1,1000)
    plt.plot(golden_line,golden_line, '--', color='m')
    plt.show()

    rho = spearmanr(true, pred).correlation
    print('\nTest set: Spearman Correlation: {:.6f} \n'.format(rho))
    return rho

In [None]:
# Evaluate with Spearman Correlation
def test_cnn_multitask(test_loader, model_dir):
    """Score a saved one-output CNN regressor with the Spearman correlation.

    Builds ``CNN_2D(output_dim=1)``, loads the state dict from ``model_dir``,
    predicts every batch of ``test_loader``, shows a scatter plot of predictions
    against targets and prints the correlation.

    Args:
        test_loader: loader of ``(inputs, labels, lengths)`` batches.
        model_dir: path of the saved state dict.

    Returns:
        The Spearman correlation between targets and predictions.
    """
    model_cnn = CNN_2D(output_dim=1).to(device).float()
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    model_cnn.load_state_dict(torch.load(model_dir, map_location=device))
    model_cnn.eval()

    y_pred_test = []
    y_true = []
    with torch.no_grad():  # inference only
        for inputs, labels, lengths in test_loader:
            inputs = inputs.to(device)
            output = model_cnn(inputs.float())
            # Store flat per-batch arrays; batches may differ in size, so they
            # are concatenated rather than stacked.
            y_pred_test.append(output.reshape(-1).cpu().numpy())
            y_true.append(labels.reshape(-1).cpu().numpy())

    true = np.concatenate(y_true)
    pred = np.concatenate(y_pred_test)

    # Scatter plot of predictions against gold labels, with the y = x line
    plt.scatter(true, pred)
    plt.xlabel('y_true')
    plt.ylabel('y_pred')
    golden_line = np.linspace(0,1,1000)
    plt.plot(golden_line,golden_line, '--', color='m')
    plt.show()

    # Get Spearman correlation
    rho = spearmanr(true, pred).correlation
    print('\nTest set: Spearman Correlation: {:.6f} \n'.format(rho))
    return rho

### (a) Valence Label

In [None]:
# Train the LSTM regressor on valence for all 30 epochs (early stopping disabled).
train_LSTM(train_loader_valence, val_loader_valence, epochs = 30,
           save_dir = 'LSTM_valence', early_stopping=False)

In [None]:
# Despite the "test" name, this scores the saved model on the validation loader.
rho_LSTM_valence = test_lstm_multitask(val_loader_valence, 'LSTM_valence')

In [None]:
# Train the CNN regressor on valence for all 30 epochs. The loaders batch by 32;
# batch_size = 16 here does not change how the data is batched.
train_CNN(train_loader_valence, val_loader_valence, epochs = 30, batch_size = 16, save_dir = 'CNN_valence', regression=True, early_stopping = False)

In [None]:
# Spearman correlation of the saved valence CNN, measured on the validation loader.
rho_CNN_valence = test_cnn_multitask(val_loader_valence,'CNN_valence')

### (b) Energy

In [None]:
# Train the LSTM regressor on energy for all 30 epochs (early stopping disabled).
train_LSTM(train_loader_energy, val_loader_energy, epochs = 30,
           save_dir = 'LSTM_energy', early_stopping=False)

In [None]:
# Spearman correlation of the saved energy LSTM, measured on the validation loader.
rho_LSTM_energy = test_lstm_multitask(val_loader_energy, 'LSTM_energy')

In [None]:
# Train the CNN regressor on energy for all 30 epochs. The loaders batch by 32;
# batch_size = 16 here does not change how the data is batched.
train_CNN(train_loader_energy, val_loader_energy, epochs = 30, batch_size = 16, save_dir = 'CNN_energy', regression=True, early_stopping = False)

In [None]:
# Spearman correlation of the saved energy CNN, measured on the validation loader.
rho_CNN_energy = test_cnn_multitask(val_loader_energy,'CNN_energy')

### (c) Danceability

In [None]:
# Train the LSTM regressor on danceability for all 30 epochs (early stopping disabled).
train_LSTM(train_loader_danceability, val_loader_danceability, epochs = 30,
           save_dir = 'LSTM_danceability', early_stopping=False)

In [None]:
# Spearman correlation of the saved danceability LSTM, measured on the validation loader.
rho_LSTM_dance = test_lstm_multitask(val_loader_danceability, 'LSTM_danceability')

In [None]:
# Train the CNN regressor on danceability for all 30 epochs. The loaders batch by 32;
# batch_size = 16 here does not change how the data is batched.
train_CNN(train_loader_danceability, val_loader_danceability, epochs = 30, batch_size = 16, save_dir = 'CNN_dance', regression=True, early_stopping = False)

In [None]:
# Spearman correlation of the saved danceability CNN, measured on the validation loader.
rho_CNN_dance = test_cnn_multitask(val_loader_danceability,'CNN_dance')

### Mean Spearman Correlations per Model

In [None]:
# Unweighted mean of the three per-target LSTM correlations.
rho_LSTM_mean = np.mean([rho_LSTM_valence, rho_LSTM_energy, rho_LSTM_dance])
print('LSTM Mean Spearman Correlation: ', rho_LSTM_mean)

In [None]:
# Unweighted mean of the three per-target CNN correlations.
rho_CNN_mean = np.mean([rho_CNN_valence, rho_CNN_energy, rho_CNN_dance])
print('CNN Mean Spearman Correlation: ', rho_CNN_mean)

## Βήμα 9: Transfer Learning

In [None]:
# Transfer learning on energy: train for only a few epochs.
# With load_model = True, train_CNN reads the pretrained weights from 'CNN_step7'
# and overrides save_dir internally, so the result is written to 'CNN_transfer'
# (the save_dir value passed here is not used as the output path).
train_CNN(train_loader_energy, val_loader_energy, epochs = 10, 
          batch_size = 32, save_dir = 'CNN_step7', 
          regression=True, early_stopping = False, load_model = True)

In [None]:
# Spearman correlation of the transfer-learned energy CNN, measured on the validation loader.
rho_CNN_energy_transfer = test_cnn_multitask(val_loader_energy,'CNN_transfer')

## Βήμα 10: Multitask Learning

In [None]:
class MultitaskDataset(Dataset):
    """In-memory spectrogram dataset with valence / energy / danceability targets.

    Args:
        path: dataset root; labels are read from ``<path>/train_labels.txt``.
        max_length: rows each item is cropped/padded to; ``<= 0`` uses the
            longest loaded sequence.
        read_spec_fn: callable ``(file_path, mode)`` returning one feature array.
        mode: forwarded to ``read_spec_fn``.
        label_type: ``'valence'``, ``'energy'`` or ``'danceability'`` for a
            single float target; any other value yields all three as a list.
        train: read features from ``<path>/train`` (True) or ``<path>/test``.

    NOTE(review): the label file is always ``train_labels.txt``, also when
    ``train`` is False - confirm this is intended for the test split.
    """

    def __init__(self, path, max_length=-1, read_spec_fn=read_spectrogram, mode = 'mel_spec',
                 label_type='all', train = True):
        split_dir = os.path.join(path, 'train' if train else 'test')

        # Must be set before get_files_labels runs: that method branches on it.
        self.label_type = label_type
        self.index = os.path.join(path, "train_labels.txt")
        self.files, labels = self.get_files_labels(self.index)

        loaded = []
        for fname in self.files:
            loaded.append(read_spec_fn(os.path.join(split_dir, fname), mode))
        self.feats = loaded
        self.feat_dim = self.feats[0].shape[1]
        self.lengths = [len(feat) for feat in self.feats]
        if max_length <= 0:
            self.max_length = max(self.lengths)
        else:
            self.max_length = max_length
        self.zero_pad_and_stack = PaddingTransform(self.max_length)
        if isinstance(labels, (list, tuple)):
            self.labels = np.array(labels)

    def get_files_labels(self, txt):
        """Parse the comma-separated label file (header line skipped)."""
        with open(txt, 'r') as fd:
            rows = [row.split(',') for row in fd.readlines()[1:]]

        files, labels = [], []
        for row in rows:
            kind = self.label_type
            if kind == 'valence':
                target = float(row[1])
            elif kind == 'energy':
                target = float(row[2])
            elif kind == 'danceability':
                target = float(row[3].strip("\n"))
            else:
                # All three targets, in (valence, energy, danceability) order.
                target = [float(row[1]), float(row[2]), float(row[3].strip("\n"))]
            labels.append(target)
            # Column 0 is the sample id; features live in '<id>.fused.full.npy'.
            files.append('{}.fused.full.npy'.format(row[0]))
        return files, labels

    def __getitem__(self, item):
        """Return ``(padded_features, label(s), length)`` with length capped at max_length."""
        padded = self.zero_pad_and_stack(self.feats[item])
        valid_length = min(self.lengths[item], self.max_length)
        return padded, self.labels[item], valid_length

    def __len__(self):
        """Number of samples, i.e. number of label rows."""
        return len(self.labels)

In [None]:
# Training split with all three targets per sample (label_type = 'all').
multitask_dataset = MultitaskDataset('../input/patreco3-multitask-affective-music/data/multitask_dataset/',
                            max_length=-1, label_type = 'all')

In [None]:
# 80/20 train/validation split of the multitask dataset, batch size 32 for both loaders.
train_loader_multitask, val_loader_multitask = torch_train_val_split(multitask_dataset,
                                                                     batch_train=32, 
                                                                     batch_eval=32, val_size=.2)

In [None]:
# Our Loss Function as sum of all losses for valence, energy, danceability
class Multitask_loss(nn.Module):
    """Sum of three MSE losses, one per output column.

    Column 0 is valence, column 1 energy, column 2 danceability.
    """

    @staticmethod
    def forward(preds, golds):
        """Return MSE(col 0) + MSE(col 1) + MSE(col 2).

        Declared static so that both the existing ``Multitask_loss.forward(p, g)``
        call style and the usual ``Multitask_loss()(p, g)`` module call work.

        Args:
            preds: predictions indexed as ``[:, 0..2]``.
            golds: targets with the same layout.
        """
        criterion = nn.MSELoss()
        loss_valence = criterion(preds[:,0], golds[:,0])
        loss_energy = criterion(preds[:,1], golds[:,1])
        loss_danceability = criterion(preds[:,2], golds[:,2])

        return loss_valence + loss_energy + loss_danceability

In [None]:
# Evaluates a multitask CNN (valence, energy, danceability outputs) on a dataloader
def eval_CNN_multitask(dataloader, model):
    """Compute the mean multitask loss and collect predictions without gradients.

    Args:
        dataloader: iterable of ``(inputs, labels, lengths)`` batches.
        model: network called as ``model(inputs)``.

    Returns:
        ``(mean_loss, (y_gold, y_pred))`` where the lists hold one NumPy array
        per batch. ``mean_loss`` is NaN when the dataloader yields no batch.
    """
    model.eval()  # switch to evaluation mode (left in this mode on return)

    total_loss = 0.0
    num_batches = 0
    y_gold = []
    y_pred = []

    with torch.no_grad():  # don't keep gradients
        for inputs, labels, lengths in dataloader:
            # Move the batch tensors to the right device
            inputs = inputs.to(device).float()
            labels = labels.to(device).float()

            # Forward pass and summed per-target MSE
            y_preds = model(inputs)
            loss = Multitask_loss.forward(y_preds, labels)

            total_loss += loss.item()
            num_batches += 1
            y_pred.append(y_preds.cpu().numpy())
            y_gold.append(labels.cpu().numpy())

    # An empty loader has no batches to average over.
    if num_batches == 0:
        return float("nan"), (y_gold, y_pred)
    return total_loss / num_batches, (y_gold, y_pred)

In [None]:
# Main Pipeline that Trains on Train Set and Evaluates on Validation Set
def train_CNN_multitask(data_loader, val_loader, epochs, batch_size, save_dir, lr = 0.001,
              weight_decay = 0.0001, early_stopping = True):
    """Train a 3-output ``CNN_2D`` regressor and checkpoint its best validation epoch.

    Args:
        data_loader: training loader yielding ``(inputs, labels, lengths)`` batches.
        val_loader: validation loader, scored after every epoch with
            ``eval_CNN_multitask``.
        epochs: maximum number of training epochs.
        batch_size: unused; kept so existing calls keep working (batching is
            done by the loaders).
        save_dir: file path the best ``state_dict`` is written to.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        early_stopping: if True, stop after 10 consecutive epochs without a new
            best validation loss.

    Returns:
        None. Side effects: writes the checkpoint to ``save_dir``, prints the
        per-epoch losses and shows the train / validation loss curves.
    """
    # Model and optimizer
    model_cnn = CNN_2D(output_dim=3)
    model_cnn.to(device).float()
    optimizer = torch.optim.Adam(model_cnn.parameters(), lr=lr, weight_decay=weight_decay)

    loss_values_train = []
    loss_values_val = []

    # Early stopping state
    epochs_without_improvement = 0
    best_val_loss = float("inf")
    max_increases = 10

    for epoch in range(epochs):
        # eval_CNN_multitask() leaves the model in eval mode, so training mode
        # has to be re-enabled at the start of every epoch.
        model_cnn.train()

        train_losses = []
        for inputs, labels, _lengths in data_loader:
            inputs = inputs.to(device).float()
            labels = labels.to(device).float()

            optimizer.zero_grad()
            # The raw output is used (no squeeze): squeezing a one-sample batch
            # would drop the batch axis and break the preds[:, k] indexing in the loss.
            output = model_cnn(inputs)
            loss = Multitask_loss.forward(output, labels)
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())

        train_loss = np.mean(train_losses)
        loss_values_train.append(train_loss)

        val_loss, _ = eval_CNN_multitask(val_loader, model_cnn)
        loss_values_val.append(val_loss)

        # Report the epoch mean, i.e. the same value that is plotted below
        print("Epoch: {}/{}:".format(epoch + 1, epochs),
              "Train Loss: {:.6f} - ".format(train_loss),
              "Val Loss: {:.6f}".format(val_loss))

        # Checkpoint on improvement, otherwise count towards early stopping
        if val_loss < best_val_loss:
            torch.save(model_cnn.state_dict(), save_dir)  # checkpoint
            best_val_loss = val_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if early_stopping and epochs_without_improvement >= max_increases:
            print('Early Stopping')
            break

    plt.figure()
    plt.plot(range(len(loss_values_train)), loss_values_train, label = 'Train Loss')
    plt.plot(range(len(loss_values_val)), loss_values_val, label = 'Validation Loss')
    plt.xlabel('Epochs')
    plt.legend()
    plt.show()

In [None]:
# Train the multitask CNN for at most 20 epochs; the best checkpoint goes to 'CNN_step10'.
# NOTE(review): batch_size=16 differs from the 32 the loaders were built with -- confirm which is intended.
train_CNN_multitask(
    train_loader_multitask,
    val_loader_multitask,
    epochs=20,
    batch_size=16,
    save_dir='CNN_step10',
    early_stopping=True,
)

In [None]:
# Evaluate with Spearman Correlation
def test_cnn_all_labels(test_loader, model_dir):
    """Report per-task Spearman correlations of a saved 3-output CNN.

    Args:
        test_loader: iterable yielding ``(inputs, labels, lengths)`` batches.
        model_dir: path of the ``state_dict`` checkpoint to load into ``CNN_2D``.

    Returns:
        ``(rho_valence, rho_energy, rho_danceability)``, the Spearman
        correlations between targets and predictions for columns 0, 1 and 2.
        The three values and their mean are also printed.
    """
    # Fetch Model
    model_cnn = CNN_2D(output_dim=3).to(device).float()
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only session
    model_cnn.load_state_dict(torch.load(model_dir, map_location=device))
    model_cnn.eval()

    batch_preds = []
    batch_golds = []
    with torch.no_grad():  # inference only
        for inputs, labels, _lengths in test_loader:
            output = model_cnn(inputs.to(device).float())
            # One row of three values per sample
            batch_preds.append(output.cpu().numpy().reshape(-1, 3))
            batch_golds.append(labels.cpu().numpy().reshape(-1, 3))

    # Stack along the sample axis; this works even when the last batch is smaller,
    # whereas np.array() over the per-batch lists would be ragged.
    pred = np.concatenate(batch_preds, axis=0)
    true = np.concatenate(batch_golds, axis=0)

    # Get Spearman Correlation per task
    rho_valence = spearmanr(true[:, 0], pred[:, 0]).correlation
    rho_energy = spearmanr(true[:, 1], pred[:, 1]).correlation
    rho_danceability = spearmanr(true[:, 2], pred[:, 2]).correlation

    print('\nValence Spearman Correlation: {:.6f} \n'.format(rho_valence))
    print('\nEnergy Spearman Correlation: {:.6f} \n'.format(rho_energy))
    print('\nDanceability Spearman Correlation: {:.6f} \n'.format(rho_danceability))
    print('\nMean Spearman Correlation: {:.6f} \n'.format(np.mean([rho_valence, rho_energy, rho_danceability])))

    return rho_valence, rho_energy, rho_danceability

In [None]:
# Spearman correlations of the 'CNN_step10' checkpoint on the validation loader
valence, energy, dance = test_cnn_all_labels(
    val_loader_multitask,
    'CNN_step10',
)

## Βήμα 11: Υποβολή στο Kaggle

In [None]:
# Rebuild the 3-output CNN on the active device, then restore the 'CNN_step10' weights
CNN_multi = CNN_2D(output_dim=3)
CNN_multi.to(device).float()
CNN_multi.load_state_dict(torch.load('CNN_step10'))

In [None]:
class MultitaskDatasetTest(Dataset):
    """Unlabeled test split of the multitask dataset.

    Reads every file found in ``<path>/test`` with ``read_spec_fn`` and serves
    the padded features together with the sample length and the file id.
    """

    def __init__(self, path, max_length=-1, read_spec_fn=read_spectrogram, label_type='all', mode = 'mel_spec'):
        """Load all test samples into memory.

        Args:
            path: dataset root; samples are read from its ``test`` sub-directory.
            max_length: length handed to ``PaddingTransform``; a value <= 0
                selects the length of the longest sample.
            read_spec_fn: loader called as ``read_spec_fn(file_path, mode)``.
            label_type: stored on the instance; not otherwise used by this class.
            mode: forwarded unchanged to ``read_spec_fn``.
        """
        test_dir = os.path.join(path, 'test')
        self.label_type = label_type

        entries = os.listdir(test_dir)
        self.feats = [read_spec_fn(os.path.join(test_dir, entry), mode) for entry in entries]
        # File id = file name up to its first dot
        self.files = [entry.split('.')[0] for entry in entries]

        self.feat_dim = self.feats[0].shape[1]
        self.lengths = list(map(len, self.feats))
        if max_length <= 0:
            self.max_length = max(self.lengths)
        else:
            self.max_length = max_length
        self.zero_pad_and_stack = PaddingTransform(self.max_length)

    def __getitem__(self, item):
        """Return ``(padded_feats, length, file_id)`` for sample ``item``.

        ``length`` is the sample's own length capped at ``self.max_length``.
        """
        length = min(self.lengths[item], self.max_length)
        padded = self.zero_pad_and_stack(self.feats[item])
        return padded, length, self.files[item]

    def __len__(self):
        """Number of test samples."""
        return len(self.feats)

In [None]:
# Load the unlabeled test split of the multitask dataset
test_multitask = MultitaskDatasetTest(
    path='../input/patreco3-multitask-affective-music/data/multitask_dataset/',
    max_length=-1,
    read_spec_fn=read_spectrogram,
)

In [None]:
# Wrap the test set in a DataLoader; with val_size=0 only the first returned loader is kept
test_loader_multitask, _ = torch_train_val_split(
    test_multitask,
    batch_train=32,
    batch_eval=32,
    val_size=0,
)

In [None]:
# Predict on the unlabeled Kaggle test set
def test_cnn_kaggle(test_loader, model_dir):
    """Run a saved 3-output CNN over an unlabeled test loader.

    Args:
        test_loader: iterable yielding ``(inputs, lengths, filename)`` batches.
        model_dir: path of the ``state_dict`` checkpoint to load into ``CNN_2D``.

    Returns:
        ``(filenames, pred)``. ``filenames`` is a list with one entry per batch
        (that batch's collated file names). ``pred`` is a 1-D object array with
        one entry per batch, each entry being the list of per-sample output
        rows of that batch.
    """
    # Fetch Model
    model_cnn = CNN_2D(output_dim=3).to(device).float()
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only session
    model_cnn.load_state_dict(torch.load(model_dir, map_location=device))
    model_cnn.eval()

    filenames = []
    batch_preds = []
    with torch.no_grad():  # inference only
        for inputs, _lengths, filename in test_loader:
            filenames.append(filename)
            output = model_cnn(inputs.to(device).float())
            batch_preds.append(output.tolist())

    # Build the per-batch object array explicitly. np.array(batch_preds) would
    # raise on unequal batch sizes with recent NumPy, and would collapse to a
    # flat array of scalars whenever all batches happen to have the same size.
    pred = np.empty(len(batch_preds), dtype=object)
    for k, rows in enumerate(batch_preds):
        pred[k] = rows

    return filenames, pred

In [None]:
# Predict with 'CNN_step10', the checkpoint this notebook writes during training
# (the previous name 'CNN_step10 (1)' is never produced by any cell).
filenames, all_preds = test_cnn_kaggle(test_loader_multitask, 'CNN_step10')

In [None]:
final = np.array([])

# Flatten the per-batch filename collections into one array of file ids
temp_names = np.array([name for batch in filenames for name in batch])

# Flatten the predictions into one [valence, energy, danceability] row per clip
if isinstance(all_preds, np.ndarray) and all_preds.dtype != object:
    # A plain numeric array means the predictions arrived already flattened to
    # scalars; regroup them three per clip.
    results = all_preds.reshape(-1, 3).tolist()
else:
    results = [row for batch in all_preds for row in batch]

assert len(temp_names) == len(results), "filename / prediction count mismatch"

# Numeric table: file id followed by the three predictions. The row count
# follows the data instead of a hard-coded test-set size. The ids must parse
# as numbers for the first-column assignment to succeed.
kaggle_res = np.zeros((len(results), 4))
kaggle_res[:, 0] = temp_names
kaggle_res[:, 1:] = np.array(results)

In [None]:
import csv

# Write the Kaggle submission: a header plus one row per test clip.
# The loop follows the data instead of assuming exactly 375 clips.
assert len(temp_names) == len(results), "filename / prediction count mismatch"

with open('final.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Id.fused.full.npy.gz", "valence", "energy", "danceability"])
    for name, row in zip(temp_names, results):
        writer.writerow([
            str(name) + '.fused.full.npy.gz',
            str(row[0]),
            str(row[1]),
            str(row[2]),
        ])