In [2]:
# Import Libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import librosa.display as display
import matplotlib.pyplot as plt
import os
import copy

import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

import seaborn as sns
import warnings 
warnings.filterwarnings('ignore')
from collections import Counter

# Torch
import torch
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from torch.utils.data import Dataset
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

In [3]:
# To add the competition data Click File->Add or Upload data-> Search by URL -> https://www.kaggle.com/geoparslp/patreco3-multitask-affective-music

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# Kaggle mounts attached datasets under the absolute, read-only /kaggle/input
# directory. The previous relative path './kaggle/input' resolves inside the
# working directory, where nothing exists, so the walk silently printed nothing.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Βήμα 1: Εξοικείωση με φασματογραφήματα στην κλίμακα mel

α) Διαλέγουμε δύο τυχαίες γραμμές του dataset με διαφορετικές επισημειώσεις.

In [4]:
# Step 1: Spectrograms in mel-scale
url_train_labels = '/kaggle/input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/train_labels.txt'
# Read every line of the label index; the context manager closes the handle
# as soon as the lines are in memory (the original left the file open).
with open(url_train_labels, "r") as my_file:
    content_list = my_file.readlines()

# Show two fixed lines of the index (picked by hand; they are expected to
# carry different genre labels - check the printed output).
print(content_list[1])
print(content_list[500])

In [5]:
# Load the fused feature matrices of the two tracks inspected above
spec1 = np.load('../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/train/1042.fused.full.npy')
spec2 = np.load('../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/train/1325.fused.full.npy')

# The first 128 rows are treated as the mel spectrogram, the rest as the chromagram
mel1 = spec1[:128]
chroma1 = spec1[128:]
mel2 = spec2[:128]
chroma2 = spec2[128:]

# Draw each mel spectrogram in its own figure (time in seconds, mel frequency axis)
for mel_spec in (mel1, mel2):
    display.specshow(mel_spec, x_axis='s', y_axis='mel')
    plt.show()

## Βήμα 2: Συγχρονισμός φασματογραφημάτων στο ρυθμό της μουσικής (beat-synced spectrograms)

In [6]:
# Step 2: Beat-synced spectrograms
# Report the (rows, frames) shape of both mel spectrograms before beat-sync,
# one shape per line.
print('Dimensions of the two Spectograms before Beat-Sync:')
print(mel1.shape, mel2.shape, sep='\n')

In [7]:
# Same inspection as in Step 1, this time on the beat-synced version of the two tracks
spec1_beat = np.load('../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/train/1042.fused.full.npy')
spec2_beat = np.load('../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/train/1325.fused.full.npy')

# The first 128 rows are treated as the mel spectrogram, the rest as the chromagram
mel1_beat = spec1_beat[:128]
chroma1_beat = spec1_beat[128:]
mel2_beat = spec2_beat[:128]
chroma2_beat = spec2_beat[128:]

# Draw each beat-synced mel spectrogram in its own figure
for mel_spec_beat in (mel1_beat, mel2_beat):
    display.specshow(mel_spec_beat, x_axis='s', y_axis='mel')
    plt.show()

In [8]:
# Shapes of the beat-synced mel spectrograms, one per line, for comparison
# with the shapes printed before beat-sync.
print('Dimensions of the two Spectograms after Beat-Sync:')
print(mel1_beat.shape, mel2_beat.shape, sep='\n')

## Βήμα 3: Εξοικείωση με χρωμογραφήματα

In [9]:
# Visualize chromagrams.
# A chromagram has one row per pitch class, so it must be drawn on the
# 'chroma' axis; the previous 'mel' axis labelled the rows with mel
# frequencies that do not apply to this representation.
display.specshow(chroma1, x_axis = 's', y_axis='chroma')
plt.show()
display.specshow(chroma2, x_axis = 's', y_axis='chroma')
plt.show()

In [10]:
# Shapes of the two chromagrams before beat-sync, one per line
print('Dimensions of the two Chromagrams before Beat-Sync:')
print(chroma1.shape, chroma2.shape, sep='\n')

In [11]:
# Visualize the beat-synced chromagrams.
# Drawn on the 'chroma' axis (pitch classes) instead of the 'mel' axis, which
# only makes sense for mel spectrograms.
display.specshow(chroma1_beat, x_axis = 's', y_axis='chroma')
plt.show()
display.specshow(chroma2_beat, x_axis = 's', y_axis='chroma')
plt.show()

In [12]:
# Shapes of the two chromagrams after beat-sync, one per line
print('Dimensions of the two Chromagrams after Beat-Sync:')
print(chroma1_beat.shape, chroma2_beat.shape, sep='\n')

## Βήμα 4: Φόρτωση και ανάλυση δεδομένων

In [13]:
##################################################################################
# HINT: Use this class mapping to merge similar classes and ignore classes that do not work very well
# Maps each raw genre label found in the label files to the class used for
# training. Several raw genres share one target class (merging); a value of
# None marks a genre whose samples are dropped by the code that consumes this
# mapping (SpectrogramDataset.get_files_labels and the histogram cells).
CLASS_MAPPING = {
    # Rock family (Indie-Rock is dropped)
    "Rock": "Rock",
    "Psych-Rock": "Rock",
    "Indie-Rock": None,
    "Post-Rock": "Rock",
    # Folk family
    "Psych-Folk": "Folk",
    "Folk": "Folk",
    # Metal family (Punk is merged into Metal, Post-Punk is dropped)
    "Metal": "Metal",
    "Punk": "Metal",
    "Post-Punk": None,
    # Genres kept as their own class (Chiptune is merged into Electronic)
    "Trip-Hop": "Trip-Hop",
    "Pop": "Pop",
    "Electronic": "Electronic",
    "Hip-Hop": "Hip-Hop",
    "Classical": "Classical",
    "Blues": "Blues",
    "Chiptune": "Electronic",
    "Jazz": "Jazz",
    # Dropped genres
    "Soundtrack": None,
    "International": None,
    "Old-Time": None,
}


def torch_train_val_split(
    dataset, batch_train, batch_eval, val_size=0.2, shuffle=True, seed=420
):
    """Split a dataset into a training and a validation DataLoader.

    Args:
        dataset: Any object supporting ``len()`` and integer indexing.
        batch_train: Batch size of the training loader.
        batch_eval: Batch size of the validation loader.
        val_size: Fraction of the samples assigned to validation; the number
            of validation samples is ``floor(val_size * len(dataset))``.
        shuffle: If True, the sample order is permuted before splitting.
        seed: Seed passed to ``np.random.seed`` before the permutation.
            Note that this reseeds NumPy's global random state.

    Returns:
        ``(train_loader, val_loader)``; each draws its own index subset
        through a ``SubsetRandomSampler``.
    """
    n_samples = len(dataset)
    order = list(range(n_samples))
    if shuffle:
        np.random.seed(seed)
        np.random.shuffle(order)

    # The first n_val positions of the (possibly shuffled) order form the
    # validation subset, everything after them the training subset.
    n_val = int(np.floor(val_size * n_samples))
    val_part, train_part = order[:n_val], order[n_val:]

    train_loader = DataLoader(
        dataset, batch_size=batch_train, sampler=SubsetRandomSampler(train_part)
    )
    val_loader = DataLoader(
        dataset, batch_size=batch_eval, sampler=SubsetRandomSampler(val_part)
    )
    return train_loader, val_loader


def read_spectrogram(spectrogram_file, mode = 'fused'):
    """Load a stored feature matrix and return it transposed.

    Args:
        spectrogram_file: Path of a ``.npy`` file readable by ``np.load``.
        mode: ``'mel_spec'`` keeps only the first 128 rows, ``'chroma'`` keeps
            only the rows from index 128 on, any other value keeps all rows.

    Returns:
        The selected rows, transposed (so the stored second axis comes first).
    """
    fused = np.load(spectrogram_file)
    if mode == 'mel_spec':
        selected = fused[:128]      # mel-spectrogram rows only
    elif mode == 'chroma':
        selected = fused[128:]      # chromagram rows only
    else:
        selected = fused            # mel spectrogram and chromagram together
    return selected.T


class LabelTransformer(LabelEncoder):
    """LabelEncoder that also accepts a single (scalar) label.

    Both methods first try the call with ``y`` as given and, if the parent
    class rejects it, retry with ``y`` wrapped in a one-element list.
    """

    def inverse(self, y):
        """Map encoded label(s) back to the original label(s).

        Args:
            y: A sequence of encoded labels, or a single encoded label.

        Returns:
            Whatever ``LabelEncoder.inverse_transform`` returns for the input.
        """
        try:
            return super(LabelTransformer, self).inverse_transform(y)
        except Exception:
            # Only ordinary errors trigger the scalar fallback; a bare
            # ``except`` would also swallow KeyboardInterrupt / SystemExit.
            return super(LabelTransformer, self).inverse_transform([y])

    def transform(self, y):
        """Encode label(s) with the fitted classes.

        Args:
            y: A sequence of labels, or a single label.

        Returns:
            Whatever ``LabelEncoder.transform`` returns for the input.
        """
        try:
            return super(LabelTransformer, self).transform(y)
        except Exception:
            # Same scalar fallback as in ``inverse``.
            return super(LabelTransformer, self).transform([y])


class PaddingTransform(object):
    """Callable that pads or truncates a 2-D array to a fixed number of rows."""

    def __init__(self, max_length, padding_value=0):
        """
        Args:
            max_length: Number of rows every returned array will have.
            padding_value: Value written into the rows appended to short inputs.
        """
        self.max_length = max_length
        self.padding_value = padding_value

    def __call__(self, s):
        """Return ``s`` with exactly ``max_length`` rows.

        Args:
            s: 2-D array indexed as (row, feature).

        Returns:
            ``s`` itself when it already has ``max_length`` rows, its first
            ``max_length`` rows when it is longer, otherwise a new array with
            rows of ``padding_value`` appended at the end.
        """
        n_rows = len(s)
        if n_rows == self.max_length:
            return s

        if n_rows > self.max_length:
            return s[: self.max_length]

        # Short input: append the missing rows. The padding previously always
        # used zeros, silently ignoring ``padding_value``. ``np.vstack`` builds
        # a fresh array, so the input is not modified and no copy is needed.
        pad = np.full(
            (self.max_length - n_rows, s.shape[1]),
            self.padding_value,
            dtype=np.float32,
        )
        return np.vstack((s, pad))


class SpectrogramDataset(Dataset):
    """In-memory dataset of spectrogram features with genre or regression labels.

    Items are ``(padded_features, label, length)`` where ``length`` is the
    number of rows of the sample before padding, capped at ``max_length``.
    """

    def __init__(
        self, path, class_mapping=None, train=True, max_length=-1, regression=None, mode='fused'
    ):
        """
        Args:
            path: Directory holding ``train``/``test`` sub-directories and the
                matching ``train_labels.txt``/``test_labels.txt`` index files.
            class_mapping: Optional dict from raw label to target label; raw
                labels mapped to a falsy value are skipped.
            train: Selects the ``train`` split when True, ``test`` otherwise.
            max_length: Fixed number of rows per item; values <= 0 mean
                "use the longest sample of this split".
            regression: If truthy, the column index (in a comma-separated
                index file) of the regression target; otherwise labels are
                treated as classes and integer-encoded.
            mode: Forwarded to ``read_spectrogram`` to pick the feature rows.
        """
        split = "train" if train else "test"
        split_dir = os.path.join(path, split)
        self.regression = regression

        self.index = os.path.join(path, "{}_labels.txt".format(split))
        self.files, labels = self.get_files_labels(self.index, class_mapping)

        # All feature matrices are loaded eagerly and kept in memory.
        self.feats = []
        for fname in self.files:
            self.feats.append(read_spectrogram(os.path.join(split_dir, fname), mode))
        self.feat_dim = self.feats[0].shape[1]
        self.lengths = list(map(len, self.feats))

        if max_length <= 0:
            self.max_length = max(self.lengths)
        else:
            self.max_length = max_length
        self.zero_pad_and_stack = PaddingTransform(self.max_length)
        self.label_transformer = LabelTransformer()

        if isinstance(labels, (list, tuple)):
            if regression:
                self.labels = np.array(labels).astype("float64")
            else:
                encoded = self.label_transformer.fit_transform(labels)
                self.labels = np.array(encoded).astype("int64")

    def get_files_labels(self, txt, class_mapping):
        """Parse an index file into parallel lists of file names and labels.

        The first line of the file is skipped. In regression mode each line
        is split on commas: field 0 is the sample id and field
        ``self.regression`` the label (kept as a string). Otherwise lines are
        tab-separated ``file<TAB>label``; the label is optionally translated
        through ``class_mapping`` and samples with a falsy label are skipped.

        Returns:
            ``(files, labels)``.
        """
        with open(txt, "r") as fd:
            rows = [raw.rstrip().split("\t") for raw in list(fd)[1:]]

        files, labels = [], []
        for fields in rows:
            if self.regression:
                columns = fields[0].split(",")
                files.append(columns[0] + ".fused.full.npy")
                labels.append(columns[self.regression])
            else:
                label = fields[1]
                if class_mapping:
                    label = class_mapping[fields[1]]
                if label:
                    # Keep only the id before the first dot and rebuild the
                    # name of the stored .npy file from it.
                    sample_id = fields[0].split('.')[0]
                    files.append('{}.fused.full.npy'.format(sample_id))
                    labels.append(label)
        return files, labels

    def __getitem__(self, item):
        """Return ``(padded_features, label, length)`` for one sample."""
        true_length = min(self.lengths[item], self.max_length)
        padded = self.zero_pad_and_stack(self.feats[item])
        return padded, self.labels[item], true_length

    def __len__(self):
        """Number of samples, i.e. the number of stored labels."""
        return len(self.labels)

##################################################################################

In [14]:
##### Mel spectrograms (not beat-synced) #####

# Training split, mel rows only, mapped/filtered through CLASS_MAPPING
mel_train_set = SpectrogramDataset(
    '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/',
    class_mapping=CLASS_MAPPING,
    train=True,
    max_length=-1,
    mode='mel_spec',
)
# Train / validation loaders: batch size 32 each, 33% of the samples for validation
train_loader_mel, val_loader_mel = torch_train_val_split(
    mel_train_set, batch_train=32, batch_eval=32, val_size=.33
)

# Test split with the same settings
mel_test_set = SpectrogramDataset(
    '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/',
    class_mapping=CLASS_MAPPING,
    train=False,
    max_length=-1,
    mode='mel_spec',
)
# Test loader: batch size 16
test_loader_mel = DataLoader(mel_test_set, batch_size=16)

In [15]:
##### Beat-synced mel spectrograms #####

# Training split, mel rows only, mapped/filtered through CLASS_MAPPING
beat_mel_train_set = SpectrogramDataset(
    '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/',
    class_mapping=CLASS_MAPPING,
    train=True,
    max_length=-1,
    mode='mel_spec',
)
# Train / validation loaders: batch size 32 each, 33% of the samples for validation
train_loader_beat_mel, val_loader_beat_mel = torch_train_val_split(
    beat_mel_train_set, batch_train=32, batch_eval=32, val_size=.33
)

# Test split with the same settings
beat_mel_test_set = SpectrogramDataset(
    '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/',
    class_mapping=CLASS_MAPPING,
    train=False,
    max_length=-1,
    mode='mel_spec',
)
# Test loader: batch size 16
test_loader_beat_mel = DataLoader(beat_mel_test_set, batch_size=16)

In [16]:
##### Beat-synced chromagrams #####

# Training split, chroma rows only, mapped/filtered through CLASS_MAPPING
beat_chroma_train_set = SpectrogramDataset(
    '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/',
    class_mapping=CLASS_MAPPING,
    train=True,
    max_length=-1,
    mode='chroma',
)
# Train / validation loaders: batch size 32 each, 33% of the samples for validation
train_loader_beat_chroma, val_loader_beat_chroma = torch_train_val_split(
    beat_chroma_train_set, batch_train=32, batch_eval=32, val_size=.33
)

# Test split with the same settings
beat_chroma_test_set = SpectrogramDataset(
    '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/',
    class_mapping=CLASS_MAPPING,
    train=False,
    max_length=-1,
    mode='chroma',
)
# Test loader: batch size 16
test_loader_beat_chroma = DataLoader(beat_chroma_test_set, batch_size=16)

In [17]:
##### Beat-synced fused features (mel spectrogram + chromagram) #####

# Training split, all feature rows, mapped/filtered through CLASS_MAPPING
beat_train_set_fused = SpectrogramDataset(
    '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/',
    class_mapping=CLASS_MAPPING,
    train=True,
    max_length=-1,
    mode='fused',
)
# Train / validation loaders: batch size 32 each, 33% of the samples for validation
train_loader_beat_fused, val_loader_beat_fused = torch_train_val_split(
    beat_train_set_fused, batch_train=32, batch_eval=32, val_size=.33
)

# Test split with the same settings
beat_test_set_fused = SpectrogramDataset(
    '../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/',
    class_mapping=CLASS_MAPPING,
    train=False,
    max_length=-1,
    mode='fused',
)
# Test loader: batch size 16
test_loader_beat_fused = DataLoader(beat_test_set_fused, batch_size=16)

Δημιουργούμε τα **ιστογράμματα** κλάσεων πριν και μετά τη συγχώνευση/αφαίρεση κλάσεων.

In [18]:
# (c) Histograms before and after Class Mapping

# Read Labels of Train and Test Dataset
def read_file(txt_file):
    """Return the label column of a whitespace-separated label file.

    The first line (header) is skipped. For every following non-blank line
    the second whitespace-separated field is collected.

    Args:
        txt_file: Path of the label file.

    Returns:
        List of label strings in file order.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
    """
    output = []
    # The context manager closes the file; the original never closed it.
    with open(txt_file, 'r') as file:
        next(file, None)  # skip the header line (no-op on an empty file)
        for line in file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. a trailing newline
            output.append(fields[1])
    return output

# Raw (unmapped) genre labels of the beat-synced train and test splits,
# read straight from the label index files.
y_train = read_file('../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/train_labels.txt')
y_test = read_file('../input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/test_labels.txt')

In [19]:
def plot_bar(labels, palette = "Blues_d"):
    """Draw a bar chart with the number of occurrences of every label.

    Args:
        labels: Sequence of class labels (one entry per sample).
        palette: Seaborn palette name used to colour the bars.
    """
    # np.unique returns the sorted class names together with their counts, so
    # every bar height belongs to the label drawn beneath it. The previous
    # version paired the sorted names with Counter values in first-seen
    # order, which mislabelled the bars.
    names, freqs = np.unique(labels, return_counts=True)

    f = plt.figure()
    f.set_figwidth(30)
    f.set_figheight(10)
    sns.set(style="darkgrid")
    # x / y are passed by keyword: recent seaborn releases no longer accept
    # them positionally.
    sns.barplot(x=names, y=freqs, alpha=0.9, palette=palette)
    plt.ylabel('Number of Occurrences', fontsize=12)
    plt.xlabel('Music Genres', fontsize=12)
    plt.show()

In [20]:
# Histograms before Category Mapping
# Train Set: occurrences of every raw genre label
plot_bar(y_train)

In [21]:
# Test Set: occurrences of every raw genre label (before category mapping)
plot_bar(y_test)

In [22]:
# Histograms after Category Mapping
# Translate every raw genre through CLASS_MAPPING and keep only the samples
# whose mapped class is truthy (genres mapped to None are dropped).
y_train_mapped = [CLASS_MAPPING[genre] for genre in y_train if CLASS_MAPPING[genre]]

y_test_mapped = [CLASS_MAPPING[genre] for genre in y_test if CLASS_MAPPING[genre]]

In [23]:
# Display the sorted set of classes that remain in the training labels after mapping
np.unique(y_train_mapped)

In [24]:
# Train Set: class occurrences after merging/dropping genres
plot_bar(y_train_mapped, 'Reds_d')

In [25]:
# Test Set: class occurrences after merging/dropping genres
plot_bar(y_test_mapped, 'Reds_d')

## Βήμα 5: Αναγνώριση μουσικού είδους με LSTM

In [26]:
# Lab 2 Basic LSTM Class
class BasicLSTM(nn.Module):
    """(Bi)LSTM sequence classifier fed with zero-padded batches.

    The LSTM runs over the full padded sequences; the output at the last
    valid time step of every sequence (see ``last_timestep``) is passed
    through a linear layer.
    """

    def __init__(self, input_dim, hidden_size, output_dim, num_layers, bidirectional=False, dropout = 0):
        """
        Args:
            input_dim: Number of features per time step.
            hidden_size: Hidden size of each LSTM direction.
            output_dim: Size of the linear output layer.
            num_layers: Number of stacked LSTM layers.
            bidirectional: Use a bidirectional LSTM when True.
            dropout: Dropout value forwarded to ``nn.LSTM``.
        """
        super(BasicLSTM, self).__init__()
        self.bidirectional = bidirectional
        # A bidirectional LSTM concatenates both directions on the feature axis.
        self.feature_size = hidden_size * 2 if self.bidirectional else hidden_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_dim, hidden_size, num_layers, bidirectional=self.bidirectional, batch_first=True, dropout=dropout)
        self.linear = nn.Linear(self.feature_size, output_dim)

    def forward(self, x, lengths):
        """
        Args:
            x: Tensor of shape (N, L, D) - batch, sequence, feature.
            lengths: Integer tensor with the N true (unpadded) sequence lengths.

        Returns:
            Tensor of shape (N, output_dim).
        """
        # nn.LSTM starts from all-zero hidden/cell states when none are given,
        # created with the dtype and device of its own weights. The explicit
        # states built here before were forced to double and to a device
        # re-detected on every call, which broke non-double models.
        outputs, _ = self.lstm(x)
        # Final Linear Layer - pass last timestep
        return self.linear(self.last_timestep(outputs, lengths, self.bidirectional))

    def last_timestep(self, outputs, lengths, bidirectional=False):
        """
            Returns the last output of the LSTM taking into account the zero padding
        """
        if bidirectional:
            forward, backward = self.split_directions(outputs)
            last_forward = self.last_by_index(forward, lengths)
            # The backward direction finishes at time step 0.
            # NOTE(review): on padded (unpacked) input the backward pass has
            # also consumed the padding steps before reaching step 0.
            last_backward = backward[:, 0, :]
            # Concatenate and return - maybe add more functionalities like average
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)

    @staticmethod
    def split_directions(outputs):
        """Split the feature axis in two halves: (forward, backward)."""
        direction_size = int(outputs.size(-1) / 2)
        forward = outputs[:, :, :direction_size]
        backward = outputs[:, :, direction_size:]
        return forward, backward

    @staticmethod
    def last_by_index(outputs, lengths):
        """Gather ``outputs[i, lengths[i] - 1, :]`` for every batch row i.

        Returns:
            Tensor of shape (N, features).
        """
        # Index of the last output for each sequence.
        last = lengths.to(device=outputs.device, dtype=torch.long) - 1
        idx = last.view(-1, 1).expand(outputs.size(0),
                                      outputs.size(2)).unsqueeze(1)
        # Squeeze only the time axis: a bare squeeze() also removed the batch
        # axis for batches holding a single sample.
        return outputs.gather(1, idx).squeeze(1)

class BasicLSTM_packed(nn.Module):
    """(Bi)LSTM sequence classifier that packs the padded batch before the LSTM.

    The batch is sorted by decreasing length, packed, run through the LSTM and
    unpacked again; the output at the last valid time step of every sequence
    is passed through a linear layer. Because of the sorting, ``forward``
    also returns the permutation that was applied to the batch.
    """

    def __init__(self, input_dim, hidden_size, output_dim, num_layers, bidirectional=False, dropout = 0):
        """
        Args:
            input_dim: Number of features per time step.
            hidden_size: Hidden size of each LSTM direction.
            output_dim: Size of the linear output layer.
            num_layers: Number of stacked LSTM layers.
            bidirectional: Use a bidirectional LSTM when True.
            dropout: Dropout value forwarded to ``nn.LSTM``.
        """
        super(BasicLSTM_packed, self).__init__()
        self.bidirectional = bidirectional
        # A bidirectional LSTM concatenates both directions on the feature axis.
        self.feature_size = hidden_size * 2 if self.bidirectional else hidden_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_dim, hidden_size, num_layers, bidirectional=self.bidirectional, batch_first=True, dropout=dropout)
        self.linear = nn.Linear(self.feature_size, output_dim)

    def forward(self, x, lengths):
        """
        Args:
            x: Tensor of shape (N, L, D) - batch, sequence, feature.
            lengths: Integer tensor with the N true (unpadded) sequence lengths.

        Returns:
            ``(outputs, indices)``: ``outputs`` has shape (N, output_dim) with
            rows ordered by decreasing sequence length; ``indices`` is the
            permutation applied to the batch, so ``labels[indices]`` lines up
            with ``outputs``.
        """
        # Sort Inputs by decreasing length
        lengths, indices = lengths.sort(dim = 0, descending = True)
        x = x[indices]

        # pack_padded_sequence expects the lengths on the CPU.
        packed_x = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted = True)

        # nn.LSTM starts from all-zero states matching its own dtype/device
        # when none are given. The explicit states built here before depended
        # on a notebook-global DEVICE and were float32, which clashed with
        # models converted to double.
        packed_out, _ = self.lstm(packed_x)
        outputs = pad_packed_sequence(packed_out, batch_first=True)[0]

        # Final Linear Layer - pass last timestep
        last_outputs = self.linear(self.last_timestep(outputs, lengths, self.bidirectional))

        return last_outputs, indices # also return indices to align data with labels

    def last_timestep(self, outputs, lengths, bidirectional=False):
        """
            Returns the last output of the LSTM taking into account the zero padding
        """
        if bidirectional:
            forward, backward = self.split_directions(outputs)
            last_forward = self.last_by_index(forward, lengths)
            # The backward direction finishes at time step 0.
            last_backward = backward[:, 0, :]
            # Concatenate and return - maybe add more functionalities like average
            return torch.cat((last_forward, last_backward), dim=-1)

        else:
            return self.last_by_index(outputs, lengths)

    @staticmethod
    def split_directions(outputs):
        """Split the feature axis in two halves: (forward, backward)."""
        direction_size = int(outputs.size(-1) / 2)
        forward = outputs[:, :, :direction_size]
        backward = outputs[:, :, direction_size:]
        return forward, backward

    @staticmethod
    def last_by_index(outputs, lengths):
        """Gather ``outputs[i, lengths[i] - 1, :]`` for every batch row i.

        Returns:
            Tensor of shape (N, features).
        """
        # Index of the last output for each sequence.
        last = lengths.to(device=outputs.device, dtype=torch.long) - 1
        idx = last.view(-1, 1).expand(outputs.size(0),
                                      outputs.size(2)).unsqueeze(1)
        # Squeeze only the time axis: a bare squeeze() also removed the batch
        # axis for batches holding a single sample.
        return outputs.gather(1, idx).squeeze(1)

In [27]:
# Function that Trains a Neural Network
def train_NN(dataloader, model, criterion, optimizer, device1, packed = False, overfit_batch = False):
    """Run one training epoch and return the mean batch loss.

    Args:
        dataloader: Iterable of ``(inputs, labels, lengths)`` batches.
        model: Network called as ``model(inputs, lengths)``.
        criterion: Loss called as ``criterion(predictions, labels)``.
        optimizer: Optimizer stepping the model parameters.
        device1: Unused; kept for backward compatibility. Batches are moved to
            the device of the model's parameters.
        packed: If True the model returns ``(predictions, indices)`` and the
            labels are reordered with ``indices`` before computing the loss.
        overfit_batch: Unused; kept for backward compatibility.

    Returns:
        Sum of the batch losses divided by the number of batches
        (0.0 when the dataloader yields no batch).
    """
    total_loss = 0.0
    n_batches = 0
    model.train() # switch to train mode
    first_param = next(model.parameters())
    device, dtype = first_param.device, first_param.dtype

    for inputs, labels, lengths in dataloader:
        n_batches += 1

        # Cast the features to the dtype of the model weights and move the
        # batch to the model's device. The earlier ``inputs.double()`` /
        # ``lengths.double()`` calls discarded their result and so did
        # nothing; lengths stay integer because they are used as indices.
        inputs = inputs.to(device=device, dtype=dtype)
        labels = labels.to(device)
        lengths = lengths.to(device)

        # Zero Gradients
        optimizer.zero_grad()

        if packed: # align labels with sorted input
            y_preds, indices = model(inputs, lengths)
            loss = criterion(y_preds, labels[indices])
        else:
            y_preds = model(inputs, lengths)
            loss = criterion(y_preds, labels)

        # Back-propagate the loss and update the weights
        loss.backward()
        optimizer.step()

        # Collect Loss
        total_loss += loss.item()

    # Guard against an empty dataloader (the loop index was unbound before).
    return total_loss / n_batches if n_batches else 0.0

# Function that Evaluates a Neural Network
def eval_NN(dataloader, model, criterion, device, packed = False):
    """Evaluate a model without updating it.

    Args:
        dataloader: Iterable of ``(inputs, labels, lengths)`` batches.
        model: Network called as ``model(inputs, lengths)``.
        criterion: Loss called as ``criterion(predictions, labels)``.
        device: Device the batches are moved to.
        packed: If True the model returns ``(predictions, indices)`` and the
            labels are reordered with ``indices`` so they line up with the
            predictions.

    Returns:
        ``(mean_loss, (y_gold, y_pred))`` where ``mean_loss`` is the sum of
        the batch losses divided by the number of batches (0.0 for an empty
        dataloader) and ``y_gold`` / ``y_pred`` are lists holding one NumPy
        array of gold labels / argmax predictions per batch.
    """
    total_loss = 0.0
    n_batches = 0
    model.eval() # switch to evaluation mode

    y_gold = []
    y_pred = []

    # Features are cast to the dtype of the model weights; the earlier
    # ``inputs.double()`` call discarded its result and so did nothing.
    first_param = next(model.parameters(), None)

    with torch.no_grad(): # don't keep gradients
        for inputs, labels, lengths in dataloader:
            n_batches += 1

            # Move the batch tensors to the right device
            if first_param is not None:
                inputs = inputs.to(device=device, dtype=first_param.dtype)
            else:
                inputs = inputs.to(device)
            labels = labels.to(device)
            lengths = lengths.to(device)

            if packed: # align labels with sorted input
                y_preds, indices = model(inputs, lengths)
                labels = labels[indices]
            else:
                y_preds = model(inputs, lengths)

            loss = criterion(y_preds, labels)

            # Prediction: argmax of the class scores
            prediction = torch.argmax(y_preds, dim=1)

            # Collect Loss and labels
            total_loss += loss.item()

            y_pred.append(prediction.cpu().numpy())
            # Always copy to the CPU first: the packed branch used to call
            # .numpy() directly, which fails for CUDA tensors.
            y_gold.append(labels.cpu().numpy())

    mean_loss = total_loss / n_batches if n_batches else 0.0
    return mean_loss, (y_gold, y_pred)

In [28]:
# Function that plots train and validation losses for given epochs
def plot_losses(epochs, train_loss, val_loss, title = 'Losses per Epoch'):
    """Plot the training and validation loss curves in one figure.

    Args:
        epochs: Number of points on the x-axis (0 .. epochs-1).
        train_loss: Training loss per epoch.
        val_loss: Validation loss per epoch.
        title: Figure title.
    """
    epoch_axis = np.arange(epochs)
    curves = ((train_loss, "Train Loss"), (val_loss, "Validation Loss"))
    for values, legend_name in curves:
        plt.plot(epoch_axis, values, label = legend_name)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.title(title)
    plt.show()

(β) **Υπερεκπαίδευση** πάνω σε λίγα batches μέχρι το overfitting.

In [53]:
def NN_pipeline(model, train_set, criterion, optimizer, EPOCHS, DEVICE, overfit_batch = False, packed = False, save_name='model'):
    """Main training pipeline of a neural network.

    Normal mode (``overfit_batch=False``):
      - splits ``train_set`` into train / validation loaders
        (batch size 32, 33% validation),
      - trains for up to ``EPOCHS`` epochs and evaluates on the validation
        set after every epoch,
      - saves the whole model to ``save_name`` whenever the validation loss
        improves and stops early after 10 consecutive epochs without
        improvement,
      - plots the train and validation loss per epoch and returns None.

    Debug mode (``overfit_batch=True``): trains on the first 4 samples of
    ``train_set`` only (2 batches of 2), without validation, checkpointing or
    early stopping, and returns the list of training losses per epoch.

    Args:
        model: Network to train.
        train_set: Dataset yielding ``(inputs, label, length)`` items.
        criterion: Loss function.
        optimizer: Optimizer bound to the model parameters.
        EPOCHS: Maximum number of epochs.
        DEVICE: Device forwarded to the train / eval helpers.
        overfit_batch: Enables the debug mode described above.
        packed: Forwarded to the train / eval helpers (model returns
            ``(predictions, indices)``).
        save_name: Path of the checkpoint written in normal mode.
    """
    # Lists to collect train and validation loss
    train_losses = []
    val_losses = []

    if overfit_batch: # USED FOR DEBUGGING
        # Restrict training to the first 4 samples (2 batches of 2 samples)
        subset = torch.utils.data.Subset(train_set, list(range(4)))
        train_loader = DataLoader(subset, batch_size=2)

        for epoch in range(EPOCHS):
            # fit model
            train_loss = train_NN(train_loader, model, criterion, optimizer, DEVICE, packed)
            print('EPOCH',str(epoch),':')
            print('Training Loss:',str(train_loss))
            train_losses.append(train_loss)

        return train_losses

    # Early Stopping Hyperparameters
    counter = 0
    max_increases = 10
    best_val_loss = float('inf')  # any finite first validation loss improves on it

    train_loader, val_loader = torch_train_val_split(train_set, 32, 32, val_size=.33) # split train/val

    for epoch in range(EPOCHS):
        # fit model
        train_loss = train_NN(train_loader, model, criterion, optimizer, DEVICE, packed)

        print('EPOCH',str(epoch),':')
        print('Training Loss:',str(train_loss))
        train_losses.append(train_loss)

        # evaluate on validation set
        val_loss, (y_gold, y_pred) = eval_NN(val_loader, model, criterion, DEVICE, packed)
        print('Validation Loss:',str(val_loss))
        val_losses.append(val_loss)

        # Apply Early Stopping Techniques
        if val_loss < best_val_loss:
            torch.save(model, save_name) # checkpoint
            best_val_loss = val_loss
            counter = 0 # reset counter
        else:
            counter += 1

        if counter >= max_increases: # 10 times in a row no loss improvement - break
            print('Early Stopping')
            break

    plot_losses(len(train_losses), train_losses, val_losses, title = 'Losses per Epoch - Bidirectional LSTM')

In [30]:
# Feature dimensionality of each input type and number of target classes.
# feat_dim is the second dimension of the first loaded sample of a dataset,
# which is also the second dimension of every (padded) item it returns.
n_feats_mel = beat_mel_train_set.feat_dim        # mel rows per frame
n_feats_chroma = beat_chroma_train_set.feat_dim  # chroma rows per frame
n_classes = np.unique(y_train_mapped).size       # distinct classes after mapping

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [40]:
# Model and hyperparameters for the overfitting sanity check:
# bidirectional 2-layer LSTM on mel features, dropout 0.5, Adam with
# weight decay (L2 regularisation). Weights are converted to double and
# moved to DEVICE.
model = BasicLSTM(
    input_dim=n_feats_mel,
    hidden_size=256,
    output_dim=n_classes,
    num_layers=2,
    bidirectional=True,
    dropout=0.5,
)
model.double()
model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
EPOCHS = 500

In [None]:
# (b) OVERFIT MODEL FOR DEBUGGING
# With overfit_batch=True the pipeline trains on a handful of samples only
# and returns the per-epoch training losses.
train_losses = NN_pipeline(model, mel_train_set, criterion, optimizer, EPOCHS, DEVICE, overfit_batch = True)

In [None]:
# Training loss of the overfitting run. The x-axis is derived from the
# recorded losses instead of the global EPOCHS, which later cells reassign;
# this keeps the cell valid when it is re-run after them.
plt.plot(range(1, len(train_losses) + 1), train_losses)
plt.xlabel('Epochs')
plt.ylabel('Train Loss')
plt.show()

γ) Εκπαιδεύστε ένα LSTM [15] δίκτυο, το οποίο θα δέχεται ως είσοδο τα φασματογραφήματα
του συνόλου εκπαίδευσης (train set) και θα προβλέπει τις διαφορετικές κλάσεις (μουσικά είδη)
του συνόλου δεδομένων (dataset).

In [71]:
# (c) Bidirectional LSTM on the (not beat-synced) mel spectrograms
modelC = BasicLSTM(
    input_dim=n_feats_mel,
    hidden_size=256,
    output_dim=n_classes,
    num_layers=2,
    bidirectional=True,
    dropout=0.5,
)
modelC.double()
modelC.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(modelC.parameters(), lr=0.0001, weight_decay=1e-5)
EPOCHS = 50

In [72]:
# Train model (c); the pipeline checkpoints the model to './best_modelC'
# whenever the validation loss improves.
NN_pipeline(modelC, mel_train_set, criterion, optimizer, EPOCHS, DEVICE, save_name = './best_modelC')

δ) εκπαιδεύστε ένα LSTM δίκτυο, το οποίο θα δέχεται ως είσοδο τα beat-synced spectrograms
(train set) και θα προβλέπει τις διαφορετικές κλάσεις (μουσικά είδη) του συνόλου δεδομένων.

In [55]:
# (d) Bidirectional LSTM on the beat-synced mel spectrograms
modelD = BasicLSTM(
    input_dim=n_feats_mel,
    hidden_size=256,
    output_dim=n_classes,
    num_layers=2,
    bidirectional=True,
    dropout=0.5,
)
modelD.double()
modelD.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(modelD.parameters(), lr=0.0001, weight_decay=1e-5)
EPOCHS = 150

In [56]:
# Train model (d); the pipeline checkpoints the model to './best_modelD'
# whenever the validation loss improves.
NN_pipeline(modelD, beat_mel_train_set, criterion, optimizer, EPOCHS, DEVICE, save_name = './best_modelD')

ε) εκπαιδεύστε ένα LSTM δίκτυο, το οποίο θα δέχεται ως είσοδο τα χρωμογραφήματα (train set)
και θα προβλέπει τις διαφορετικές κλάσεις (μουσικά είδη) του συνόλου δεδομένων.

In [60]:
# (e) Bidirectional LSTM on the beat-synced chromagrams
modelE = BasicLSTM(
    input_dim=n_feats_chroma,
    hidden_size=256,
    output_dim=n_classes,
    num_layers=2,
    bidirectional=True,
    dropout=0.5,
)
modelE.double()
modelE.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(modelE.parameters(), lr=0.0001, weight_decay=1e-5)
EPOCHS = 150

In [62]:
# Train model (e); the pipeline checkpoints the model to './best_modelE'
# whenever the validation loss improves.
NN_pipeline(modelE, beat_chroma_train_set, criterion, optimizer, EPOCHS, DEVICE, save_name='./best_modelE')

ζ) (extra credit) εκπαιδεύστε ένα LSTM δίκτυο, το οποίο θα δέχεται ως είσοδο τα ενωμένα
(concatenated) χρωμογραφήματα και φασματογραφήματα (train set) και θα προβλέπει τις
διαφορετικές κλάσεις (μουσικά είδη) του συνόλου δεδομένων.

In [65]:
# (z) Bidirectional LSTM on the beat-synced fused features
# (mel spectrogram and chromagram concatenated, hence the summed input size)
modelZ = BasicLSTM(
    input_dim=n_feats_chroma + n_feats_mel,
    hidden_size=256,
    output_dim=n_classes,
    num_layers=2,
    bidirectional=True,
    dropout=0.5,
)
modelZ.double()
modelZ.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(modelZ.parameters(), lr=0.0001, weight_decay=1e-5)
EPOCHS = 150

In [66]:
# Train model (z); the pipeline checkpoints the model to './best_modelZ'
# whenever the validation loss improves.
NN_pipeline(modelZ, beat_train_set_fused, criterion, optimizer, EPOCHS, DEVICE, save_name='./best_modelZ')

## Βήμα 6: Αξιολόγηση των μοντέλων

In [73]:
# Step 6: Evaluation of Models

# Load the best checkpoint written by early stopping for every model.
# map_location places the restored weights on the device used for
# evaluation, so checkpoints saved on a GPU also load in a CPU-only session.
# NOTE(review): the checkpoints are whole pickled model objects, so the model
# class must be defined before loading; newer PyTorch releases may also need
# weights_only=False here - confirm against the installed version.
modelC = torch.load('./best_modelC', map_location=DEVICE)
modelD = torch.load('./best_modelD', map_location=DEVICE)
modelE = torch.load('./best_modelE', map_location=DEVICE)
modelZ = torch.load('./best_modelZ', map_location=DEVICE)

In [47]:
def test_report(model, test_loader, criterion, DEVICE, packed = False):
    """Evaluate a model on a loader and return its classification report.

    Args:
        model: Trained network.
        test_loader: Loader yielding ``(inputs, labels, lengths)`` batches.
        criterion: Loss function (its value is computed but not reported).
        DEVICE: Device used for evaluation.
        packed: Forwarded to ``eval_NN``.

    Returns:
        The result of ``classification_report`` computed over all batches.
    """
    _, (gold_batches, pred_batches) = eval_NN(test_loader, model, criterion, DEVICE, packed)
    # eval_NN returns one array per batch; flatten them into single vectors.
    y_true = np.concatenate(gold_batches)
    y_hat = np.concatenate(pred_batches)
    return classification_report(y_true, y_hat)

In [74]:
# Evaluate Case (c) on Test Set: mel spectrograms (not beat-synced)
print(test_report(modelC, test_loader_mel, criterion, DEVICE))

In [59]:
# Evaluate Case (d) on Test Set: beat-synced mel spectrograms
print(test_report(modelD, test_loader_beat_mel, criterion, DEVICE))

In [64]:
# Evaluate Case (e) on Test Set: beat-synced chromagrams
print(test_report(modelE, test_loader_beat_chroma, criterion, DEVICE))

In [68]:
# Evaluate Case (z) on Test Set: beat-synced fused mel spectrograms and chromagrams
print(test_report(modelZ, test_loader_beat_fused, criterion, DEVICE))