# Pattern Recognition - Flow S
## Lab 3 (Main): Genre and Emotion Recognition from Music
### Dimitris Dimos - 031 17 165
### Konstantinos Kopsinis - 031 17 062

In [None]:
## packages
import numpy as np
import pandas as pd
import os
import gc
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import csv
from matplotlib.colors import ListedColormap
from librosa.display import specshow
from IPython.display import clear_output
from torch.nn.functional import relu

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

from sklearn.metrics import classification_report

sns.set()

In [None]:
# data directories (all under the Kaggle input dataset)
fma_genre_spectrograms = "/kaggle/input/patreco3-multitask-affective-music/data/fma_genre_spectrograms/"
fma_genre_spectrograms_beat = "/kaggle/input/patreco3-multitask-affective-music/data/fma_genre_spectrograms_beat/"
# NOTE(review): the name `multitask_dataset` is rebound by `import multitask_dataset`
# in the auxiliary-code cell, so this path string is not reachable after that import.
multitask_dataset = "/kaggle/input/patreco3-multitask-affective-music/data/multitask_dataset/"
multitask_dataset_beat = "/kaggle/input/patreco3-multitask-affective-music/data/multitask_dataset_beat/"

In [None]:
# copy the auxiliary modules from the `lab3-aux` Kaggle input into the working directory, then import them
!cp -r /kaggle/input/lab3-aux/* ./
import dataset
import dataset2
import multitask_dataset
import lab2_lstm

## Step 7: 2D CNN

In [None]:
# definition of Custom CNN
class CustomCNN(nn.Module):
    """2D CNN: four convolutional blocks followed by a linear head.

    Every block is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU ->
    MaxPool2d. The blocks produce 32, 64, 128 and 256 channels and pool by
    2, 2, 4 and 4 respectively. The head is ReLU -> Dropout -> Linear.

    Args:
        fc_in_features: Number of flattened CNN features fed to the linear
            layer. It depends on the input size; the default (20480) is the
            value that used to be hard-coded here.
        out_features: Number of outputs of the linear layer (default 1).

    NOTE: an alternative five-block stack (channels 32-64-64-128-32, a 5x5
    kernel in the third block, 2x pooling after each of the first four blocks)
    was kept here as commented-out code under the heading "Kaggle Competition";
    it is not part of this class.
    """

    def __init__(self, fc_in_features=20480, out_features=1):
        super().__init__()

        # (in_channels, out_channels, pooling factor) of each conv block
        block_specs = [(1, 32, 2), (32, 64, 2), (64, 128, 4), (128, 256, 4)]

        layers = []
        for in_channels, out_channels, pool in block_specs:
            layers += [
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=pool),
            ]
        # A flat Sequential keeps the state_dict keys ("_cnn_module.<idx>...")
        # identical to checkpoints saved by the previous implementation.
        self._cnn_module = nn.Sequential(*layers)

        self._fc_module = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(in_features=fc_in_features, out_features=out_features),
        )

    def forward(self, x):
        """Return the head output for a batch `x`.

        Dimensions 1 and 2 of `x` are swapped and a channel dimension of size 1
        is inserted at position 1 before the convolutions (presumably `x` is
        (batch, time, features) -- TODO confirm against the dataset classes).
        """
        x = x.transpose(1, 2)
        x = torch.unsqueeze(x, 1)
        x = self._cnn_module(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch dim
        return self._fc_module(x)

In [None]:
# hyperparameters
EPOCHS = 30  # number of epochs passed to train_cnn by the training cells
BATCH_SIZE = 32  # batch size of the genre train/validation/test loaders
LR = 0.001  # learning rate of the Adam optimizer in train_cnn / train_cnn2
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPU when available

In [None]:
def evaluate_model(model, dataloader, criterion, regression=False, lstm=False):
    """Evaluate `model` on `dataloader` without computing gradients.

    Args:
        model: Network to evaluate. It is switched to eval mode and is still
            in eval mode when this function returns.
        dataloader: Iterable of batches; batch[0] holds the inputs, batch[1]
            the labels and, when `lstm` is True, batch[2] the lengths that are
            passed to the model as a second argument.
        criterion: Loss, called as criterion(predictions, labels).
        regression: If True the (squeezed) raw outputs are scored and returned
            as predictions; otherwise the argmax over dim 1 is the prediction.
        lstm: If True inputs and labels are cast to double and the model is
            called as model(inputs, lengths).

    Returns:
        (mean_loss, (y_gold, y_pred)) where mean_loss is the average of the
        per-batch losses and y_gold / y_pred are lists with one numpy array
        per batch.

    Raises:
        ValueError: If `dataloader` yields no batches.
    """
    model.eval()  # switch to evaluation mode: disable dropout
    device = next(model.parameters()).device  # evaluate on the model's own device

    total_loss = 0.0
    n_batches = 0
    y_pred = []  # predicted labels
    y_gold = []  # gold labels

    with torch.no_grad():  # do not compute gradients
        for batch in dataloader:
            if lstm:
                # NOTE: labels are cast to double on this path even when
                # `regression` is False (behaviour kept from the original).
                inputs = batch[0].to(device).double()
                labels = batch[1].to(device).double()
                lengths = batch[2].to(device)
                y_preds = model(inputs, lengths)
            else:
                inputs = batch[0].to(device).float()
                if regression:
                    labels = batch[1].to(device).float()
                else:
                    labels = batch[1].to(device).long()
                y_preds = model(inputs)

            if regression:
                loss = criterion(y_preds.squeeze(), labels)
                prediction = y_preds
            else:
                loss = criterion(y_preds, labels)
                prediction = torch.argmax(y_preds, dim=1)

            total_loss += loss.item()
            n_batches += 1

            y_pred.append(prediction.cpu().numpy())
            y_gold.append(labels.cpu().numpy())

    if n_batches == 0:
        raise ValueError("evaluate_model: dataloader yielded no batches")

    return total_loss / n_batches, (y_gold, y_pred)

In [None]:
def _make_fc_head(out_features, in_features=10240):
    """Build the ReLU -> Dropout -> Linear head that replaces CustomCNN._fc_module."""
    return nn.Sequential(
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(in_features=in_features, out_features=out_features),
    ).to(DEVICE)


def train_cnn(train_loader, val_loader, save_string, epochs, regression=False, early_stopping=True, transfer=False):
    """Train a CustomCNN and checkpoint the weights with the best validation loss.

    Args:
        train_loader: Loader yielding (inputs, labels, lengths) batches; the
            lengths are not used by the CNN.
        val_loader: Loader evaluated with `evaluate_model` after every epoch.
        save_string: Path of the checkpoint file. Ignored when `transfer` is
            True, in which case "step9" is used.
        epochs: Maximum number of epochs.
        regression: If True train a 1-output head with MSELoss, otherwise a
            10-output head with CrossEntropyLoss.
        early_stopping: If True stop once the validation loss has not improved
            for more than 7 consecutive epochs. If False, the final-epoch
            weights are saved after training and overwrite the best checkpoint.
        transfer: If True load the "step7" checkpoint (10-output head), freeze
            every loaded parameter and train only a fresh 1-output head.

    Returns:
        (loss_values_train, loss_values_val): per-epoch mean training loss and
        validation loss.
    """
    model_cnn = CustomCNN().to(DEVICE).float()

    if transfer:
        # The checkpoint was saved with the 10-class head, so that head must be
        # in place for load_state_dict to match.
        model_cnn._fc_module = _make_fc_head(10)
        model_cnn.load_state_dict(torch.load("step7", map_location=DEVICE))

        for weight in model_cnn.parameters():
            weight.requires_grad = False

        # the new head is created after freezing, so it stays trainable
        model_cnn._fc_module = _make_fc_head(1)
        save_string = "step9"
    else:
        model_cnn._fc_module = _make_fc_head(1 if regression else 10)

    criterion = nn.MSELoss() if regression else nn.CrossEntropyLoss()
    trainable = [p for p in model_cnn.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable, lr=LR, weight_decay=0.0001)

    loss_values_train = []
    loss_values_val = []
    count = 0  # epochs since the last validation improvement
    opt_val_loss = float("inf")

    for epoch in range(epochs):
        # evaluate_model() leaves the model in eval mode, so training mode has
        # to be re-enabled at the start of every epoch (previously only the
        # first epoch was trained with dropout / batch-norm updates active).
        model_cnn.train()
        if transfer:
            # keep the frozen feature extractor's BatchNorm statistics fixed
            model_cnn._cnn_module.eval()

        train_losses = []
        for inputs, labels, lengths in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            model_cnn.zero_grad()
            output = model_cnn(inputs.float())

            if regression:
                loss = criterion(output.squeeze(), labels.float())
            else:
                loss = criterion(output, labels.long())

            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        val_loss, _ = evaluate_model(model_cnn, val_loader, criterion, regression)

        if val_loss < opt_val_loss:
            opt_val_loss = val_loss
            torch.save(model_cnn.state_dict(), save_string)
            count = 0
        else:
            count += 1

        loss_values_train.append(np.mean(train_losses))
        loss_values_val.append(val_loss)

        print(f'Epoch {epoch}: \t Training Loss = {loss_values_train[-1]} \t--\t Validation Loss = {loss_values_val[-1]}')
        if count > 7 and early_stopping:
            print("Terminated due to early stopping")
            break

    if not early_stopping:
        torch.save(model_cnn.state_dict(), save_string)

    return loss_values_train, loss_values_val

In [None]:
# load data - Mel Spectrograms
# (training portion of the FMA genre dataset, read with dataset.read_mel_spectrogram)
mel_spectro_dataset = dataset.SpectrogramDataset(fma_genre_spectrograms,
                                                 train=True,
                                                 class_mapping=dataset.CLASS_MAPPING,
                                                 max_length=-1,
                                                 read_spec_fn=dataset.read_mel_spectrogram#read_mel_spectrogram
                                                )
# train and validation sets
train_loader, val_loader = dataset.torch_train_val_split(mel_spectro_dataset, BATCH_SIZE, BATCH_SIZE, val_size=.33)

# test set
# NOTE: despite its name, `ttest_loader_` is the Dataset; `ttest_loader` below is the DataLoader.
ttest_loader_ = dataset.SpectrogramDataset(fma_genre_spectrograms,
                                           train=False,
                                           class_mapping=dataset.CLASS_MAPPING,
                                           max_length=-1,
                                           read_spec_fn=dataset.read_mel_spectrogram#read_mel_spectrogram
                                          )
ttest_loader = DataLoader(ttest_loader_, batch_size=BATCH_SIZE)

In [None]:
# overfit batch: train and validate on the same 16 samples as a sanity check
subset = torch.utils.data.Subset(mel_spectro_dataset, list(range(16)))
overfit_loader = DataLoader(subset, batch_size=2)
_ = train_cnn(
    train_loader=overfit_loader,
    val_loader=overfit_loader,
    save_string="dummy",
    epochs=10,
    regression=False,
    early_stopping=False,
)

In [None]:
# real training
# Genre classifier on the full train/validation split; the best checkpoint is written to "step7".
loss_values_train, loss_values_val = train_cnn(train_loader = train_loader,
                                               val_loader = val_loader,
                                               save_string="step7",
                                               epochs=EPOCHS,
                                               regression=False)

In [None]:
def test_report_CNN(model, test_loader, model_dir, regression = False):
    """Load a checkpoint into `model` and print its test-set performance.

    The model's `_fc_module` is replaced by a fresh ReLU -> Dropout -> Linear
    head (10 outputs for classification, 1 for regression) so that its shape
    matches the checkpoint before the weights are loaded.

    Args:
        model: A CustomCNN instance; it is modified in place.
        test_loader: Loader passed to `evaluate_model`.
        model_dir: Path of the checkpoint written by `torch.save`.
        regression: If True the mean MSE loss is printed; otherwise a
            scikit-learn classification report is printed.
    """
    if regression:
        criterion = nn.MSELoss()
        n_outputs = 1
    else:
        criterion = nn.CrossEntropyLoss()
        n_outputs = 10

    model._fc_module = nn.Sequential(
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(in_features=10240, out_features=n_outputs),
    ).to(DEVICE)

    # map_location lets a checkpoint saved on the GPU be loaded on a CPU-only machine
    model.load_state_dict(torch.load(model_dir, map_location=DEVICE))

    test_loss, (y_gold, y_pred) = evaluate_model(model, test_loader, criterion, regression)

    if regression:
        # classification_report rejects continuous targets, so report the loss instead
        print(f"Test MSE loss: {test_loss}")
    else:
        print(classification_report(np.concatenate(y_gold), np.concatenate(y_pred)))

In [None]:
# Evaluate the "step7" genre checkpoint on the test set.
test_report_CNN(CustomCNN().to(DEVICE), ttest_loader, "step7", regression=False)

## Step 8: Emotion-Behavior Estimation with Regression

In [None]:
# Multitask dataset directories (the same path strings as in the data-directories cell,
# under names that are not shadowed by the `multitask_dataset` module import).
multitask_dir = "/kaggle/input/patreco3-multitask-affective-music/data/multitask_dataset/"
multitask_beat_dir = "/kaggle/input/patreco3-multitask-affective-music/data/multitask_dataset_beat/"

In [None]:
# regression=1 presumably selects the valence target -- confirm in dataset2.SpectrogramDataset
dataset_valence = dataset2.SpectrogramDataset(multitask_dir,
                                      train=True,
                                      class_mapping=dataset.CLASS_MAPPING,
                                      max_length=-1,
                                      regression=1
                                      )
# train and validation sets
train_loader_valence, val_loader_valence = dataset2.torch_train_val_split(dataset_valence,
                                                                          batch_train=32,
                                                                          batch_eval=32,
                                                                          val_size=.2)

In [None]:
def test_cnn_model_multitask(test_loader, save_file):
    """Score a single-output regression CNN with Spearman correlation.

    Builds a CustomCNN with a 10240 -> 1 head, loads `save_file` into it, runs
    it over `test_loader`, prints the Spearman correlation between labels and
    predictions and shows a scatter plot of predictions against gold labels.

    NOTE: a function with the same name is defined again in the Step 10
    section; in a top-to-bottom run, calls made after that point use the later
    definition.

    Args:
        test_loader: Loader yielding (inputs, labels, lengths) batches.
        save_file: Path of the checkpoint written by `torch.save`.

    Returns:
        The Spearman correlation coefficient.
    """
    model_cnn = CustomCNN().to(DEVICE)

    model_cnn._fc_module = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(in_features=10240, out_features=1)
        ).to(DEVICE)

    model_cnn.load_state_dict(torch.load(save_file, map_location=DEVICE))
    model_cnn.eval()

    y_pred_test = []
    y_true = []
    with torch.no_grad():  # inference only
        for inputs, labels, lengths in test_loader:
            output = model_cnn(inputs.to(DEVICE).float())
            # Flatten per batch: the last batch may be smaller than the others,
            # so the batches cannot be stacked into one rectangular array.
            y_pred_test.append(output.cpu().numpy().ravel())
            y_true.append(labels.cpu().numpy().ravel())

    true = np.concatenate(y_true)
    pred = np.concatenate(y_pred_test)

    rho = scipy.stats.spearmanr(true, pred).correlation
    print('\nTest set: Spearman Correlation: {:.6f} \n'.format(rho))

    # Scatter Plot Predictions - Gold Labels
    plt.figure(figsize=(6,6))
    plt.scatter(true, pred)
    plt.xlabel('y_true')
    plt.ylabel('y_pred')
    golden_line = np.linspace(0,1,1000)  # y = x reference line on [0, 1]
    plt.plot(golden_line,golden_line, '-', color='k')
    plt.show()

    return rho

In [None]:
# valence training
# early_stopping=False: all EPOCHS are run and the final-epoch weights end up in "valence_CNN".
train_loss_valence, _ = train_cnn(train_loader = train_loader_valence,
                                               val_loader = val_loader_valence,
                                               save_string="valence_CNN",
                                               epochs=EPOCHS,
                                               regression=True,
                                               early_stopping=False)

In [None]:
# Spearman correlation of the valence CNN, measured on the validation loader.
valence_spear = test_cnn_model_multitask(val_loader_valence, "valence_CNN")

In [None]:
# regression=2 presumably selects the energy target -- confirm in dataset2.SpectrogramDataset
dataset_energy = dataset2.SpectrogramDataset(multitask_dir,
                                             train=True,
                                             class_mapping=dataset.CLASS_MAPPING,
                                             max_length=-1,
                                             regression=2
                                             )
# train and validation sets
train_loader_energy, val_loader_energy = dataset2.torch_train_val_split(dataset_energy,
                                                                          batch_train=32,
                                                                          batch_eval=32,
                                                                          val_size=.2)

In [None]:
# energy training
# early_stopping=False: all EPOCHS are run and the final-epoch weights end up in "energy_CNN".
train_loss_energy, _ = train_cnn(train_loader = train_loader_energy,
                                               val_loader = val_loader_energy,
                                               save_string="energy_CNN",
                                               epochs=EPOCHS,
                                               regression=True,
                                               early_stopping=False)

In [None]:
# Spearman correlation of the energy CNN, measured on the validation loader.
energy_spear = test_cnn_model_multitask(val_loader_energy, "energy_CNN")

In [None]:
# regression=3 presumably selects the danceability target -- confirm in dataset2.SpectrogramDataset
dataset_dancability = dataset2.SpectrogramDataset(multitask_dir,
                                             train=True,
                                             class_mapping=dataset.CLASS_MAPPING,
                                             max_length=-1,
                                             regression=3
                                             )
# train and validation sets
train_loader_dancability, val_loader_dancability = dataset2.torch_train_val_split(dataset_dancability,
                                                                          batch_train=32,
                                                                          batch_eval=32,
                                                                          val_size=.2)

In [None]:
# danceability training
# early_stopping=False: all EPOCHS are run and the final-epoch weights end up in "dancability_CNN".
train_loss_dancability, _ = train_cnn(train_loader = train_loader_dancability,
                                               val_loader = val_loader_dancability,
                                               save_string="dancability_CNN",
                                               epochs=EPOCHS,
                                               regression=True,
                                               early_stopping=False)

In [None]:
# Spearman correlation of the danceability CNN, measured on the validation loader.
dancability_spear = test_cnn_model_multitask(val_loader_dancability, "dancability_CNN")

In [None]:
# mean Spearman correlation over the three single-task CNN regressors
print((valence_spear + energy_spear + dancability_spear)/3)

### Repeat the same for the LSTM of lab2

In [None]:
RNN_SIZE = 128  # passed as rnn_size to lab2_lstm.CustomLSTM
NUM_LAYERS = 4  # passed as num_layers to lab2_lstm.CustomLSTM
n_mel = 140  # passed as input_dim to lab2_lstm.CustomLSTM
EPOCHS = 30  # rebinds the global EPOCHS from the CNN hyperparameter cell (same value)
lr = 0.001  # learning rate read by train_lstm

In [None]:
def train_lstm(train_loader, val_loader, save_string, early_stopping=True):
    """Train a bidirectional lab2_lstm.CustomLSTM regressor with MSELoss.

    The model size comes from the module-level n_mel, RNN_SIZE and NUM_LAYERS;
    the number of epochs and the learning rate come from the module-level
    EPOCHS and lr.

    Args:
        train_loader: Loader yielding (inputs, labels, lengths) batches.
        val_loader: Loader evaluated with `evaluate_model` after every epoch.
        save_string: Path of the checkpoint file.
        early_stopping: If True stop once the validation loss has not improved
            for more than 7 consecutive epochs. If False, the final-epoch
            weights are saved after training and overwrite the best checkpoint.

    Returns:
        (loss_values_train, loss_values_val): per-epoch mean training loss and
        validation loss.
    """
    model_lstm = lab2_lstm.CustomLSTM(input_dim=n_mel,
                                      rnn_size=RNN_SIZE,
                                      output_dim=1,
                                      num_layers=NUM_LAYERS,
                                      bidirectional=True,
                                      dropout=0).double().to(DEVICE)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model_lstm.parameters(), lr=lr, weight_decay=0.0001)

    loss_values_train = []
    loss_values_val = []
    count = 0  # epochs since the last validation improvement
    opt_val_loss = float("inf")

    for epoch in range(EPOCHS):
        # evaluate_model() leaves the model in eval mode; switch back once per epoch
        model_lstm.train()
        train_losses = []

        for inputs, labels, lengths in train_loader:
            inputs, labels, lengths = inputs.to(DEVICE), labels.to(DEVICE), lengths.to(DEVICE)

            model_lstm.zero_grad()
            output = model_lstm(inputs.double(), lengths)
            loss = criterion(output.squeeze(), labels.double())
            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        val_loss, _ = evaluate_model(model_lstm, val_loader, criterion, regression=True, lstm=True)

        if val_loss < opt_val_loss:
            opt_val_loss = val_loss
            torch.save(model_lstm.state_dict(), save_string)
            count = 0
        else:
            count += 1

        loss_values_train.append(np.mean(train_losses))
        loss_values_val.append(val_loss)

        print(f'Epoch {epoch}: \t Training Loss = {loss_values_train[-1]} \t--\t Validation Loss = {loss_values_val[-1]}')
        if count > 7 and early_stopping:
            print("Terminated due to early stopping")
            break

    if not early_stopping:
        torch.save(model_lstm.state_dict(), save_string)

    return loss_values_train, loss_values_val

In [None]:
def test_lstm_model_multitask(test_loader, rnn_size, num_layers, save_file):
    """Score a saved LSTM regressor with Spearman correlation.

    Rebuilds a bidirectional lab2_lstm.CustomLSTM (input_dim = module-level
    n_mel, one output), loads `save_file`, runs it over `test_loader`, prints
    the Spearman correlation between labels and predictions and shows a
    scatter plot of predictions against gold labels.

    Args:
        test_loader: Loader yielding (inputs, labels, lengths) batches.
        rnn_size: Hidden size the checkpoint was trained with.
        num_layers: Number of LSTM layers the checkpoint was trained with.
        save_file: Path of the checkpoint written by `torch.save`.

    Returns:
        The Spearman correlation coefficient.
    """
    model_lstm = lab2_lstm.CustomLSTM(input_dim=n_mel,
                                      rnn_size=rnn_size,
                                      output_dim=1,
                                      num_layers=num_layers,
                                      bidirectional=True,
                                      dropout=0).double().to(DEVICE)

    model_lstm.load_state_dict(torch.load(save_file, map_location=DEVICE))
    model_lstm.eval()

    y_pred_test = []
    y_true = []
    with torch.no_grad():  # inference only
        for inputs, labels, lengths in test_loader:
            inputs, lengths = inputs.to(DEVICE), lengths.to(DEVICE)
            output = model_lstm(inputs.double(), lengths)
            # Flatten per batch: the last batch may be smaller than the others,
            # so the batches cannot be stacked into one rectangular array.
            y_pred_test.append(output.cpu().numpy().ravel())
            y_true.append(labels.cpu().numpy().ravel())

    true = np.concatenate(y_true)
    pred = np.concatenate(y_pred_test)

    rho = scipy.stats.spearmanr(true, pred).correlation
    print('\nTest set: Spearman Correlation: {:.6f} \n'.format(rho))

    # Scatter Plots
    plt.figure(figsize=(6,6))
    plt.scatter(true, pred)
    plt.xlabel('y_true')
    plt.ylabel('y_pred')
    golden_line = np.linspace(0,1,1000)  # y = x reference line on [0, 1]
    plt.plot(golden_line,golden_line, '-', color='k')
    plt.show()

    return rho

In [None]:
# valence
# LSTM regressor on the valence loaders; the checkpoint is written to "lstm_valence".
train_losses_lstm_val, val_losses_lstm_val = train_lstm(train_loader_valence,
                                                        val_loader_valence,
                                                        "lstm_valence",
                                                        early_stopping=False)

In [None]:
# Spearman correlation of the valence LSTM, measured on the validation loader.
lstm_valence_rho = test_lstm_model_multitask(val_loader_valence,
                                             rnn_size=RNN_SIZE,
                                             num_layers=NUM_LAYERS,
                                             save_file="lstm_valence")

In [None]:
# energy
# LSTM regressor on the energy loaders; the checkpoint is written to "lstm_energy".
train_losses_lstm_energy, val_losses_lstm_energy = train_lstm(train_loader_energy,
                                                        val_loader_energy,
                                                        "lstm_energy",
                                                        early_stopping=False)

In [None]:
# Spearman correlation of the energy LSTM, measured on the validation loader.
lstm_energy_rho = test_lstm_model_multitask(val_loader_energy,
                                             rnn_size=RNN_SIZE,
                                             num_layers=NUM_LAYERS,
                                             save_file="lstm_energy")

In [None]:
# danceability
# LSTM regressor on the danceability loaders; the checkpoint is written to "lstm_dancability".
train_losses_lstm_dance, val_losses_lstm_dance = train_lstm(train_loader_dancability,
                                                        val_loader_dancability,
                                                        "lstm_dancability",
                                                        early_stopping=False)

In [None]:
# Spearman correlation of the danceability LSTM, measured on the validation loader.
lstm_dancability_rho = test_lstm_model_multitask(val_loader_dancability,
                                                 rnn_size=RNN_SIZE,
                                                 num_layers=NUM_LAYERS,
                                                 save_file="lstm_dancability")

In [None]:
# mean Spearman correlation over the three LSTM regressors
print(np.mean([lstm_valence_rho, lstm_energy_rho, lstm_dancability_rho]))

## Step 9: Transfer Learning

In [None]:
# energy training
# transfer=True: train_cnn loads the "step7" genre checkpoint, freezes the loaded
# parameters and trains a new 1-output head; it always saves to "step9" in this mode.
train_loss_energy, _ = train_cnn(train_loader = train_loader_energy,
                                   val_loader = val_loader_energy,
                                   save_string="step9",
                                   epochs=10,
                                   regression=True,
                                   early_stopping=True,
                                   transfer=True)

In [None]:
# Spearman correlation of the transfer-learned energy model, measured on the validation loader.
energy_spear_step9 = test_cnn_model_multitask(val_loader_energy, "step9")

## Step 10: Multitask Learning


In [None]:
class MyLoss(nn.Module):
    """Multitask regression loss: the sum of three per-column MSE losses."""

    def forward(self, y_gold, y_pred):
        """Return MSE(column 0) + MSE(column 1) + MSE(column 2).

        Both arguments are indexed as [:, k] for k = 0, 1, 2 (the original code
        names these columns valence, energy and dance). MSE is symmetric, so
        the order in which gold labels and predictions are passed does not
        change the value.
        """
        mse = nn.MSELoss()
        per_task = [mse(y_gold[:, col], y_pred[:, col]) for col in range(3)]
        return per_task[0] + per_task[1] + per_task[2]

In [None]:
# Multitask dataset, split 85% / 15% into training and validation loaders.
multi_dataset = multitask_dataset.MultitaskDataset(multitask_dir,
                                                   train=True,
                                                   class_mapping=dataset.CLASS_MAPPING,
                                                   max_length=-1,
                                                   regression=True
                                                  )

# train and validation sets
train_loader_multi, val_loader_multi = dataset2.torch_train_val_split(multi_dataset,
                                                                      batch_train=32,
                                                                      batch_eval=32,
                                                                      val_size=0.15)

In [None]:
def evaluate_model2(model, dataloader, criterion):
    """Evaluate a multi-output regression `model` without computing gradients.

    Args:
        model: Network to evaluate. It is switched to eval mode and is still
            in eval mode when this function returns.
        dataloader: Iterable of batches; batch[0] holds the inputs and batch[1]
            the labels.
        criterion: Loss, called as criterion(predictions, labels).

    Returns:
        (mean_loss, (y_gold, y_pred)) where mean_loss is the average of the
        per-batch losses and y_gold / y_pred are lists with one numpy array
        per batch.

    Raises:
        ValueError: If `dataloader` yields no batches.
    """
    model.eval()  # switch to evaluation mode: disable dropout
    device = next(model.parameters()).device  # evaluate on the model's own device

    total_loss = 0.0
    n_batches = 0
    y_pred = []  # predicted labels
    y_gold = []  # gold labels

    with torch.no_grad():  # do not compute gradients
        for batch in dataloader:
            inputs = batch[0].to(device).float()
            labels = batch[1].to(device).float()
            y_preds = model(inputs)

            # Squeeze only when the ranks differ: an unconditional squeeze()
            # also drops the batch dimension of a single-sample batch, which
            # breaks losses that index columns with [:, k].
            if y_preds.dim() == labels.dim():
                scored = y_preds
            else:
                scored = y_preds.squeeze()
            loss = criterion(scored, labels)

            total_loss += loss.item()
            n_batches += 1

            y_pred.append(y_preds.cpu().numpy())
            y_gold.append(labels.cpu().numpy())

    if n_batches == 0:
        raise ValueError("evaluate_model2: dataloader yielded no batches")

    return total_loss / n_batches, (y_gold, y_pred)

In [None]:
def train_cnn2(train_loader, val_loader, save_string, epochs, early_stopping=True, transfer=False):
    """Train a 3-output CustomCNN with the multitask loss `MyLoss`.

    Args:
        train_loader: Loader yielding (inputs, labels, lengths) batches; the
            lengths are not used by the CNN.
        val_loader: Loader evaluated with `evaluate_model2` after every epoch.
        save_string: Path of the checkpoint file.
        epochs: Maximum number of epochs.
        early_stopping: If True stop once the validation loss has not improved
            for more than 10 consecutive epochs. If False, the final-epoch
            weights are saved after training and overwrite the best checkpoint.
        transfer: Unused; kept so existing calls remain valid.

    Returns:
        (loss_values_train, loss_values_val): per-epoch mean training loss and
        validation loss.
    """
    model_cnn = CustomCNN().to(DEVICE).float()

    model_cnn._fc_module = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(in_features=20480, out_features=3)
        ).to(DEVICE)

    criterion = MyLoss()
    optimizer = torch.optim.Adam(model_cnn.parameters(), lr=LR, weight_decay=0.0001)

    loss_values_train = []
    loss_values_val = []
    count = 0  # epochs since the last validation improvement
    opt_val_loss = float("inf")

    for epoch in range(epochs):
        # evaluate_model2() leaves the model in eval mode, so training mode has
        # to be re-enabled at the start of every epoch (previously only the
        # first epoch was trained with dropout / batch-norm updates active).
        model_cnn.train()

        train_losses = []
        for inputs, labels, lengths in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            model_cnn.zero_grad()
            output = model_cnn(inputs.float())
            # The head already returns (batch, 3); squeezing would drop the
            # batch dimension of a single-sample batch and break MyLoss.
            loss = criterion(output, labels.float())
            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        val_loss, _ = evaluate_model2(model_cnn, val_loader, criterion)

        if val_loss < opt_val_loss:
            opt_val_loss = val_loss
            torch.save(model_cnn.state_dict(), save_string)
            count = 0
        else:
            count += 1

        loss_values_train.append(np.mean(train_losses))
        loss_values_val.append(val_loss)

        print(f'Epoch {epoch}: \t Training Loss = {loss_values_train[-1]} \t--\t Validation Loss = {loss_values_val[-1]}')
        if count > 10 and early_stopping:
            print("Terminated due to early stopping")
            break

    if not early_stopping:
        torch.save(model_cnn.state_dict(), save_string)

    return loss_values_train, loss_values_val

In [None]:
# Multitask training; the best checkpoint is written to "step10".
loss_values_train, loss_values_val = train_cnn2(train_loader_multi,
                                                val_loader_multi,
                                                "step10",
                                                epochs=30,
                                                early_stopping=True)

In [None]:
def test_cnn_model_multitask(test_loader, save_file):
    """Print per-task and mean Spearman correlations of a 3-output CNN.

    Builds a CustomCNN with a 20480 -> 3 head, loads `save_file` into it and
    runs it over `test_loader`. Columns 0, 1 and 2 of the predictions are
    compared with the same columns of the labels.

    NOTE: this definition replaces the earlier single-output function of the
    same name for every call made after this cell has run.

    Args:
        test_loader: Loader yielding (inputs, labels, lengths) batches.
        save_file: Path of the checkpoint written by `torch.save`.
    """
    model_cnn = CustomCNN().to(DEVICE)
    model_cnn._fc_module = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(in_features=20480, out_features=3)
        ).to(DEVICE)

    model_cnn.load_state_dict(torch.load(save_file, map_location=DEVICE))
    model_cnn.eval()

    y_pred_test = []
    y_true = []
    with torch.no_grad():  # inference only
        for inputs, labels, lengths in test_loader:
            output = model_cnn(inputs.to(DEVICE).float())
            y_pred_test.append(output.cpu().numpy())
            y_true.append(labels.cpu().numpy())

    # Concatenate the per-batch arrays directly: the last batch may be smaller,
    # so the list of batches cannot first be turned into one rectangular array.
    convenient_y_pred_test = np.concatenate(y_pred_test)
    convenient_y_true = np.concatenate(y_true)

    rho_valence = scipy.stats.spearmanr(convenient_y_pred_test[:, 0], convenient_y_true[:, 0]).correlation
    rho_energy  = scipy.stats.spearmanr(convenient_y_pred_test[:, 1], convenient_y_true[:, 1]).correlation
    rho_dance   = scipy.stats.spearmanr(convenient_y_pred_test[:, 2], convenient_y_true[:, 2]).correlation
    mean = np.mean([rho_valence, rho_energy, rho_dance])

    print(rho_valence)
    print(rho_energy)
    print(rho_dance)
    print(mean)

In [None]:
# Per-task and mean Spearman correlation of the "step10" multitask model on the validation loader.
test_cnn_model_multitask(val_loader_multi, "step10")

## Step 11: Kaggle Submission

In [None]:
import copy
import os

import numpy as np
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler


def torch_train_val_split(
    dataset, batch_train, batch_eval, val_size=0.2, shuffle=True, seed=420
):
    """Split `dataset` into a training and a validation DataLoader.

    The first floor(val_size * len(dataset)) indices (after an optional seeded
    shuffle through NumPy's global RNG) form the validation set, the rest the
    training set. Both loaders draw from their index subset with a
    SubsetRandomSampler.

    Returns:
        (train_loader, val_loader)
    """
    n_items = len(dataset)
    order = list(range(n_items))
    n_val = int(np.floor(val_size * n_items))

    if shuffle:
        np.random.seed(seed)
        np.random.shuffle(order)

    val_ids, train_ids = order[:n_val], order[n_val:]

    train_loader = DataLoader(
        dataset, batch_size=batch_train, sampler=SubsetRandomSampler(train_ids)
    )
    val_loader = DataLoader(
        dataset, batch_size=batch_eval, sampler=SubsetRandomSampler(val_ids)
    )
    return train_loader, val_loader


def read_spectrogram(spectrogram_file, chroma=True):
    """Load the array stored in `spectrogram_file` and return its transpose.

    `chroma` is accepted but not used.
    """
    return np.load(spectrogram_file).T


class LabelTransformer(LabelEncoder):
    """LabelEncoder that also accepts a single label instead of a sequence.

    Each method first tries the value as given; if the encoder rejects it with
    a TypeError or ValueError, the value is wrapped in a one-element list and
    the call is retried.
    """

    def inverse(self, y):
        """Map encoded label(s) back to the original labels."""
        try:
            return super().inverse_transform(y)
        except (TypeError, ValueError):
            # Only input errors trigger the retry, so that KeyboardInterrupt
            # and unrelated failures are no longer swallowed.
            return super().inverse_transform([y])

    def transform(self, y):
        """Encode label(s) as integers."""
        try:
            return super().transform(y)
        except (TypeError, ValueError):
            return super().transform([y])

class PaddingTransform(object):
    """Truncate or pad a 2D array along axis 0 to exactly `max_length` rows.

    Args:
        max_length: Number of rows of the returned array.
        padding_value: Value used for the appended rows (default 0). It was
            previously stored but ignored: padding was always zeros.
    """

    def __init__(self, max_length, padding_value=0):
        self.max_length = max_length
        self.padding_value = padding_value

    def __call__(self, s):
        """Return `s` unchanged, truncated, or padded with float32 rows."""
        n_rows = len(s)

        if n_rows == self.max_length:
            return s

        if n_rows > self.max_length:
            return s[: self.max_length]

        pad = np.full(
            (self.max_length - n_rows, s.shape[1]),
            self.padding_value,
            dtype=np.float32,
        )
        # np.vstack allocates a new array, so `s` itself is never modified
        return np.vstack((s, pad))

class KaggleDataset(Dataset):
    """Spectrograms found in the `test` sub-directory of `path`.

    Every file in <path>/test is read with `read_spectrogram`. Items are
    (padded_or_truncated_features, file_id, length), where file_id is the file
    name up to its first '.' and length is capped at `max_length`.
    `class_mapping` is accepted but not used.
    """

    def __init__(
        self, path, class_mapping=None, max_length=-1, regression=None
    ):
        test_dir = os.path.join(path, "test")
        self.regression = regression

        entries = os.listdir(test_dir)
        self.files = [name.split('.')[0] for name in entries]
        self.feats = [read_spectrogram(os.path.join(test_dir, name)) for name in entries]

        self.feat_dim = self.feats[0].shape[1]
        self.lengths = [len(feat) for feat in self.feats]
        # a non-positive max_length means "use the longest sample"
        if max_length <= 0:
            self.max_length = max(self.lengths)
        else:
            self.max_length = max_length
        self.zero_pad_and_stack = PaddingTransform(self.max_length)

    def __getitem__(self, item):
        features = self.zero_pad_and_stack(self.feats[item])
        length = min(self.lengths[item], self.max_length)
        return features, self.files[item], length

    def __len__(self):
        return len(self.feats)

In [None]:
# Kaggle test set: the files under <multitask_dir>/test.
kaggle_test_dataset = KaggleDataset(multitask_dir,
                                      class_mapping=dataset.CLASS_MAPPING
                                      )

# val_size=0 puts every sample into the first loader; the (empty) second loader is discarded.
kaggle_test_dataloader, _ = torch_train_val_split(dataset=kaggle_test_dataset,
                                               batch_train=32,
                                               batch_eval=32,
                                               val_size=0)

In [None]:
def test_cnn_2(test_loader, model_dir):
    """Run the saved 3-output CNN over an unlabeled loader.

    Args:
        test_loader: Loader yielding (inputs, file_ids, lengths) batches.
        model_dir: Path of the checkpoint written by `torch.save`.

    Returns:
        (filenames, pred): the file ids in the order they were processed and a
        numpy array holding one row of three predictions per file.
    """
    model_cnn = CustomCNN().to(DEVICE).float()
    model_cnn._fc_module = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(in_features=20480, out_features=3)
        ).to(DEVICE)

    model_cnn.load_state_dict(torch.load(model_dir, map_location=DEVICE))
    model_cnn.eval()

    filenames = []
    batch_preds = []
    with torch.no_grad():  # inference only
        for inputs, batch_files, _ in test_loader:
            filenames.extend(batch_files)
            output = model_cnn(inputs.to(DEVICE).float())
            batch_preds.append(output.cpu().numpy())

    pred = np.concatenate(batch_preds)

    return filenames, pred

In [None]:
filenames, pred = test_cnn_2(kaggle_test_dataloader, "step10")
# One row per test file: [file id, valence, energy, danceability].
# The row count follows the data instead of a hard-coded 375.
toKaggle = np.zeros((len(filenames), 4))
toKaggle[:,0] = filenames  # the ids are numeric strings; NumPy converts them to float here
toKaggle[:,1:] = pred

In [None]:
# Write the submission file: one row per prediction in toKaggle
# (all rows are written rather than a hard-coded 375).
with open('/kaggle/working/kaggle_sub.csv', 'w', newline='\n') as file:
    writer = csv.writer(file)
    writer.writerow(["Id.fused.full.npy.gz", "valence", "energy", "danceability"])
    for file_id, valence, energy, dance in toKaggle:
        writer.writerow([
            str(int(file_id)) + '.fused.full.npy.gz',  # rebuild the file name from its numeric id
            str(valence),
            str(energy),
            str(dance),
        ])