# Neural Networks

In [1]:
import time
import os
import pprint
import torch
import torch.nn as nn
from sklearn.metrics import ConfusionMatrixDisplay
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import librosa
import librosa.display
import tqdm.notebook as tq
import utils
import Datasets, Models
from pydub import AudioSegment
from tkinter import Tcl # file sorting by name

# Load STFT Dataset

### Dictionary creation for the classes

We want a dictionary indicating a numbeer for each genre:

{0: 'Hip-Hop', 1: 'Pop', 2: 'Folk', 3: 'Rock', 4: 'Experimental', 5: 'International', 6: 'Electronic', 7: 'Instrumental'}

### Creation of the labels vector

In [2]:
def create_single_dataset(folder_path, tracks_dataframe, genre_dictionary):    
    labels = []
   
    _, file_list = get_sorted_file_paths(folder_path)
    
    for i,file in enumerate(file_list):
        #print("considering file:",file, "({}/{})".format(i,len(file_list)))
        track_id_clip_id = file.split('.')[0]
        track_id = track_id_clip_id.split('_')[0]
        #print("track id with clip: {}, track id: {}".format(track_id_clip_id, track_id))
        genre = tracks_dataframe.loc[int(track_id)]
        #print("genre from dataframe: ", genre)
        label = genre_dictionary[genre]
        #print("label from dictionary:",label)
        labels.append(label)
    print("labels length: {}".format(len(labels)))
    return labels
    

#create the train,validation and test vectors using the files in the train/validation/test folders
def create_dataset_splitted(folder_path):
    train_folder = os.path.join(folder_path,'train') # concatenate train folder to path
    validation_folder = os.path.join(folder_path,'validation') # concatenate train folder to path
    test_folder = os.path.join(folder_path,'test') # concatenate train folder to path
    
    print("train_folder:",train_folder)
    print("validation_folder:",validation_folder)
    print("test_folder:",test_folder,"\n")
    
    AUDIO_DIR = os.environ.get('AUDIO_DIR')
    print("audio directory: ",AUDIO_DIR)
    print("Loading tracks.csv...")
    tracks = utils.load('data/fma_metadata/tracks.csv')
    
    #get only the small subset of the dataset
    small = tracks[tracks['set', 'subset'] <= 'small']
    print("small dataset shape:",small.shape)    

    small_training = small.loc[small[('set', 'split')] == 'training']['track']
    small_validation = small.loc[small[('set', 'split')] == 'validation']['track']
    small_test = small.loc[small[('set', 'split')] == 'test']['track']

    print("Track.csv: {} training samples, {} validation samples, {} test samples\n".format(len(small_training), len(small_validation), len(small_test)))

    small_training_top_genres = small_training['genre_top']
    small_validation_top_genres = small_validation['genre_top']
    small_test_top_genres = small_test['genre_top']
    
    #create dictionary of genre classes:
    unique_genres = small_training_top_genres.unique()
    unique_genres = np.array(unique_genres)
    print("there are {} unique genres".format(len(unique_genres)))
    genre_dictionary = {}
    for i,genre in enumerate(unique_genres):
        genre_dictionary[genre] = i
    print("Dictionary of genres created:",genre_dictionary)
    
    
    Y_train = create_single_dataset(train_folder, small_training_top_genres, genre_dictionary)
    Y_validation = create_single_dataset(validation_folder, small_validation_top_genres, genre_dictionary)
    Y_test = create_single_dataset(test_folder, small_test_top_genres, genre_dictionary)
    
    return Y_train, Y_validation, Y_test
 
def get_sorted_file_paths(folder_path):
    file_list = os.listdir(folder_path)
    #sort the dataset files in alphabetical order (important to associate correct labels created using track_id in track.csv)
    file_list = Tcl().call('lsort', '-dict', file_list) # sort file by name: 2_0,2_1, ... 2_9,3_0, ... 400_0,400_1, ...
    file_paths = [os.path.join(folder_path, file_name) for file_name in file_list] #join filename with folder path
    #print("There are {} in the folder: {}".format(len(file_list),file_list))
    return file_paths, file_list
    
    
folder_path="data/fma_small_stft_transposed_22050_overlapped"
Y_train, Y_validation, Y_test = create_dataset_splitted(folder_path)

train_folder: data/fma_small_stft_transposed_22050_overlapped/train
validation_folder: data/fma_small_stft_transposed_22050_overlapped/validation
test_folder: data/fma_small_stft_transposed_22050_overlapped/test 

audio directory:  ./data/fma_small/
Loading tracks.csv...
small dataset shape: (8000, 52)
Track.csv: 6400 training samples, 800 validation samples, 800 test samples

there are 8 unique genres
Dictionary of genres created: {'Hip-Hop': 0, 'Pop': 1, 'Folk': 2, 'Rock': 3, 'Experimental': 4, 'International': 5, 'Electronic': 6, 'Instrumental': 7}


FileNotFoundError: [Errno 2] No such file or directory: 'data/fma_small_stft_transposed_22050_overlapped/train'

# Dataset Class

Class to load the STFT from files. Each file has a (128,513) matrix containing the STFT of a 3 seconds audio clip.

In [13]:
folder_path="data/fma_small_stft_transposed_22050_overlapped"

stft_train_folder = os.path.join(folder_path,'train') # concatenate train folder to path
stft_validation_folder = os.path.join(folder_path,'validation') # concatenate train folder to path
stft_test_folder = os.path.join(folder_path,'test') # concatenate train folder to path

stft_train_file_paths, _ = get_sorted_file_paths(stft_train_folder)
stft_train_dataset = Datasets.DatasetSTFT(stft_train_file_paths, Y_train)
print("len of train dataset: ",len(stft_train_dataset))

stft_validation_file_paths, _ = get_sorted_file_paths(stft_validation_folder)
stft_validation_dataset = Datasets.DatasetSTFT(stft_validation_file_paths, Y_validation)
print("len of validation dataset: ",len(stft_validation_dataset))

stft_test_file_paths, _ = get_sorted_file_paths(stft_test_folder)
stft_test_dataset = Datasets.DatasetSTFT(stft_test_file_paths, Y_test)
print("len of test dataset: ",len(stft_test_dataset))

FileNotFoundError: [Errno 2] No such file or directory: 'data/fma_small_stft_transposed_22050_overlapped/train'

# Data normalization
We will use Z-Score to normalize the training, validation and test set by calculating the mean and the std deviation on the training set.

In [None]:
save_filename = './data/fma_small_stft_transposed_22050_overlapped/train_mean'
std_save_filename = './data/fma_small_stft_transposed_22050_overlapped/train_std_deviation'

## Calculation of mean and standard deviation (Long)

In [None]:
batch_size=1
total_n_batches = len(stft_train_dataset)/batch_size
train_loader = torch.utils.data.DataLoader(stft_train_dataset, batch_size=batch_size)
current_sum=0

#iter all the training set by batches and calculate the sum of all the sample values (513*128 values for each sample)
for batch_idx, batch in enumerate(train_loader):
    #print("batch",batch_idx,"/",total_n_batches,"current_sum:",current_sum)
    inputs = batch[0]
    labels = batch[1]
    #print("inputs: shape:",inputs.shape,"content:",inputs)
    #print("labels:",labels)
    for sample in inputs:
        #print("sample: shape",sample.shape,"content:",sample)
        current_sum += torch.sum(sample)
        #print("current_sum:",current_sum)
print("final sum:",current_sum)



In [None]:
mean = current_sum/(len(stft_train_dataset)*513*128) #divide the sum for the total number of values considerated
print("mean of training set:",mean)

print("Saving the mean in file:",save_filename) 
np.save(save_filename,mean)

In [None]:
#now let's calculate the standard deviation (squared root of the variance)

batch_size=1
total_n_batches = len(stft_train_dataset)/batch_size
train_loader = torch.utils.data.DataLoader(stft_train_dataset, batch_size=batch_size)
current_sum_of_squares = 0

for batch_idx, batch in enumerate(train_loader):
    #print("batch",batch_idx,"/",total_n_batches,"current_sum_of_squares:",current_sum_of_squares)
    inputs = batch[0]
    labels = batch[1]
    #print("inputs: shape:",inputs.shape,"content:",inputs)
    #print("labels:",labels)
    for sample in inputs:
        #print("sample shape",sample.shape)
        for row in sample:
            #print("row shape:",row.shape)
            for elem in row:
                #print("elem: shape",elem.shape,"content:",elem)
                difference = elem - mean
                difference_squared = difference**2
                current_sum_of_squares += difference_squared
                #print("current_sum:",current_sum)
print("final sum of squares:",current_sum_of_squares)


In [None]:
import math

variance = current_sum_of_squares/((len(stft_train_dataset) * 513 * 128)-1)
std_deviation = math.sqrt(variance)

print("variance:",variance)
print("std_deviation:",std_deviation)

print("Saving the std_deviation in file:",std_save_filename) 
np.save(std_save_filename,std_deviation)

## Load calculated mean and std deviation from file

In [None]:
loaded_mean = np.load(save_filename+'.npy')
print("loaded mean:",loaded_mean)

loaded_std = np.load(std_save_filename+'.npy')
print("loaded std:",loaded_std)

## Create the normalized dataset

In [None]:
from torchvision import transforms

batch_size = 1

transform = transforms.Compose([
    transforms.Normalize(mean= loaded_mean, std= loaded_std)
])

stft_train_dataset = Datasets.DatasetSTFT(stft_train_file_paths, Y_train,  transform = transform)
stft_validation_dataset = Datasets.DatasetSTFT(stft_validation_file_paths, Y_validation,  transform = transform)
stft_test_dataset = Datasets.DatasetSTFT(stft_test_file_paths, Y_test,  transform = transform)

## Plot a STFT spectrogram

In [None]:
batch_size=1
train_loader = torch.utils.data.DataLoader(stft_train_dataset, batch_size=batch_size)

#iter all the training set by batches and calculate the sum of all the sample values (513*128 values for each sample)
for batch_idx, batch in enumerate(train_loader):
    inputs = batch[0]
    labels = batch[1]
    print("inputs: shape:",inputs.shape,"content:",inputs)
    print("labels:",labels)
    for sample in inputs:
        print("sample: shape",sample.shape,"content:",sample)
        fig, ax = plt.subplots(dpi=500)
        img = librosa.display.specshow(librosa.amplitude_to_db(sample.T,ref=np.max), y_axis='log', x_axis='time', ax=ax)
        ax.set_xlabel('Time (s)')
        ax.set_ylabel('Frequency (Hz)')

        fig.colorbar(img, ax=ax, format="%+2.0f dB")
    if(batch_idx==4):
        break


# STFT Models

### NNet1

In [27]:
model_NNet1 = Models.NNet1()
summary(model_NNet1, (1, 128, 513))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 125, 1]         262,784
       BatchNorm2d-2          [-1, 128, 125, 1]             256
              ReLU-3          [-1, 128, 125, 1]               0
         MaxPool2d-4           [-1, 128, 62, 1]               0
            Conv2d-5           [-1, 128, 59, 1]          65,664
       BatchNorm2d-6           [-1, 128, 59, 1]             256
              ReLU-7           [-1, 128, 59, 1]               0
         MaxPool2d-8           [-1, 128, 29, 1]               0
            Conv2d-9           [-1, 256, 26, 1]         131,328
      BatchNorm2d-10           [-1, 256, 26, 1]             512
        AvgPool2d-11            [-1, 256, 1, 1]               0
        MaxPool2d-12            [-1, 256, 1, 1]               0
          Flatten-13                  [-1, 512]               0
           Linear-14                  [

### NNet2

In [28]:
model_NNet2 = Models.NNet2()
summary(model_NNet2, (1, 128, 513))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 125, 1]         262,784
       BatchNorm2d-2          [-1, 128, 125, 1]             256
            Conv2d-3          [-1, 128, 124, 3]          65,664
       BatchNorm2d-4          [-1, 128, 124, 3]             256
            Conv2d-5          [-1, 128, 125, 7]          65,664
       BatchNorm2d-6          [-1, 128, 125, 7]             256
         MaxPool2d-7               [-1, 1, 125]               0
         AvgPool2d-8               [-1, 1, 125]               0
            Linear-9                  [-1, 128]          32,128
          Dropout-10                  [-1, 128]               0
      BatchNorm1d-11                  [-1, 128]             256
           Linear-12                   [-1, 64]           8,256
          Dropout-13                   [-1, 64]               0
      BatchNorm1d-14                   

### NNet1_Small

In [29]:
model_NNet1_Small = Models.NNet1_Small()
summary(model_NNet1_Small, (1, 128, 513))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 125, 1]         262,784
       BatchNorm2d-2          [-1, 128, 125, 1]             256
            Conv2d-3          [-1, 128, 124, 3]          65,664
       BatchNorm2d-4          [-1, 128, 124, 3]             256
            Conv2d-5          [-1, 128, 125, 7]          65,664
       BatchNorm2d-6          [-1, 128, 125, 7]             256
         MaxPool2d-7               [-1, 1, 125]               0
         AvgPool2d-8               [-1, 1, 125]               0
            Linear-9                  [-1, 128]          32,128
          Dropout-10                  [-1, 128]               0
      BatchNorm1d-11                  [-1, 128]             256
           Linear-12                   [-1, 64]           8,256
          Dropout-13                   [-1, 64]               0
      BatchNorm1d-14                   

# Hyperparameters

In [None]:
BATCH_SIZE=32
EPOCHS=10

learning_rate_list = [0.001,0.0001,0.00001]
reg_list=[0.001,0.0001,0.00001]

# Train function

In [14]:
def test(model,entries , true_labels,RGB=False):
    # Stop parameters learning
    model.eval()

    data_loader = torch.utils.data.DataLoader(entries)

    criterion = nn.CrossEntropyLoss()
    correct = 0
    total = 0
    total_loss = 0
    confusion_matrix = np.zeros((8, 8), dtype=int)

    with torch.no_grad():
        for sample, label in data_loader:
            
            if RGB==False:
                sample = sample.unsqueeze(1)

            # Predict label
            output = model(sample)
            # Compute loss
            loss = criterion(output, label)
            total_loss += loss.item()

            max_index = torch.argmax(output).item()  # The index with maximum probability

            confusion_matrix[label][max_index] += 1

            correct += (max_index == label)

    fig, ax = plt.subplots(dpi=500)
    cm=ConfusionMatrixDisplay(confusion_matrix=confusion_matrix)
    cm.plot(ax=ax)
    print(confusion_matrix)
    accuracy = 100 * correct / len(true_labels)
    average_loss = total_loss / len(true_labels)

    model.train()
    return accuracy, average_loss, confusion_matrix

In [18]:
def vote_test(model, entries, true_labels, RGB=False):
    #Stop parameters learning
    model.eval()
    
    data_loader = torch.utils.data.DataLoader(entries, batch_size=20)

    # Crea una funzione di perdita con pesi
    criterion = nn.CrossEntropyLoss()
    
    correct = 0
    total = 0
    total_loss = 0
    confusion_matrix = np.zeros((8,8 ), dtype=int)

    correct_maj=0
    
    
    with torch.no_grad():
        for inputs, labels in dat_loader:
            
            if(RGB==False):
                inputs=inputs.unsqueeze(1)
            #predict label
            outputs = model(inputs)
            
            #compute loss
            voting=outputs.mean(dim=0)
            voting=voting.unsqueeze(0)
            label=labels[0].unsqueeze(0)
            loss = criterion(voting, label)
            total_loss += loss.item()
            
            predicted= torch.argmax(voting)
            
            correct += (predicted == labels[0])
            confusion_matrix[label][predicted]+=1
            
            votes=[0,0,0,0,0,0,0,0]
       
            
    cm=ConfusionMatrixDisplay(confusion_matrix=confusion_matrix)
    cm.plot(dpi=500)
    print(confusion_matrix)
    accuracy = 100*correct / 800 
    average_loss = total_loss / 800

    model.train()
    return accuracy, average_loss, confusion_matrix

In [19]:
def train(model, dataset, batch_size, num_epochs, learning_rate, verbose = False, RGB=False, reg=1e-5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    val_loss_list=[]
    val_acc_list=[]
    train_loss_list=[]
    train_acc_list=[]
    counted_labels=[0,0,0,0,0,0,0,0]
    
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg)
    criterion = nn.CrossEntropyLoss()

    if not isinstance(dataset, Dataset):
        raise ValueError("The dataset parameter should be an instance of torch.utils.data.Dataset.")

    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    num_batches = len(data_loader)
    
    
    for epoch in range(num_epochs):
        running_loss = 0.0 
        running_accuracy = 0.0
        #initialize correctly predicted samples
        
        # Initialize the progress bar
        progress_bar = tq.tqdm(total=num_batches, unit="batch")
    
        # Initialize the progress bar description
        progress_bar.set_description(f"Epoch {epoch+1}/{num_epochs}")
        start_time = time.time()
        
        for batch_idx, batch in enumerate(data_loader):
            
            correct = 0 # reset train accuracy each batch
            
            inputs,labels = batch[0],batch[1]
            if(verbose == True):
                print("\ninputs shape:",inputs.size(),", dtype:",inputs.dtype," content: ",inputs)
                print("min value:",torch.min(inputs))
                print("max value:",torch.max(inputs))
                print("\nlabels shape:",labels.size(),",dtype:",labels.dtype,", content: ",labels)
            if(RGB==False):
                inputs = inputs.unsqueeze(1) #add a dimension if input is to be considered just grayscale
                #if input is RGB, there are already 3 channels
            
            # Extract the inputs and targets
            optimizer.zero_grad()
            outputs = model(inputs)
            
            if(verbose == True):
                print("\noutputs size:",outputs.size(),"content:",outputs)
                print("List of labels until now:",counted_labels)

            loss = criterion(outputs, labels) #labels need to be a vector of class indexes (0-7) of dim (batch_size)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
            #calculate train accuracy
            for index, output in enumerate(outputs):
                max_index = torch.argmax(output).item() #the index with maximum probability
                counted_labels[labels[index].item()]+=1
                if(labels[index].item() == max_index):
                    correct += 1
            
                if(verbose==True):
                    print("considering output at index {}:".format(index,output))
                    print("max output index = {}",max_index)
                    if(labels[index].item() == max_index):
                        print("correct! in fact labels[index] = {}, max_index = {}".format(labels[index].item(),max_index))
                    else:
                        print("NOT correct! in fact labels[index] = {}, max_index = {}".format(labels[index].item(),max_index))

            
            accuracy = 100 * correct / batch_size
            running_accuracy += accuracy #epoch running_accuracy
            
            # Update the progress bar description and calculate bps
            #progress_bar.set_postfix({"Loss": running_loss / (batch_idx + 1)})
            average_accuracy = running_accuracy / (batch_idx + 1)
            average_loss = running_loss / (batch_idx + 1)
            progress_bar.set_postfix({"avg_loss": average_loss, "acc": accuracy, "avg_acc": average_accuracy})

            # Update the progress bar
            progress_bar.update(1)
            # Evaluate the model on the validation dataset
        
        #calculate train loss and accuracy
        average_loss = running_loss / len(data_loader)
        average_accuracy = running_accuracy / len(data_loader)
        train_loss_list.append(average_loss)
        train_acc_list.append(average_accuracy)
        
        #calculate validation loss and accuracy
        val_acc, val_loss,_ = test(model, validation_dataset, Y_validation,RGB=RGB)
        val_loss_list.append(val_loss)
        val_acc_list.append(val_acc)
        
        
        print(f"Epoch [{epoch+1}/{num_epochs}],Train Loss: {average_loss:.4f}. Train Accuracy: {average_accuracy} Val Loss: {val_loss} Val Accuracy: {val_acc}")
        progress_bar.close()
    return train_loss_list, train_acc_list, val_loss_list, val_acc_list

# Grid Search on NNet1_Small

In [None]:
#TODO: Add batch_size optimization loop
save_directory="./results/NNet1_Small/"

lr_list= [ 0.0001]
r_list=[0.0001]

for i in lr_list:
    for j in r_list:
        if(j!=1e-5 and j!=1e-6):
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"0"+str(j).split(".")[1]
        elif(j==1e-5):
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"00001"
        else:
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"000001"
        print(filename)
        model = Models.NNet1_Small()
        train_loss_list, train_acc_list, val_loss_list, val_acc_list =train(model, train_dataset, batch_size=128, num_epochs=10, learning_rate=i, reg=j)
        print("Trained with learning rate=",i," and with regularization term=",j)
        print("Loss:",val_loss_list)
        print("Accuracy:",val_acc_list)
        save_values=[val_loss_list,val_acc_list]
        np.savetxt(filename,save_values)

# Grid Search on NNet1

In [None]:
#TODO: Add batch_size optimization loop
save_directory="./results/NNet1_Small/"

lr_list= [ 0.0001]
r_list=[0.0001]

for i in lr_list:
    for j in r_list:
        if(j!=1e-5 and j!=1e-6):
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"0"+str(j).split(".")[1]
        elif(j==1e-5):
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"00001"
        else:
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"000001"
        print(filename)
        model = Models.NNet1_Small()
        train_loss_list, train_acc_list, val_loss_list, val_acc_list =train(model, train_dataset, batch_size=128, num_epochs=10, learning_rate=i, reg=j)
        print("Trained with learning rate=",i," and with regularization term=",j)
        print("Loss:",val_loss_list)
        print("Accuracy:",val_acc_list)
        save_values=[val_loss_list,val_acc_list]
        np.savetxt(filename,save_values)

# Grid Search on NNet2

In [30]:
#TODO: Add batch_size optimization loop
save_directory="./results/NNet2/"

lr_list= [ 0.0001]
r_list=[0.0001]

for i in lr_list:
    for j in r_list:
        if(j!=1e-5 and j!=1e-6):
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"0"+str(j).split(".")[1]
        elif(j==1e-5):
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"00001"
        else:
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"000001"
        print(filename)
        model = Models.NNet2()
        train_loss_list, train_acc_list, val_loss_list, val_acc_list =train(model, train_dataset, batch_size=128, num_epochs=10, learning_rate=i, reg=j)
        print("Trained with learning rate=",i," and with regularization term=",j)
        print("Loss:",val_loss_list)
        print("Accuracy:",val_acc_list)
        save_values=[val_loss_list,val_acc_list]
        np.savetxt(filename,save_values)

./results/NNet2/lr_00001_reg_00001


NameError: name 'NNet2' is not defined

# Normalization of raw audio

We calculate mean and std deviation

In [None]:
mean_save_filename_raw = './data/fma_small_raw_array_22050_overlapped/train_mean'
std_save_filename_raw = './data/fma_small_raw_array_22050_overlapped/train_std_deviation'

# Calculation of mean raw (Long)

In [None]:
raw_train_dataset = Datasets.DatasetRaw(raw_file_paths_train, Y_train)
batch_size=1
total_n_batches = len(raw_train_dataset)/batch_size
train_loader = torch.utils.data.DataLoader(raw_train_dataset, batch_size=batch_size)
current_sum=0

#iter all the training set by batches and calculate the sum of all the sample values (513*128 values for each sample)
for batch_idx, batch in enumerate(train_loader):
    if(batch_idx%1000==0):
        print("batch",batch_idx,"/",total_n_batches,"(",round((batch_idx/len(train_dataset)*100)),"%), current_sum:",current_sum)
    
    inputs = batch[0]
    labels = batch[1]
    #print("inputs: shape:",inputs.shape,"content:",inputs)
    #print("labels:",labels)
    for sample in inputs:
        #print("sample: shape",sample.shape,"content:",sample)
        current_sum += torch.sum(sample)
       
        #print("type of current_sum:",current_sum.dtype)
        #print("current_sum:",current_sum)
print("final sum:",current_sum)

In [None]:
print("current_sum",current_sum)
mean_raw = current_sum/(len(raw_train_dataset)*66150) #divide the sum for the total number of values considerated
print("mean of training set:",mean_raw)

print("Saving the mean in file:",mean_save_filename_raw) 
np.save(mean_save_filename_raw,mean_raw)

# Calculation of std deviation raw (Long)

In [None]:
#now let's calculate the standard deviation (squared root of the variance)

batch_size=1
total_n_batches = len(raw_train_dataset)/batch_size
train_loader = torch.utils.data.DataLoader(raw_train_dataset, batch_size=batch_size)
current_sum_of_squares = 0

for batch_idx, batch in enumerate(train_loader):
    if(batch_idx%1000==0):
        print("batch",batch_idx,"/",total_n_batches,round((batch_idx/len(train_dataset)*100)),"%, current_sum_of_squares:",current_sum_of_squares)
    inputs = batch[0]
    labels = batch[1]
    #print("inputs: shape:",inputs.shape,"content:\n",inputs)
    #print("labels:",labels)
    for elem in inputs:
        elem = elem.double() #convert to float64 for precise calculations
        #print("elem: shape",elem.shape,"content:\n",elem)
        difference = elem - mean_raw
        #print("difference: shape:",difference.shape,"content:\n", difference)
        difference_squared = difference**2
        #print("difference_squared: shape:",difference_squared.shape,"content:\n", difference_squared)
        current_sum_of_squares += torch.sum(difference_squared)
        #print("current_sum_of_squares:",current_sum_of_squares)
print("final sum of squares:",current_sum_of_squares)

In [None]:
import math

variance_raw = current_sum_of_squares/((len(train_dataset)*66150)-1)
std_deviation_raw = math.sqrt(variance)

print("variance raw:",variance)
print("std_deviation_raw:",std_deviation_raw)

print("Saving the std_deviation_raw in file:",std_save_filename_raw) 
np.save(std_save_filename_raw,std_deviation_raw)

# Load mean and std from file (fast)

In [None]:
loaded_mean_raw = np.load(mean_save_filename_raw+'.npy')
print("loaded mean:",loaded_mean_raw)

loaded_std_raw = np.load(std_save_filename_raw+'.npy')
print("loaded std:",loaded_std_raw)

## Create dataset for raw audio

The labels are the same as the STFT dataset (in the same order).

In [21]:
raw_folder_path="data/fma_small_raw_array_22050_overlapped"
    
Y_train, Y_validation, Y_test = create_dataset_splitted(raw_folder_path)
raw_train_folder = os.path.join(raw_folder_path,'train') # concatenate train folder to path
raw_validation_folder = os.path.join(raw_folder_path,'validation') # concatenate train folder to path
raw_test_folder = os.path.join(raw_folder_path,'test') # concatenate train folder to path

raw_train_file_paths, _ = get_sorted_file_paths(raw_train_folder)
raw_train_dataset = Datasets.DatasetRaw(raw_train_file_paths, Y_train)
print("len of train dataset: ",len(train_dataset))

raw_validation_file_paths, _ = get_sorted_file_paths(raw_validation_folder)
raw_validation_dataset = Datasets.DatasetRaw(raw_validation_file_paths, Y_validation)
print("len of validation dataset: ",len(raw_validation_dataset))

raw_test_file_paths, _ = get_sorted_file_paths(raw_test_folder)
raw_test_dataset = Datasets.DatasetRaw(raw_test_file_paths, Y_test)
print("len of test dataset: ",len(raw_test_dataset))

train_folder: data/fma_small_raw_array_22050_overlapped/train
validation_folder: data/fma_small_raw_array_22050_overlapped/validation
test_folder: data/fma_small_raw_array_22050_overlapped/test 

audio directory:  ./data/fma_small/
Loading tracks.csv...
small dataset shape: (8000, 52)
Track.csv: 6400 training samples, 800 validation samples, 800 test samples

there are 8 unique genres
Dictionary of genres created: {'Hip-Hop': 0, 'Pop': 1, 'Folk': 2, 'Rock': 3, 'Experimental': 4, 'International': 5, 'Electronic': 6, 'Instrumental': 7}


FileNotFoundError: [Errno 2] No such file or directory: 'data/fma_small_raw_array_22050_overlapped/train'

# Raw Model

In [31]:
RawModel=Models.NNet_Raw()
summary(RawModel, (1,66150))
print(MyModel)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         MaxPool1d-1              [-1, 1, 8268]               0
            Conv1d-2             [-1, 32, 8253]             544
              ReLU-3             [-1, 32, 8253]               0
       BatchNorm1d-4             [-1, 32, 8253]              64
         MaxPool1d-5             [-1, 32, 1031]               0
            Conv1d-6              [-1, 8, 1016]           4,104
              ReLU-7              [-1, 8, 1016]               0
       BatchNorm1d-8              [-1, 8, 1016]              16
         MaxPool1d-9                [-1, 8, 31]               0
           Linear-10                   [-1, 24]           5,976
             ReLU-11                   [-1, 24]               0
      BatchNorm1d-12                   [-1, 24]              48
          Dropout-13                   [-1, 24]               0
           Linear-14                   

## Plot raw sample

In [None]:
from torch import Tensor
batch_size=1
train_loader = torch.utils.data.DataLoader(raw_train_dataset, batch_size=batch_size)

#iter all the training set by batches and calculate the sum of all the sample values (513*128 values for each sample)
for batch_idx, batch in enumerate(train_loader):
    inputs = batch[0]
    labels = batch[1]
    print("inputs: shape:",inputs.shape,"content:",inputs)
    print("labels:",labels)
    for sample in inputs:
        print("sample: shape",sample.shape,"content:",sample)
        fig, ax = plt.subplots(dpi=300)
        librosa.display.waveshow(Tensor.numpy(sample.float()), sr=22150, ax=ax)
        ax.set(title='Envelope view, mono')
        ax.set_ylabel('Amplitude')
        ax.set_xlabel('Time (s)')
        ax.label_outer()
    if(batch_idx==4):
        break

# Grid Search for NNet_Raw

In [23]:
#TODO: Add batch_size optimization loop
save_directory="./results/NNet_Raw/"

lr_list= [ 0.0001]
r_list=[0.0001]

for i in lr_list:
    for j in r_list:
        if(j!=1e-5 and j!=1e-6):
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"0"+str(j).split(".")[1]
        elif(j==1e-5):
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"00001"
        else:
            filename=save_directory+"lr_"+"0"+str(i).split(".")[1]+"_reg_"+"000001"
        print(filename)
        model = Models.NNet_Raw()
        train_loss_list, train_acc_list, val_loss_list, val_acc_list =train(model, train_dataset, batch_size=128, num_epochs=10, learning_rate=i, reg=j)
        print("Trained with learning rate=",i," and with regularization term=",j)
        print("Loss:",val_loss_list)
        print("Accuracy:",val_acc_list)
        save_values=[val_loss_list,val_acc_list]
        np.savetxt(filename,save_values)

  0%|          | 0/125 [00:00<?, ?batch/s]

FileNotFoundError: [Errno 2] No such file or directory: 'data/fma_small_raw_array_22050_overlapped/validation/131978_16.npy'

# NN on echonest features 

As you can see below, unfortunately we cannot perform a train using echonest features (danceability, tempo, acousticness, ...) because there are only ►1300 tracks with echonest features which are not NaN.

In [None]:
print("opening csvs...")

tracks = utils.load('data/fma_metadata/tracks.csv')

echonest = utils.load('data/fma_metadata/echonest.csv')
echonest = echonest['echonest', 'audio_features']
small = tracks[tracks['set', 'subset'] <= 'small']

print("small dataset shape:",small.shape)
print("echonest csv shape (only audio features):",echonest.shape)

In [None]:
#select small dataset from echonest csv

track_ids = small.index.values.tolist()
print("Track ids shape:",len(track_ids),"content:",track_ids[:10],"...")
echonest_small = pd.DataFrame(echonest,index=track_ids)

ipd.display(echonest_small)

In [None]:
X_train_echonest = echonest_small.to_numpy(dtype=np.float16)
print("X_train_echonest: shape:",X_train_echonest.shape)

nan_rows = np.argwhere(np.isnan(X_train_echonest).all(axis=1))

print("There are",len(nan_rows),"rows containing NaN only as data")
#nan_rows = nan_rows.squeeze()
#for i in nan_rows:
#    print("row:",i,":",X_train_echonest[i])

# ResNet
We try to use transfer learning using a ResNet18 by torch

In [32]:
#Choose the model to use by changing the value of the following variable
fine_tuning=True

ResNetModel=Models.Model_ResNet18(pretrained=fine_tuning)
summary(ResNetModel, (3,128,513))
print(ResNetMyModel)

AttributeError: module 'Models' has no attribute 'Model_ResNet18'

In [None]:

transform_ResNet18 = transforms.Compose([
    transforms.Normalize(mean= [loaded_mean,loaded_mean,loaded_mean], std=[loaded_std,loaded_std,loaded_std]) #our values
    #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) #ResNet18 specific values
])

rgb_train_dataset = Datasets.DatasetRGB(stft_train_file_paths, Y_train,  transform = transform)
rgb_validation_dataset = Datasets.DatasetRGB(stft_validation_file_paths, Y_validation,  transform = transform)
rgb_test_dataset = Datasets.DatasetRGB(stft_test_file_paths, Y_test,  transform = transform)

train_data_loader = DataLoader(rgb_train_dataset, batch_size = 10, shuffle=True)



# Training ResNet model

In [None]:
#train the model

train_loss_list, train_acc_list, val_loss_list, val_acc_list =train(ResNetModel, rgb_train_dataset, batch_size=64, num_epochs=10, learning_rate=0.001,verbose=False, RGB=True)

# Example of "Best Model" Training

In [None]:
save_directory="./best_models/"

learning_rate_list = [0.001]
reg_list=[0.0001]
epochs=9

for i in learning_rate_list:
    for j in reg_list:
        filename=save_directory+"results/NNet_Raw"        
        print(filename)
        model = Models.NNet_Raw()
        train_loss_list, train_acc_list, val_loss_list, val_acc_list =train(model, raw_train_dataset, batch_size=64, num_epochs=epochs, learning_rate=i, reg=j)
        print("Trained with learning rate=",i," and with regularization term=",j)
        torch.save(model.state_dict(), save_directory+"best_models/NNet_Raw")

# Example of Models Testing

In [None]:
model_path="./best_models/best_models/"
model_name="NNet_Raw"

test_model=Models.NNet1()
test_model.load_state_dict(torch.load(model_path+model_name), strict=False)

print(test(test_model,raw_train_dataset, Y_train))



# Results Loader

In [None]:
def find_best_value(values,loss):
    if loss==True:
        max_v=100
    else:
        max_v=0
    index=-1
    for i in range(len(values)):
        if loss==True:
            if values[i]<max_v:
                max_v=values[i]
                index=i+1
        else:
            if values[i]>max_v:
                max_v=values[i]
                index=i+1
    return max_v,index
            

In [None]:
res_directory="./results/"
models=os.listdir(res_directory) 
for i in models:
    print(i)
    best_loss=100
    best_acc=0
    best_loss_ep=0
    best_acc_ep=0
    model_folder=res_directory+i
    trials=os.listdir(model_folder)
    if(len(trials)==0):
        continue
    best_trials=[(best_loss,best_loss_ep),(best_acc,best_acc_ep)]
    best_trials_names=['','']
    for j in trials:
        print(j)
        res=np.loadtxt(model_folder+"/"+j)
        loss,epoch_l=find_best_value(res[0],True)
        accuracy,epoch_a=find_best_value(res[1],False)
        if(loss<best_loss):
            best_trials_names[0]=j
            best_loss=loss
            best_loss_ep=epoch_l
            best_trials[0]=(best_loss,best_loss_ep)
        if(accuracy>best_acc):
            best_trials_names[1]=j
            best_acc=accuracy
            best_acc_ep=epoch_a
            best_trials[1]=(best_acc,best_acc_ep)
            
    print("Model:",i)
    print("Best Model for accuracy:",best_trials_names[1])
    print("Value:",best_trials[1][0],"Epoch:",best_trials[1][1])
    print("Best Model for loss:",best_trials_names[0])
    print("Value:",best_trials[0][0],"Epoch:",best_trials[0][1])
    print("")
    
print(models)

In [None]:
#plot the first three best models train vs val loss in 10 epochs (ResNet f.t., Ensamble l.w., NNet1_Small) (in best_models/results)

data_ResNet = np.loadtxt('./best_models/results/ResNet18_Reduced_FineTuning')
print("\nResNet18_Reduced_FineTuning:\n",data_ResNet)

data_Ensemble_Weights = np.loadtxt('./best_models/results/Ensemble_Weights')
print("\nEnsemble_Weights:\n",data_Ensemble_Weights)

data_NNet1_Small = np.loadtxt('./best_models/results/NNet1_Small')
print("\nNNet1_Small:\n",data_NNet1_Small)

print("asdasd\n",data_ResNet[0])

# Epochs (assuming you have 10 epochs)
epochs = np.arange(1, 11)

# Create a figure and axis
fig, ax = plt.subplots(dpi=500)

# Plot train loss and validation accuracy for each model
ax.plot(epochs, data_ResNet[0], label='ResNet (f.t) Train Loss', color='blue', linestyle='dashed')
ax.plot(epochs, data_ResNet[2], label='ResNet (f.t.) Validation Loss', color='blue', linestyle='solid')

ax.plot(epochs, data_Ensemble_Weights[0], label='Ensemble Weights Train Loss', color='green', linestyle='dashed')
ax.plot(epochs, data_Ensemble_Weights[2], label='Ensemble Weights Validation Loss', color='green', linestyle='solid')

ax.plot(epochs, data_NNet1_Small[0], label='NNet1 Small Train Loss', color='red', linestyle='dashed')
ax.plot(epochs, data_NNet1_Small[2], label='NNet1 Small Validation Loss', color='red', linestyle='solid')

# Add labels and title
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid(True)

# Place the legend below the plot
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=2)




# Show the plot
plt.show()