In [1]:
import librosa.display
import IPython.display as lpd # Listening the audio
import matplotlib.pyplot as plt # Plotting coefficients
import numpy as np
from os import walk
from os import path
from prep_functions import extract_features, preprocess_labels # Our preprocessing functions
import time


## Create Training and Validation DataSets

Each song is split into several windows, so every song yields a dataset of _m_ windows with shape: <br>
> (_m_, _window_, _mfccs_, 1 )

Since the model is fed one song at a time, we could preprocess each song on the fly, right before it is fed to the model. However, repeating that work every time we iterate through the songs would make training slower, so we preprocess the whole training (and validation) set once, before training starts.

In [2]:
def create_dataset(audio_dir_path, tags_dir_path, outputdir_audio, outputdir_tags,
                   resolution="Mid", context=16, smear=1.5, verbose=False):
    """Preprocess every song of a directory and save its features and labels.

    For each file located directly inside ``audio_dir_path`` (sub-directories
    are not visited) the function:

    1. calls ``extract_features`` on the audio file,
    2. calls ``preprocess_labels`` on the tag file that has the same base name
       and a ``.txt`` extension inside ``tags_dir_path``,
    3. stores the three resulting arrays with ``np.save``.

    Output files (``np.save`` appends the ``.npy`` extension):

    * ``<outputdir_audio>/<song>_X_<resolution>_<context>``
    * ``<outputdir_tags>/original/<song>_oY_<resolution>_<context>_<smear>``
    * ``<outputdir_tags>/smeared/<song>_sY_<resolution>_<context>_<smear>``

    where any ``.`` in ``smear`` is written as ``p`` (``1.5`` -> ``1p5``).
    Missing output directories are created.

    Parameters
    ----------
    audio_dir_path : str
        Directory containing the audio files. Every file found there is
        handed to ``extract_features``.
    tags_dir_path : str
        Directory containing one ``<song>.txt`` tag file per audio file.
    outputdir_audio : str
        Directory where the feature arrays are written.
    outputdir_tags : str
        Directory under which the ``original`` and ``smeared`` label
        sub-directories are written.
    resolution : str, default "Mid"
        Forwarded to ``extract_features`` and ``preprocess_labels``; also
        part of the output file names.
    context : int, default 16
        Forwarded to ``extract_features``; also part of the output file names.
    smear : int or float, default 1.5
        Forwarded to ``preprocess_labels``; also part of the label file names.
    verbose : bool, default False
        Forwarded as ``debug`` to both preprocessing functions and enables
        the per-song confirmation message.

    Returns
    -------
    None
    """
    # Local import: the notebook's import cell only brings in `walk` and `path`.
    from os import makedirs

    outputdir_original_tags = path.join(outputdir_tags, "original")
    outputdir_smeared_tags = path.join(outputdir_tags, "smeared")

    sufix_audio = resolution + "_" + str(context)
    # `replace` is a no-op when there is no dot, so no branching is needed
    # (`str.find` returns -1, which is truthy, when the dot is absent).
    sufix_labels = sufix_audio + "_" + str(smear).replace(".", "p")

    # np.save does not create parent directories.
    for output_dir in (outputdir_audio, outputdir_original_tags, outputdir_smeared_tags):
        makedirs(output_dir, exist_ok=True)

    tic = time.time()

    # The first item yielded by `walk` describes the top-level directory, so
    # there is no need to compare `dirpath` against a separator-specific string
    # (the previous '\.' check only ever matched on Windows). A missing
    # directory yields nothing, hence the empty default.
    _, _, filenames = next(walk(audio_dir_path), (audio_dir_path, [], []))

    # Sorted for a deterministic processing order across filesystems.
    for file in sorted(filenames):
        # Base name without extension, whatever the extension length is.
        song_name = path.splitext(file)[0]

        # Get path from a song/tag file
        audio_path = path.join(audio_dir_path, file)
        txt_path = path.join(tags_dir_path, song_name + '.txt')

        # Call to preprocess functions
        print(f"Preprocessing song : {file} ... \n ")
        x_features, n_frames_extracted_features = extract_features(
            audio_path, n_mels=80, resolution=resolution, context=context, debug=verbose
        )
        y_original_labels, y_smeared_labels = preprocess_labels(
            txt_path, n_frames_extracted_features, resolution=resolution, smear=smear, debug=verbose
        )

        # Saving features
        np.save(path.join(outputdir_audio, song_name + '_X_' + sufix_audio), x_features)
        # Original labels array
        np.save(path.join(outputdir_original_tags, song_name + '_oY_' + sufix_labels), y_original_labels)
        # Smeared labels array
        np.save(path.join(outputdir_smeared_tags, song_name + '_sY_' + sufix_labels), y_smeared_labels)

        if verbose:
            print("Song successfully processed and saved!\n ")
            print("-------------------------------------------------------------------\n")

    toc = time.time()
    print(f"Directory completed. Process time {round(toc-tic)} seconds. \n")


In [None]:
# Build the "Mid" resolution / 16-frame context arrays for the Test split.
create_dataset(
    audio_dir_path="../Data/Audio/Test",
    tags_dir_path="../Data/Tags/Test",
    outputdir_audio="../Data/Audio/Test/Features/Mid_16",
    outputdir_tags="../Data/Tags/Test/Arrays/Mid_16",
    resolution="Mid",
    context=16,
    smear=1,
    verbose=True,
)

Preprocessing song : 04.  Lena & Nico Santos  -  Better.mp3 ... 
 
Y labels shape: (1429, 1) 

Positive labels without Target smearing: 
 Total: 1429
    Positive: 17 (1.19% of total)

Positive labels with Target smearing: 
 Total: 1429
    Positive: 119 (8.33% of total)

Song successfully processed and saved!
 
-------------------------------------------------------------------

Preprocessing song : 06.  Dominic Fike  -  3 Nights.mp3 ... 
 
Y labels shape: (1276, 1) 

Positive labels without Target smearing: 
 Total: 1276
    Positive: 28 (2.19% of total)

Positive labels with Target smearing: 
 Total: 1276
    Positive: 196 (15.36% of total)

Song successfully processed and saved!
 
-------------------------------------------------------------------

Preprocessing song : 08.  Billie Eilish  -  Bad Guy.mp3 ... 
 
Y labels shape: (1394, 1) 

Positive labels without Target smearing: 
 Total: 1394
    Positive: 28 (2.01% of total)

Positive labels with Target smearing: 
 Total: 1394
    