## dataset preparations - 03/01/2021 

The dataset must be prepared according to the input requirements of the model. The model should then be designed to take spectrograms with dimensions $16 \times 128$.

Several steps should be taken:

We would like the samples from each class to be stored in separate folders; it is worth considering what folder structure should be used.

Each data sample should be converted into a spectrogram and stored in a JSON file.

This makes things easier down the line, because during training we can simply read the spectrograms from the JSON file. This is much faster than reading the audio and converting it into a spectrogram during training.



In [6]:
import librosa 
import os 
import json 
from tqdm import tqdm 




def prepare_dataset(n_mfcc=128, n_mels=128, hop_length=520, n_fft=2048,
                    train=True, f_max=1024, mode=0):
    """Convert a folder tree of audio files into mel spectrograms stored as JSON.

    The dataset is read from ``./data/synthetic/<mode>_sounds/`` and the result
    is written to ``./data/synthetic/<mode>_data.json``, where ``<mode>`` is
    ``"train"``, ``"test"`` or ``"various"``. Every directory below the dataset
    root is treated as one class.

    The JSON object has four keys:

    * ``"mappings"`` -- class directory names; ``mappings[label]`` is the name
      of the class for that label.
    * ``"labels"``   -- one integer class index per audio file.
    * ``"data"``     -- one dB-scaled mel spectrogram per audio file, as a
      nested list wrapped in a single-element outer list.
    * ``"files"``    -- path of the audio file each entry came from.

    Directories and files are visited in sorted order so that label indices
    are reproducible across runs and across the different dataset splits.
    Files located directly in the dataset root belong to no class folder and
    are skipped.

    Args:
        n_mfcc: Unused; kept so existing callers keep working.
        n_mels: Number of mel bands passed to ``melspectrogram``.
        hop_length: Hop length passed to ``melspectrogram``.
        n_fft: FFT window size passed to ``melspectrogram``.
        train: When true, stop after the first class directory has been
            processed.
        f_max: Passed to ``melspectrogram`` as ``fmax``.
        mode: Index into ``["train", "test", "various"]`` selecting the
            dataset split.

    Returns:
        None. The result is written to the JSON file.
    """
    modes = ["train", "test", "various"]
    mode_string = modes[mode]
    dataset_path = "./data/synthetic/" + mode_string + "_sounds/"
    json_path = "./data/synthetic/" + mode_string + "_data.json"

    data = {
        "mappings": [],
        "labels": [],
        "data": [],
        "files": [],
    }

    print(dataset_path)

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        # Sorting in place makes os.walk descend in a deterministic order, so
        # the same class always receives the same label index.
        dirnames.sort()

        if i == 0:
            # The first entry is the dataset root itself, not a class folder;
            # recording it would shift every name in "mappings" by one
            # relative to the labels.
            continue

        data["mappings"].append(os.path.basename(os.path.normpath(dirpath)))
        label = len(data["mappings"]) - 1

        for filename in sorted(filenames):
            fpath = os.path.join(dirpath, filename)
            audio, sr = librosa.load(fpath)

            # Keyword arguments: newer librosa releases no longer accept the
            # signal as a positional argument.
            mel = librosa.feature.melspectrogram(
                y=audio,
                sr=sr,
                n_mels=n_mels,
                hop_length=hop_length,
                n_fft=n_fft,
                fmax=f_max,
            )
            data["labels"].append(label)
            data["data"].append([librosa.power_to_db(mel).tolist()])
            data["files"].append(fpath)

        # NOTE(review): with train=True only the first class directory is
        # processed. This looks like a debugging shortcut -- confirm it is
        # intended before relying on the default.
        if train and i == 1:
            break

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

    return

# Build the JSON file for the split selected by mode index 2.
prepare_dataset(mode=2)

./data/synthetic/various_sounds/
