In [15]:
import librosa
import os
import json
import numpy as np

DATASET_PATH = "dataset"
JSON_PATH = "denoised_data.json"
SAMPLES_TO_CONSIDER = 22050 # 1 sec. of audio


def preprocess_dataset(dataset_path, json_path, num_mfcc=13, n_fft=512, hop_length=512):
    """Extracts MFCCs from sound dataset and saves them into a json file.

    :param dataset_path (str): Path to dataset
    :param json_path (str): Path to json file used to save MFCCs
    :param num_mfcc (int): Number of coefficients to extract
    :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
    :param hop_length (int): Sliding window for FFT. Measured in # of samples
    :return:
    """

    # dictionary where we'll store mapping, labels, MFCCs and filenames
    data = {
        "mapping": [],
        "labels": [],
        "MFCCs": [],
        "files": []
    }

    # loop through all sub-dirs
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        # ensure we're at sub-folder level / s'assurer que nous sommes au niveau du sous-dossier
        if dirpath is not dataset_path:

            # save label (i.e., sub-folder name) in the mapping
            label = dirpath.split("/")[-1]
            data["mapping"].append(label)
            print("\nProcessing: '{}'".format(label))

            # process all audio files in sub-dir and store MFCCs
            for f in filenames:
                file_path = os.path.join(dirpath, f)

                # load audio file and slice it to ensure length consistency among different files
                signal, sample_rate = librosa.load(file_path)
                
                # drop audio files with less than pre-decided number of samples
                if len(signal) >= SAMPLES_TO_CONSIDER:
                    
                    # ensure consistency of the length of the signal
                    signal = signal[:SAMPLES_TO_CONSIDER]
                    
                    short_f=librosa.stft(signal,n_fft=512,hop_length=512)
                    short_mag=np.abs(short_f)
                    thres=short_mag.max()/8
                    short_mag[short_mag<thres]=0
                    denoised_signal=librosa.istft(short_mag,length=len(signal))
                    # extract MFCCs
                    MFCCs = librosa.feature.mfcc(signal, sample_rate, n_mfcc=num_mfcc, n_fft=n_fft,
                                                 hop_length=hop_length)

                    # store data for analysed track
                    data["MFCCs"].append(MFCCs.T.tolist())
                    data["labels"].append(i-1)
                    data["files"].append(file_path)
                    print("{}: {}".format(file_path, i-1))

    # save data in json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)


if __name__ == "__main__":
    preprocess_dataset(DATASET_PATH, JSON_PATH)



Processing: 'dataset\deideid'
dataset\deideid\d1.wav: 0
dataset\deideid\d10.wav: 0
dataset\deideid\d100.wav: 0
dataset\deideid\d101.wav: 0
dataset\deideid\d102.wav: 0
dataset\deideid\d103.wav: 0
dataset\deideid\d104.wav: 0
dataset\deideid\d105.wav: 0
dataset\deideid\d106.wav: 0
dataset\deideid\d107.wav: 0
dataset\deideid\d108.wav: 0
dataset\deideid\d109.wav: 0
dataset\deideid\d11.wav: 0
dataset\deideid\d110.wav: 0
dataset\deideid\d111.wav: 0
dataset\deideid\d112.wav: 0
dataset\deideid\d113.wav: 0
dataset\deideid\d114.wav: 0
dataset\deideid\d115.wav: 0
dataset\deideid\d116.wav: 0
dataset\deideid\d117.wav: 0
dataset\deideid\d118.wav: 0
dataset\deideid\d119.wav: 0
dataset\deideid\d12.wav: 0
dataset\deideid\d120.wav: 0
dataset\deideid\d121.wav: 0
dataset\deideid\d122.wav: 0
dataset\deideid\d123.wav: 0
dataset\deideid\d124.wav: 0
dataset\deideid\d125.wav: 0
dataset\deideid\d126.wav: 0
dataset\deideid\d127.wav: 0
dataset\deideid\d128.wav: 0
dataset\deideid\d129.wav: 0
dataset\deideid\d13.wa

 -0.00054932], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -0.00128174], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  2.9256335e-03  3.0298969e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  3.4973145e-02 -8.6364746e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 2.5329590e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 1.2207031e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 7.3242188e-04], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -1.4038086e-03 -5.7983398e-04], sr=22050 as keyword args. From version 0.10 passing these as positional argume

dataset\deideid\d136.wav: 0
dataset\deideid\d137.wav: 0
dataset\deideid\d138.wav: 0
dataset\deideid\d139.wav: 0
dataset\deideid\d14.wav: 0
dataset\deideid\d140.wav: 0
dataset\deideid\d142.wav: 0
dataset\deideid\d144.wav: 0
dataset\deideid\d146.wav: 0
dataset\deideid\d147.wav: 0
dataset\deideid\d148.wav: 0
dataset\deideid\d149.wav: 0
dataset\deideid\d15.wav: 0
dataset\deideid\d150.wav: 0
dataset\deideid\d151.wav: 0
dataset\deideid\d152.wav: 0
dataset\deideid\d153.wav: 0
dataset\deideid\d154.wav: 0
dataset\deideid\d16.wav: 0
dataset\deideid\d17.wav: 0
dataset\deideid\d18.wav: 0
dataset\deideid\d19.wav: 0
dataset\deideid\d2.wav: 0
dataset\deideid\d20.wav: 0
dataset\deideid\d21.wav: 0
dataset\deideid\d22.wav: 0
dataset\deideid\d23.wav: 0
dataset\deideid\d24.wav: 0
dataset\deideid\d25.wav: 0
dataset\deideid\d26.wav: 0
dataset\deideid\d27.wav: 0
dataset\deideid\d28.wav: 0
dataset\deideid\d29.wav: 0
dataset\deideid\d3.wav: 0
dataset\deideid\d30.wav: 0
dataset\deideid\d31.wav: 0
dataset\deidei

  1.4526367e-02  1.4404297e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  1.7822266e-02  1.6418457e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -9.1552734e-05  1.8310547e-04], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  0.03799438], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -0.01300049], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  0.00216675], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  1.8615723e-03  6.1035156e-05], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -0.00064087], sr=22050 as keyword args. From version 0.10 passing these as posit

dataset\deideid\d92.wav: 0
dataset\deideid\d93.wav: 0
dataset\deideid\d94.wav: 0
dataset\deideid\d95.wav: 0
dataset\deideid\d96.wav: 0
dataset\deideid\d97.wav: 0


  3.8535953e-03  4.6904492e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  1.4012385e-02  1.6915938e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  1.9405365e-02  2.0187655e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  5.5901562e-03  4.6280799e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  1.56833269e-02  1.64018609e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error


dataset\deideid\d98.wav: 0
dataset\deideid\d99.wav: 0

Processing: 'dataset\waaw'
dataset\waaw\w1.wav: 1
dataset\waaw\w10.wav: 1
dataset\waaw\w100.wav: 1
dataset\waaw\w101.wav: 1
dataset\waaw\w102.wav: 1
dataset\waaw\w103.wav: 1
dataset\waaw\w104.wav: 1
dataset\waaw\w105.wav: 1
dataset\waaw\w106.wav: 1
dataset\waaw\w107.wav: 1
dataset\waaw\w108.wav: 1
dataset\waaw\w109.wav: 1
dataset\waaw\w11.wav: 1
dataset\waaw\w110.wav: 1
dataset\waaw\w111.wav: 1
dataset\waaw\w112.wav: 1
dataset\waaw\w113.wav: 1
dataset\waaw\w114.wav: 1
dataset\waaw\w115.wav: 1
dataset\waaw\w116.wav: 1
dataset\waaw\w117.wav: 1
dataset\waaw\w118.wav: 1
dataset\waaw\w119.wav: 1
dataset\waaw\w12.wav: 1
dataset\waaw\w120.wav: 1
dataset\waaw\w121.wav: 1
dataset\waaw\w122.wav: 1
dataset\waaw\w123.wav: 1
dataset\waaw\w124.wav: 1
dataset\waaw\w125.wav: 1
dataset\waaw\w126.wav: 1
dataset\waaw\w127.wav: 1
dataset\waaw\w128.wav: 1
dataset\waaw\w129.wav: 1
dataset\waaw\w13.wav: 1
dataset\waaw\w130.wav: 1
dataset\waaw\w131.wav: 1

  5.2105736e-02  5.4563820e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -0.00274658], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -7.4462891e-03 -6.8664551e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -1.6979980e-01 -9.4879150e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -8.5449219e-04 -5.4931641e-04], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 1.0833740e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 1.2512207e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 2.3803711e-03], sr=22050 as keyword args. From version 0.10 passing these as

dataset\waaw\w14.wav: 1
dataset\waaw\w141.wav: 1
dataset\waaw\w142.wav: 1
dataset\waaw\w144.wav: 1
dataset\waaw\w145.wav: 1
dataset\waaw\w146.wav: 1
dataset\waaw\w147.wav: 1
dataset\waaw\w148.wav: 1
dataset\waaw\w149.wav: 1
dataset\waaw\w15.wav: 1
dataset\waaw\w150.wav: 1
dataset\waaw\w151.wav: 1
dataset\waaw\w152.wav: 1
dataset\waaw\w153.wav: 1
dataset\waaw\w154.wav: 1
dataset\waaw\w155.wav: 1
dataset\waaw\w16.wav: 1
dataset\waaw\w17.wav: 1
dataset\waaw\w18.wav: 1
dataset\waaw\w19.wav: 1
dataset\waaw\w2.wav: 1
dataset\waaw\w20.wav: 1
dataset\waaw\w21.wav: 1
dataset\waaw\w22.wav: 1
dataset\waaw\w23.wav: 1
dataset\waaw\w24.wav: 1
dataset\waaw\w25.wav: 1
dataset\waaw\w26.wav: 1
dataset\waaw\w27.wav: 1
dataset\waaw\w28.wav: 1
dataset\waaw\w29.wav: 1
dataset\waaw\w3.wav: 1
dataset\waaw\w30.wav: 1
dataset\waaw\w31.wav: 1
dataset\waaw\w32.wav: 1
dataset\waaw\w33.wav: 1
dataset\waaw\w34.wav: 1
dataset\waaw\w35.wav: 1
dataset\waaw\w36.wav: 1
dataset\waaw\w37.wav: 1
dataset\waaw\w38.wav: 1
data

 -0.00012207], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -1.5380859e-02 -1.6906738e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -4.4525146e-02 -4.6539307e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -1.8267822e-01 -1.3366699e-01], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -2.8015137e-02 -2.9022217e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -3.0517578e-05  1.2207031e-04], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  0.00180054], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  0.05187988], sr=22050 as keyword args. From version 0.10 pass

dataset\waaw\w53.wav: 1
dataset\waaw\w54.wav: 1
dataset\waaw\w55.wav: 1
dataset\waaw\w56.wav: 1
dataset\waaw\w57.wav: 1
dataset\waaw\w58.wav: 1
dataset\waaw\w59.wav: 1
dataset\waaw\w6.wav: 1
dataset\waaw\w60.wav: 1
dataset\waaw\w61.wav: 1
dataset\waaw\w62.wav: 1
dataset\waaw\w63.wav: 1
dataset\waaw\w64.wav: 1
dataset\waaw\w65.wav: 1
dataset\waaw\w66.wav: 1
dataset\waaw\w67.wav: 1
dataset\waaw\w68.wav: 1
dataset\waaw\w69.wav: 1
dataset\waaw\w7.wav: 1
dataset\waaw\w70.wav: 1
dataset\waaw\w71.wav: 1
dataset\waaw\w72.wav: 1
dataset\waaw\w73.wav: 1
dataset\waaw\w74.wav: 1
dataset\waaw\w75.wav: 1
dataset\waaw\w76.wav: 1
dataset\waaw\w77.wav: 1
dataset\waaw\w78.wav: 1
dataset\waaw\w79.wav: 1
dataset\waaw\w8.wav: 1
dataset\waaw\w80.wav: 1
dataset\waaw\w81.wav: 1
dataset\waaw\w82.wav: 1
dataset\waaw\w83.wav: 1
dataset\waaw\w84.wav: 1
dataset\waaw\w85.wav: 1
dataset\waaw\w86.wav: 1
dataset\waaw\w87.wav: 1
dataset\waaw\w88.wav: 1
dataset\waaw\w89.wav: 1
dataset\waaw\w9.wav: 1
dataset\waaw\w90.wav

 -1.5045166e-02 -1.3702393e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  0.00134277], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -0.00289917], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -1.2716675e-01 -7.8399658e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -0.00036621], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -0.03009033], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  3.3569336e-04  3.9672852e-04], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  0.00616455], sr=22050 as keyword args. From version 0.10 passing these as positional arguments wi

dataset\waaw\w92.wav: 1
dataset\waaw\w93.wav: 1
dataset\waaw\w94.wav: 1
dataset\waaw\w95.wav: 1
dataset\waaw\w96.wav: 1


 -2.5571932e-03 -7.1042590e-04], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  1.2301694e-03  1.1075159e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -7.68841524e-03 -6.65256241e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -6.1170547e-03 -1.7570830e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -7.0511065e-02 -5.5697653e-02], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
 -9.8850648e-04  4.5485341e-04], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error


dataset\waaw\w97.wav: 1
dataset\waaw\w98.wav: 1
dataset\waaw\w99.wav: 1


 -5.5828192e-03 -7.0258207e-03], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
