In [1]:
from os import walk
from joblib import Parallel, delayed
import librosa
import numpy as np
import multiprocessing
import json
from deep_audio import Directory, Audio, JSON

In [2]:
# Sample rate that selects the input folder and is embedded in the output
# file name.
sampling_rate = 22050

# Folder containing the audio files for this sample rate.
path = f'audios/{sampling_rate}/'

# Entries found under `path`, as returned by Directory.filenames.
f = Directory.filenames(path)

# Number of CPUs reported by the OS; used as the joblib worker count.
num_cores = multiprocessing.cpu_count()

# Accumulator for the final dataset: name mapping, per-segment feature
# matrices, and one integer label per segment.
data = dict(mapping=[], data=[], labels=[])

In [3]:
def process_directory(dir, index, segment_seconds=5):
    """Split one audio file into fixed-length segments and extract features.

    The file is read from ``{path}/{dir}`` (``path`` is the module-level
    input folder), trimmed to a whole number of segments, and every segment
    is converted into MFCC coefficients concatenated with the output of
    ``Audio.lpcc``.

    Args:
        dir: Name of the audio file inside ``path`` (kept as ``dir`` for
            compatibility, although it shadows the builtin).
        index: Label stored once per extracted segment.
        segment_seconds: Segment length in seconds. Defaults to 5, the value
            that used to be hard-coded.

    Returns:
        dict with two keys:
            "data": list with one nested-list feature matrix per segment.
            "labels": list containing ``index`` once per segment.
    """
    signal, sr = Audio.read(f'{path}/{dir}')
    signal = np.array(signal)

    # Number of samples in one segment, and how many whole segments fit.
    segment_len = int(sr * segment_seconds)
    segments = len(signal) // segment_len

    # Drop the trailing samples that do not fill a complete segment.
    signal = signal[:segments * segment_len]

    features = []
    for i in range(segments):
        start_sample = i * segment_len
        sample = signal[start_sample:start_sample + segment_len]

        # `y` must be passed by keyword: librosa >= 0.10 rejects positional
        # audio input, and the keyword form also works on older releases.
        mfcc = librosa.feature.mfcc(y=sample, sr=sr, n_mfcc=13, hop_length=512, n_fft=2048, lifter=22)
        lpcc = Audio.lpcc(sample)

        # NOTE(review): axis=0 appends the LPCC rows *below* the transposed
        # MFCC matrix, which requires `lpcc` to have the same number of
        # columns as `mfcc.T`. If the intent was to join both feature sets
        # per frame, this would need axis=1 -- confirm against Audio.lpcc.
        segment_features = np.concatenate((mfcc.T, lpcc), axis=0)

        features.append(segment_features.tolist())

    print(f'{dir} -> segments: {segments}')
    return {
        "data": features,
        "labels": [index] * segments
    }

In [4]:
def object_data_to_json(m):
    """Merge per-file results into the global ``data`` dict and write it out.

    ``data['mapping']`` is rebuilt from the module-level file list ``f`` with
    every ``'.wav'`` occurrence removed from each name. ``data['data']`` and
    ``data['labels']`` are rebuilt from ``m`` on every call, so re-running
    the cell does not append the same segments a second time.

    Args:
        m: Iterable of dicts, each with a "data" list and a "labels" list
            (the structure returned by ``process_directory``).

    Side effects:
        Updates the module-level ``data`` dict and writes it to
        ``processed/combined/data_{sampling_rate}.json`` through
        ``JSON.create_json_file``.
    """
    data['mapping'] = [file.replace('.wav', '') for file in f]

    # Collect into fresh lists instead of extending the existing ones: the
    # previous in-place extend duplicated every entry on a second run.
    merged_features = []
    merged_labels = []
    for i in m:
        merged_features.extend(i['data'])
        merged_labels.extend(i['labels'])

    data['data'] = merged_features
    data['labels'] = merged_labels

    JSON.create_json_file(f'processed/combined/data_{sampling_rate}.json', data)

In [5]:
if __name__ == '__main__':
    # Process every non-None entry of `f` in parallel; each entry's position
    # in `f` is passed along as its label.
    m = Parallel(n_jobs=num_cores, verbose=len(f))(
        delayed(process_directory)(filename, label)
        for label, filename in enumerate(f)
        if filename is not None
    )

    # Merge the per-file results and write the combined JSON file.
    object_data_to_json(m)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   11.1s
[Parallel(n_jobs=4)]: Done   2 tasks      | elapsed:   11.5s
[Parallel(n_jobs=4)]: Done   3 tasks      | elapsed:   11.7s
[Parallel(n_jobs=4)]: Done   4 tasks      | elapsed:   13.4s
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:   19.7s
[Parallel(n_jobs=4)]: Done   6 tasks      | elapsed:   20.1s
[Parallel(n_jobs=4)]: Done   7 tasks      | elapsed:   21.4s
[Parallel(n_jobs=4)]: Done   8 tasks      | elapsed:   22.5s
[Parallel(n_jobs=4)]: Done   9 tasks      | elapsed:   28.7s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   30.6s
[Parallel(n_jobs=4)]: Done  11 tasks      | elapsed:   31.0s
[Parallel(n_jobs=4)]: Done  12 tasks      | elapsed:   33.8s
[Parallel(n_jobs=4)]: Done  13 tasks      | elapsed:   37.4s
[Parallel(n_jobs=4)]: Done  14 tasks      | elapsed:   42.3s
[Parallel(n_jobs=4)]: Done  15 tasks      | elapsed:   42.5s
[Parallel(