In [1]:
import math
import os.path

import keras
import librosa.feature
import sklearn

import pandas
import numpy
from matplotlib import pyplot as plt


Using TensorFlow backend.


In [2]:
def load_dataset(path):
    """Read the UrbanSound8K metadata table located under `path`.

    The CSV is looked up at `<path>/metadata/UrbanSound8K.csv` and parsed
    with `pandas.read_csv`.

    Parameters
    ----------
    path : str
        Root directory of the dataset.

    Returns
    -------
    pandas.DataFrame
        The parsed metadata, one row per CSV record.
    """
    return pandas.read_csv(os.path.join(path, 'metadata/UrbanSound8K.csv'))

# Root directory of the UrbanSound8K dataset, relative to this notebook.
dataset_path = '../../data/UrbanSound8K/'
def sample_path(sample):
    """Build the path of the audio file belonging to one metadata record.

    `sample` must expose `fold` and `slice_file_name` attributes; the result
    is `<dataset_path>/audio/fold<fold>/<slice_file_name>`.
    """
    fold_dir = 'fold{}'.format(sample.fold)
    return os.path.join(dataset_path, 'audio', fold_dir, sample.slice_file_name)

# Load the UrbanSound8K metadata table and preview its first rows.
data = load_dataset(dataset_path)
data.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [16]:
def feature_extract(y, sr, n_mels=32, n_fft=512, hop_length=256):
    """Compute a log-scaled mel spectrogram of an audio signal.

    Parameters
    ----------
    y : numpy.ndarray
        Audio samples.
    sr : int
        Sample rate of `y`. It is forwarded to the mel filterbank so the
        band edges match the actual signal rate.
    n_mels : int
        Number of mel bands.
    n_fft : int
        FFT window length in samples.
    hop_length : int
        Number of samples between successive frames.

    Returns
    -------
    numpy.ndarray
        Mel power spectrogram converted to dB relative to its own maximum
        (`ref=numpy.max`), with the dynamic range limited to 80 dB.
    """
    # `y` and `sr` are passed by keyword: recent librosa releases reject
    # positional audio arguments, and omitting `sr` would silently fall back
    # to librosa's default rate instead of the caller's.
    mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels,
                                          n_fft=n_fft, hop_length=hop_length)
    log_mels = librosa.core.power_to_db(mels, top_db=80, ref=numpy.max)
    return log_mels
    
def silence_feature(bands, length):
    """Return the reference feature for silence: a (bands, length) array of integer zeros."""
    shape = (bands, length)
    return numpy.full(shape, 0)
    
def test_silence_feature():
    """Check that near-silent input yields the same feature as `silence_feature`.

    A 0.1 second signal of very low amplitude Gaussian noise (std 1e-6) at
    16 kHz is run through `feature_extract`; the result must equal the
    all-zero array produced by `silence_feature` for the same shape.
    """
    sr = 16000
    mels = 32

    length = int(0.1*sr)
    # A locally seeded generator keeps the check reproducible and avoids
    # touching the global numpy random state used elsewhere in the notebook.
    rng = numpy.random.RandomState(0)
    silence = rng.normal(0.0, 1e-6, size=length)
    f = feature_extract(silence, sr, n_mels=mels)
    silent_f = silence_feature(mels, f.shape[1])
    numpy.testing.assert_equal(f, silent_f)

test_silence_feature()

In [45]:
def settings_id(settings, feature='feature'):
    """Serialize a settings mapping into a deterministic identifier string.

    The keys are sorted, each entry is rendered as `key=value`, the entries
    are joined with commas, and the result is prefixed with `<feature>:`.
    """
    parts = []
    for key in sorted(settings.keys()):
        parts.append("{}={}".format(key, str(settings[key])))
    return feature + ':' + ','.join(parts)
        
# Feature-extraction settings. compute_mels reads each of these keys, and
# precompute uses settings_id(settings) as the name of its output folder.
settings = dict(
    samplerate=16000,
    n_mels=32,
    fmin=0,
    fmax=8000,
    n_fft=512,
    hop_length=256,
)

# Preview the identifier string generated for these settings.
settings_id(settings, feature='mels')

'mels:fmax=8000,fmin=0,hop_length=256,n_fft=512,n_mels=32,samplerate=16000'

In [73]:
def compute_mels(filepath, settings):
    """Load an audio file and compute its mel power spectrogram.

    Parameters
    ----------
    filepath : str
        Path of the audio file to load.
    settings : dict
        Must provide the keys 'samplerate', 'n_mels', 'n_fft', 'hop_length',
        'fmin' and 'fmax'. The audio is loaded at `settings['samplerate']`.

    Returns
    -------
    numpy.ndarray
        The mel spectrogram as returned by `librosa.feature.melspectrogram`
        (not converted to dB).
    """
    y, sr = librosa.load(filepath, sr=settings['samplerate'])
    # The audio is passed by keyword because recent librosa releases no
    # longer accept it positionally; older releases accept `y=` as well.
    mels = librosa.feature.melspectrogram(y=y, sr=sr,
                                          n_mels=settings['n_mels'],
                                          n_fft=settings['n_fft'],
                                          hop_length=settings['hop_length'],
                                          fmin=settings['fmin'],
                                          fmax=settings['fmax'])
    return mels

import joblib

def precompute(samples, settings, out_dir, n_jobs=8, verbose=1, force=False):
    """Compute mel spectrograms for all samples in parallel and store them on disk.

    Each spectrogram is written with `numpy.savez` (array stored under the
    default key `arr_0`) to
    `<out_dir>/<settings_id(settings, feature='mels')>/<fold dir>/<name>.npz`,
    where the fold directory and file name mirror those of the source audio.

    Parameters
    ----------
    samples : pandas.DataFrame
        Metadata rows; each row is passed to `sample_path` to locate the audio.
    settings : dict
        Feature settings, forwarded to `compute_mels` and encoded in the
        output folder name.
    out_dir : str
        Directory under which the per-settings output folder is created.
    n_jobs : int
        Number of parallel joblib workers.
    verbose : int
        joblib verbosity level.
    force : bool
        When False, samples whose output file already exists are skipped.
        When True, every sample is recomputed and overwritten.

    Returns
    -------
    list of str
        Output file paths, in the same order as the rows of `samples`.
    """
    out_folder = os.path.join(out_dir, settings_id(settings, feature='mels'))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(out_folder, exist_ok=True)

    def compute(inp, outp):
        # Reuse a previously written file unless recomputation is forced.
        if os.path.exists(outp) and not force:
            return outp

        f = compute_mels(inp, settings)
        numpy.savez(outp, f)
        return outp

    def job_spec(sample):
        path = sample_path(sample)
        # os.path helpers instead of split(os.sep)/str.replace: they handle
        # either path separator and only touch the real file extension.
        fold_name = os.path.basename(os.path.dirname(path))
        stem, _ext = os.path.splitext(os.path.basename(path))
        out_fold = os.path.join(out_folder, fold_name)
        os.makedirs(out_fold, exist_ok=True)

        out_path = os.path.join(out_fold, stem + '.npz')
        return path, out_path

    jobs = [joblib.delayed(compute)(*job_spec(sample)) for _, sample in samples.iterrows()]
    feature_files = joblib.Parallel(n_jobs=n_jobs, verbose=verbose)(jobs)
    return feature_files


# force=True recomputes every file even when it already exists on disk.
# The result is assigned so the cell does not echo thousands of paths.
feature_files = precompute(data, settings, out_dir='./pre', force=True)

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done 148 tasks      | elapsed:   12.1s
[Parallel(n_jobs=8)]: Done 346 tasks      | elapsed:   28.4s
[Parallel(n_jobs=8)]: Done 596 tasks      | elapsed:   58.0s
[Parallel(n_jobs=8)]: Done 946 tasks      | elapsed:  1.8min
[Parallel(n_jobs=8)]: Done 1396 tasks      | elapsed:  2.4min
[Parallel(n_jobs=8)]: Done 1946 tasks      | elapsed:  3.6min
[Parallel(n_jobs=8)]: Done 2596 tasks      | elapsed:  4.5min
[Parallel(n_jobs=8)]: Done 3346 tasks      | elapsed:  6.2min
[Parallel(n_jobs=8)]: Done 4196 tasks      | elapsed:  7.4min
[Parallel(n_jobs=8)]: Done 5146 tasks      | elapsed:  9.4min
[Parallel(n_jobs=8)]: Done 6196 tasks      | elapsed: 11.1min
[Parallel(n_jobs=8)]: Done 7346 tasks      | elapsed: 13.1min
[Parallel(n_jobs=8)]: Done 8622 tasks      | elapsed: 15.1min
[Parallel(n_jobs=8)]: Done 8732 out of 8732 | elapsed: 15.3min finished


In [21]:
# Scratch calculation: 1024 samples at 44.1 kHz — presumably a frame duration in milliseconds.
(1024/44.100)

23.219954648526077

In [22]:
# Scratch calculation: 256 samples (the hop_length in settings) at 16 kHz — presumably the hop duration in milliseconds.
(256)/16.000

16.0