Skip to content
Permalink
Browse files

signal: Add general FFT filterbank function

  • Loading branch information...
jonnor committed Mar 1, 2019
1 parent ba9fc2a commit 38dd3a4a3b295d3fdb8db42720caad7baaca2848
Showing with 154 additions and 1 deletion.
  1. +26 −1 bindings/eml_audio.cpp
  2. +1 −0 emlearn/__init__.py
  3. +36 −0 emlearn/eml_audio.h
  4. +45 −0 emlearn/signal.py
  5. +46 −0 test/test_audio.py
@@ -11,6 +11,8 @@
#include <eml_fft.h>
#include <eml_audio.h>

#include "emlpy_common.hpp"

namespace py = pybind11;


@@ -54,6 +56,7 @@ rfft_py(py::array_t<float, py::array::c_style | py::array::forcecast> in) {
return ret;
}


py::array_t<float>
melfilter_py(py::array_t<float, py::array::c_style | py::array::forcecast> in,
int samplerate, int n_fft, int n_mels, float fmin, float fmax
@@ -130,6 +133,28 @@ melspectrogram_py(py::array_t<float, py::array::c_style | py::array::forcecast>
}


py::array_t<float>
sparse_filterbank_py(py::array_t<float, py::array::c_style | py::array::forcecast> in,
py::array_t<int, py::array::c_style | py::array::forcecast> starts,
py::array_t<int, py::array::c_style | py::array::forcecast> stops,
py::array_t<float, py::array::c_style | py::array::forcecast> coeffs
)
{
EMLPY_PRECONDITION(in.ndim() == 1, "Input must have dim 1");
EMLPY_PRECONDITION(starts.ndim() == 1, "Starts must have dim 1");
EMLPY_PRECONDITION(stops.ndim() == 1, "Stops must have dim 1");
EMLPY_PRECONDITION(coeffs.ndim() == 1, "Coefficients must have dim 1");
EMLPY_PRECONDITION(starts.shape(0) == stops.shape(0), "Number of starts must equals stops");

const int output_length = starts.shape(0);
auto ret = py::array_t<float>(output_length);

EMLPY_CHECK_ERROR(eml_sparse_filterbank(in.data(),
(float *)ret.data(), output_length,
starts.data(), stops.data(), coeffs.data()));
return ret;
}


PYBIND11_MODULE(eml_audio, m) {
m.doc() = "Audio machine learning for microcontrollers and embedded devices";
@@ -138,6 +163,6 @@ PYBIND11_MODULE(eml_audio, m) {
m.def("melfilter", melfilter_py);

m.def("melspectrogram", melspectrogram_py);
m.def("sparse_filterbank", sparse_filterbank_py);

}

@@ -1,6 +1,7 @@

from . import trees
from . import common
from . import signal

from .convert import convert

@@ -192,6 +192,42 @@ eml_audio_melspectrogram(EmlAudioMel mel_params, EmlFFT fft, EmlVector inout, Em
return EmlOk;
}

/*
Apply a sparse filterbank which reduces @input to a smaller @output
Each filter is on form 0000nonzero0000
The nonzero filter coefficients are stored consecutively in @lut,
with @start and @end indicating which index (in the input) each filter start/end at
Typically the filters are triangular and applied to an FFT power spectrum
Can be used for mel-filtering a spectrogram
*/
EmlError
eml_sparse_filterbank(const float *input,
float *output, int output_length,
const int *starts, const int *stops, const float *lut)
{
memset(output, 0, output_length * sizeof(float));

int offset = 0;
for (int i = 0; i < output_length; i++) {
const int start = starts[i];
const int stop = stops[i];

//EML_PRECONDITION(start > 0, EmlUninitialized);
//EML_PRECONDITION(stop > 0, EmlUninitialized);

for (int j = start; j <= stop; j++) {
const float f = lut[offset];
//EML_PRECONDITION(f > 0, EmlUninitialized);
output[i] += input[j] * f;
offset++;
}
}

return EmlOk;
}

#ifdef __cplusplus
} // extern "C"
#endif
@@ -0,0 +1,45 @@

from . import cgen

import numpy


def sparse_filterbank(mels):
starts = []
ends = []
coeffs = []
for mel_idx in range(mels.shape[0]):
mel = mels[mel_idx]
nonzero = numpy.nonzero(mel)[0]
first, last = nonzero[0], nonzero[-1]
starts.append(first)
ends.append(last)
coeffs += list(mel[nonzero])

return starts, ends, coeffs


def sparse_filterbank_serialize(sparse, name):
starts, ends, coeffs = sparse

arrays = [
cgen.array_declare(name+'_starts', len(starts), dtype='int', values=starts),
cgen.array_declare(name+'_ends', len(ends), dtype='int', values=ends),
cgen.array_declare(name+'_lut', len(coeffs), values=coeffs),
]
out = '\n\n'.join(arrays)
return out


def sparse_filterbank_reduce(sparse, input):
starts, ends, coeffs = sparse
assert len(starts) == len(ends)

offset = 0
out = numpy.zeros(shape=(len(starts),))
for i in range(len(starts)):
for j in range(starts[i], ends[i]+1):
out[i] += input[j] * coeffs[offset]
offset += 1

return out
@@ -244,3 +244,49 @@ def specshow(d, ax):
#print(out-ref)
numpy.testing.assert_allclose(out, ref, rtol=1e-6);


def test_sparse_filterbank_ref():
# testcase based on working example in STM32AI Function pack, mel_filters_lut_30.c
mel = librosa.filters.mel(sr=16000, n_fft=1024, n_mels=30, htk=False)
sparse = emlearn.signal.sparse_filterbank(mel)

expected_starts = [1, 7, 13, 19, 25, 32, 38, 44, 50, 57, 63, 69, 77, 85, 93,
103, 114, 126, 139, 154, 170, 188, 208, 230, 254, 281, 311, 343, 379, 419]
expected_ends = [12, 18, 24, 31, 37, 43, 49, 56, 62, 68, 76, 84, 92, 102, 113,
125, 138, 153, 169, 187, 207, 229, 253, 280, 310, 342, 378, 418, 463, 511]

starts, ends, coeffs = sparse
assert starts == expected_starts
assert ends == expected_ends
assert len(coeffs) == 968
assert coeffs[0] == pytest.approx(1.6503363149e-03)
assert coeffs[-1] == pytest.approx(2.8125530662e-05)


def test_sparse_filterbank_apply():
n_fft = 1024
n_mels = 30
mel_basis = librosa.filters.mel(sr=16000, n_fft=n_fft, n_mels=n_mels, htk=False)

sparse = emlearn.signal.sparse_filterbank(mel_basis)
starts, ends, coeffs = sparse
assert len(starts) == n_mels
assert len(ends) == n_mels

name = 'fofofo'
c = emlearn.signal.sparse_filterbank_serialize(sparse, name=name)

assert name+'_lut' in c
assert name+'_ends' in c
assert name+'_starts' in c
assert 'static const float' in c
assert 'static const int' in c
assert str(n_mels) in c

data = numpy.ones(shape=mel_basis.shape[1]) * 100
ref = numpy.dot(mel_basis, data)
py_out = emlearn.signal.sparse_filterbank_reduce(sparse, data)
numpy.testing.assert_allclose(py_out, ref, rtol=1e-5)

c_out = eml_audio.sparse_filterbank(data, starts, ends, coeffs)
numpy.testing.assert_allclose(c_out, ref, rtol=1e-5)

0 comments on commit 38dd3a4

Please sign in to comment.
You can’t perform that action at this time.