diff --git a/.gitignore b/.gitignore
index c7a5286..ce74a7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@ dist
 *.egg-info
 **/__pycache__
 build-arm-linux-gnueabihf
+
+# vim
+*.sw?
diff --git a/kaldi-native-fbank/csrc/CMakeLists.txt b/kaldi-native-fbank/csrc/CMakeLists.txt
index 7c6870b..d07558a 100644
--- a/kaldi-native-fbank/csrc/CMakeLists.txt
+++ b/kaldi-native-fbank/csrc/CMakeLists.txt
@@ -5,6 +5,7 @@ set(sources
   feature-functions.cc
   feature-window.cc
   fftsg.cc
+  kaldi-math.cc
   mel-computations.cc
   online-feature.cc
   rfft.cc
diff --git a/kaldi-native-fbank/csrc/feature-window.cc b/kaldi-native-fbank/csrc/feature-window.cc
index b990cbf..2423a0a 100644
--- a/kaldi-native-fbank/csrc/feature-window.cc
+++ b/kaldi-native-fbank/csrc/feature-window.cc
@@ -5,16 +5,13 @@
 // This file is copied/modified from kaldi/src/feat/feature-window.cc
 
 #include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/kaldi-math.h"
 
 #include
 #include
 #include
 #include
 
-#ifndef M_2PI
-#define M_2PI 6.283185307179586476925286766559005
-#endif
-
 namespace knf {
 
 std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts) {
@@ -205,6 +202,21 @@ float InnerProduct(const float *a, const float *b, int32_t n) {
   return sum;
 }
 
+// Adds random noise to the n samples in d, in place: every sample receives
+// its own RandGauss() draw multiplied by dither_value. A dither_value of 0
+// leaves d untouched.
+void Dither(float *d, int32_t n, float dither_value) {
+  if (dither_value == 0.0) {
+    return;
+  }
+
+  // One generator state per call, set up by RandomState's constructor.
+  RandomState rng;
+  for (int32_t k = 0; k < n; ++k) {
+    d[k] += dither_value * RandGauss(&rng);
+  }
+}
+
 static void Preemphasize(float *d, int32_t n, float preemph_coeff) {
   if (preemph_coeff == 0.0) {
     return;
@@ -223,6 +235,11 @@ void ProcessWindow(const FrameExtractionOptions &opts,
                    float *log_energy_pre_window /*= nullptr*/) {
   int32_t frame_length = opts.WindowSize();
 
+  // Dithering is applied before the DC offset is removed.
+  if (opts.dither != 0.0) {
+    Dither(window, frame_length, opts.dither);
+  }
+
   if (opts.remove_dc_offset) {
     RemoveDcOffset(window, frame_length);
   }
diff --git a/kaldi-native-fbank/csrc/feature-window.h b/kaldi-native-fbank/csrc/feature-window.h
index 641908a..6a68349 100644
--- a/kaldi-native-fbank/csrc/feature-window.h
+++ b/kaldi-native-fbank/csrc/feature-window.h
@@ -31,7 +31,8 @@ struct FrameExtractionOptions {
   float samp_freq = 16000;
   float frame_shift_ms = 10.0f;   // in milliseconds.
   float frame_length_ms = 25.0f;  // in milliseconds.
-  float dither = 1.0f;            // Amount of dithering, 0.0 means no dither.
+  float dither = 0.00003f;  // Amount of dithering, 0.0 means no dither.
+                            // Value 0.00003f is equivalent to 1.0 in kaldi.
   float preemph_coeff = 0.97f;    // Preemphasis coefficient.
   bool remove_dc_offset = true;   // Subtract mean of wave before FFT.
   std::string window_type = "povey";  // e.g. Hamming window
diff --git a/kaldi-native-fbank/csrc/kaldi-math.cc b/kaldi-native-fbank/csrc/kaldi-math.cc
new file mode 100644
index 0000000..b4ce1f0
--- /dev/null
+++ b/kaldi-native-fbank/csrc/kaldi-math.cc
@@ -0,0 +1,55 @@
+// kaldi-native-fbank/csrc/kaldi-math.cc
+//
+// Copyright (c) 2024 Brno University of Technology (authors: Karel Vesely)
+
+// This file is an excerpt from kaldi/src/base/kaldi-math.cc
+
+#include "kaldi-native-fbank/csrc/kaldi-math.h"
+
+#ifndef _MSC_VER
+#include <unistd.h>  // defines _POSIX_THREAD_SAFE_FUNCTIONS where supported
+#endif
+
+#include <cstdlib>  // rand, rand_r
+#include <mutex>    // NOLINT
+
+namespace knf {
+
+// Returns a pseudo-random integer between 0 and RAND_MAX, inclusive.
+//
+// Where _POSIX_THREAD_SAFE_FUNCTIONS is defined, a non-null state is advanced
+// with rand_r() and the global generator is left alone; a null state falls
+// back to the global rand(), serialized by a mutex. Elsewhere state is ignored
+// and rand() is called directly, without locking.
+int Rand(struct RandomState *state) {
+#if !defined(_POSIX_THREAD_SAFE_FUNCTIONS)
+  // No rand_r() here (e.g. on Windows), so just call rand().
+  (void)state;
+  return rand();
+#else
+  if (state) {
+    return rand_r(&(state->seed));
+  } else {
+    // rand() is not required to be thread-safe, so serialize its callers.
+    static std::mutex rand_mutex;
+    std::lock_guard<std::mutex> lock(rand_mutex);
+    return rand();
+  }
+#endif
+}
+
+// Seeds the state from the global generator via Rand().
+RandomState::RandomState() {
+  // we initialize it as Rand() + 27437 instead of just Rand(), because on some
+  // systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be
+  // the case that rand_r when initialized with rand() will give you the exact
+  // same sequence of numbers that rand() will give if you keep calling rand()
+  // after that initial call. This can cause problems with repeated sequences.
+  // For example if you initialize two RandomState structs one after the other
+  // without calling rand() in between, they would give you the same sequence
+  // offset by one (if we didn't have the "+ 27437" in the code). 27437 is just
+  // a randomly chosen prime number.
+  seed = unsigned(Rand()) + 27437;
+}
+
+}  // namespace knf
diff --git a/kaldi-native-fbank/csrc/kaldi-math.h b/kaldi-native-fbank/csrc/kaldi-math.h
new file mode 100644
index 0000000..a1342cf
--- /dev/null
+++ b/kaldi-native-fbank/csrc/kaldi-math.h
@@ -0,0 +1,50 @@
+// kaldi-native-fbank/csrc/kaldi-math.h
+//
+// Copyright (c) 2024 Brno University of Technology (authors: Karel Vesely)
+
+// This file is an excerpt from kaldi/src/base/kaldi-math.h
+
+#pragma once
+
+#include <cmath>    // logf, sqrtf, cosf
+#include <cstdlib>  // RAND_MAX
+
+#ifndef M_PI
+#define M_PI 3.1415926535897932384626433832795
+#endif
+
+#ifndef M_2PI
+#define M_2PI 6.283185307179586476925286766559005
+#endif
+
+namespace knf {
+
+// Natural logarithm of x, in single precision.
+inline float Log(float x) { return logf(x); }
+
+// State for thread-safe random number generator
+struct RandomState {
+  RandomState();
+  unsigned seed;
+};
+
+// Returns a random integer between 0 and RAND_MAX, inclusive.
+// A non-null state supplies the caller's own generator state; kaldi-math.cc
+// decides per platform whether it is used.
+int Rand(struct RandomState *state = nullptr);
+
+/// Returns a random number greater than 0 and at most 1. The quotient itself
+/// is strictly below 1, but it is computed in double: when RAND_MAX is large
+/// (e.g. 2^31 - 1) the rounding to float can yield exactly 1.0f.
+inline float RandUniform(struct RandomState *state = nullptr) {
+  return static_cast<float>((Rand(state) + 1.0) / (RAND_MAX + 2.0));
+}
+
+/// Returns a draw from the standard normal distribution, obtained from two
+/// RandUniform() values with the Box-Muller transform.
+inline float RandGauss(struct RandomState *state = nullptr) {
+  return static_cast<float>(sqrtf(-2 * Log(RandUniform(state))) *
+                            cosf(2 * M_PI * RandUniform(state)));
+}
+
+}  // namespace knf