Skip to content

Commit

Permalink
Merge pull request #360 from daanzu/kaldi
Browse files Browse the repository at this point in the history
Kaldi Update, including better local pronunciation generation
  • Loading branch information
drmfinlay committed Nov 30, 2021
2 parents 118defb + ab8dfc4 commit 82d08bd
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 13 deletions.
25 changes: 18 additions & 7 deletions documentation/kaldi_engine.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ the ``get_engine()`` function, which passes them to the
vad_padding_end_ms=200,
vad_complex_padding_end_ms=600,
auto_add_to_user_lexicon=True,
allow_online_pronunciations=False,
lazy_compilation=True,
invalidate_cache=False,
expected_error_rate_threshold=None,
Expand Down Expand Up @@ -266,9 +267,13 @@ The engine can also be configured via the :ref:`command-line interface
quality.

* ``auto_add_to_user_lexicon`` (``bool``) -- Enables automatically
adding unknown words to the `User Lexicon`_. This may make requests to
a cloud service, to predict pronunciations, depending on your installed
packages.
adding unknown words to the `User Lexicon`_. This requires additional
packages to be installed. Pronunciations are only generated locally, unless
you also enable ``allow_online_pronunciations``.

* ``allow_online_pronunciations`` (``bool``) -- Enables online
pronunciation generation for unknown words, if you have also enabled
``auto_add_to_user_lexicon``, and you have the required packages installed.

* ``lazy_compilation`` (``bool``) -- Enables deferred grammar/rule
compilation, which then allows parallel compilation up to your number
Expand Down Expand Up @@ -324,10 +329,16 @@ splitting up the words or using to similar words, e.g. changing
``auto_add_to_user_lexicon=True`` to enable. If an unknown word is
encountered while loading a grammar, its pronunciation is predicted
based on its spelling. This uses either a local library, or a free cloud
service if the library is not installed. The library can be installed
with ``pip install g2p_en==2.0.0`` but has dependencies that can be
difficult, so it is recommended to just not install it and instead let
the cloud be used.
service if the library is not installed and ``allow_online_pronunciations`` is enabled.

The local library (*g2p_en*) can be installed by running the following on
the command line::

pip install g2p_en==2.0.0

Note that the dependencies for this library can be difficult to install, in
which case it is recommended to use the cloud service instead. To enable the
cloud service, set the engine parameter ``allow_online_pronunciations=True``.

**Manually editing User Lexicon:** You can add a word without specifying
a pronunciation, and let it be predicted as above, by running at the
Expand Down
5 changes: 3 additions & 2 deletions dragonfly/engines/backend_kaldi/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ def dec(self, element, src_state, dst_state, grammar, fst):

class KaldiCompiler(CompilerBase, KaldiAGCompiler):

def __init__(self, model_dir, tmp_dir, auto_add_to_user_lexicon=None, lazy_compilation=None, **kwargs):
def __init__(self, model_dir, tmp_dir, auto_add_to_user_lexicon=None, allow_online_pronunciations=None, lazy_compilation=None, **kwargs):
CompilerBase.__init__(self)
KaldiAGCompiler.__init__(self, model_dir=model_dir, tmp_dir=tmp_dir, **kwargs)

self.auto_add_to_user_lexicon = bool(auto_add_to_user_lexicon)
self.allow_online_pronunciations = bool(allow_online_pronunciations)
self.lazy_compilation = bool(lazy_compilation)

self.kaldi_rule_by_rule_dict = collections.OrderedDict() # Rule -> KaldiRule
Expand Down Expand Up @@ -115,7 +116,7 @@ def translate_words(self, words):
def handle_oov_word(self, word):
if self.auto_add_to_user_lexicon:
try:
pronunciations = self.add_word(word, lazy_compilation=True)
pronunciations = self.add_word(word, lazy_compilation=True, allow_online_pronunciations=self.allow_online_pronunciations)
except Exception as e:
self._log.exception("%s: exception automatically adding word %r" % (self, word))
else:
Expand Down
10 changes: 7 additions & 3 deletions dragonfly/engines/backend_kaldi/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,17 @@ class KaldiEngine(EngineBase, DelegateTimerManagerInterface):

_name = "kaldi"
DictationContainer = DictationContainerBase
_required_kag_version = "2.1.0"
# NOTE: Remember to also update setup.py to the same version!
_required_kag_version = "3.1.0"

#-----------------------------------------------------------------------

def __init__(self, model_dir=None, tmp_dir=None, input_device_index=None,
audio_input_device=None, audio_self_threaded=True, audio_auto_reconnect=True, audio_reconnect_callback=None,
retain_dir=None, retain_audio=None, retain_metadata=None, retain_approval_func=None,
vad_aggressiveness=3, vad_padding_start_ms=150, vad_padding_end_ms=200, vad_complex_padding_end_ms=600,
auto_add_to_user_lexicon=False, lazy_compilation=True, invalidate_cache=False,
auto_add_to_user_lexicon=True, allow_online_pronunciations=False,
lazy_compilation=True, invalidate_cache=False,
expected_error_rate_threshold=None,
alternative_dictation=None,
compiler_init_config=None, decoder_init_config=None,
Expand Down Expand Up @@ -129,6 +131,7 @@ def __init__(self, model_dir=None, tmp_dir=None, input_device_index=None,
vad_padding_end_ms = int(vad_padding_end_ms),
vad_complex_padding_end_ms = int(vad_complex_padding_end_ms),
auto_add_to_user_lexicon = bool(auto_add_to_user_lexicon),
allow_online_pronunciations = bool(allow_online_pronunciations),
lazy_compilation = bool(lazy_compilation),
invalidate_cache = bool(invalidate_cache),
expected_error_rate_threshold = float(expected_error_rate_threshold) if expected_error_rate_threshold is not None else None,
Expand Down Expand Up @@ -169,6 +172,7 @@ def connect(self):

self._compiler = KaldiCompiler(self._options['model_dir'], tmp_dir=self._options['tmp_dir'],
auto_add_to_user_lexicon=self._options['auto_add_to_user_lexicon'],
allow_online_pronunciations=self._options['allow_online_pronunciations'],
lazy_compilation=self._options['lazy_compilation'],
alternative_dictation=self._options['alternative_dictation'],
**self._options['compiler_init_config']
Expand Down Expand Up @@ -425,7 +429,7 @@ def _do_recognition(self, timeout=None, single=False, audio_iter=None):
if kaldi_rule and is_acceptable_recognition: # Don't store audio/metadata for bad recognitions
self.audio_store.finalize(parsed_output,
kaldi_rule.parent_grammar.name, kaldi_rule.parent_rule.name,
likelihood=expected_error_rate, has_dictation=kaldi_rule.has_dictation)
likelihood=expected_error_rate, has_dictation=recognition.has_dictation)
else:
self.audio_store.cancel()

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def read(*names):
"pyaudio"
],
"kaldi": [
"kaldi-active-grammar ~= 2.1.0",
# NOTE: Remember to also update engine.py to the same version!
"kaldi-active-grammar ~= 3.1.0",
"sounddevice == 0.3.*",
"webrtcvad-wheels == 2.0.*",
],
Expand Down

0 comments on commit 82d08bd

Please sign in to comment.