Skip to content

Commit

Permalink
Merge pull request #345 from dictation-toolbox/tts-improvements
Browse files Browse the repository at this point in the history
Make SR engine text-to-speech functionality more flexible
  • Loading branch information
drmfinlay committed Mar 18, 2022
2 parents 1a7814e + abbb361 commit 330cbca
Show file tree
Hide file tree
Showing 17 changed files with 557 additions and 55 deletions.
4 changes: 2 additions & 2 deletions documentation/base_engine.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ Base engine classes
EngineBase class
----------------------------------------------------------------------------

The :class:`dragonfly.engines.engine_base.EngineBase` class forms the base
class for this specific speech recognition engine classes. It defines
The :class:`dragonfly.engines.base.EngineBase` class forms the base
class for the specific speech recognition engine classes. It defines
the stubs required and performs some of the logic necessary for
Dragonfly to be able to interact with a speech recognition engine.

Expand Down
1 change: 1 addition & 0 deletions documentation/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def __getattr__(cls, name):
"numpy",
"pyperclip",
"regex",
"natlink"
}

for module_name in mock_modules:
Expand Down
15 changes: 15 additions & 0 deletions documentation/engines.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ Dragonfly supports multiple speech recognition engines as its backend.
The *engines* sub-package implements the interface code for each
supported engine.

Also contained within this sub-package are a number of text-to-speech
implementations. These can be used independently of the speech recognition
engines via the ``get_speaker()`` function.


Main SR engine back-end interface
----------------------------------------------------------------------------
Expand All @@ -28,3 +32,14 @@ Engine back-ends
kaldi_engine
sphinx_engine
text_engine

Text-to-speech (speaker) back-ends
----------------------------------------------------------------------------

For more information on the available text-to-speech implementations, see
the following sections:

.. toctree::
:maxdepth: 2

speakers
18 changes: 9 additions & 9 deletions dragonfly/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@

import sys

# --------------------------------------------------------------------------
#---------------------------------------------------------------------------
from .config import Config, Section, Item
from .error import DragonflyError, GrammarError

# --------------------------------------------------------------------------
#---------------------------------------------------------------------------
from .engines import (get_engine, EngineError, MimicFailure,
get_current_engine)
get_current_engine, get_speaker)

# --------------------------------------------------------------------------
#---------------------------------------------------------------------------
from .grammar.grammar_base import Grammar
from .grammar.grammar_connection import ConnectionGrammar
from .grammar.rule_base import Rule
Expand All @@ -52,7 +52,7 @@
register_ending_callback,
register_post_recognition_callback)

# --------------------------------------------------------------------------
#---------------------------------------------------------------------------

from .actions import (ActionBase, DynStrActionBase, ActionError,
Repeat, Key, Text, Mouse, Paste, Pause,
Expand All @@ -65,25 +65,25 @@
from .actions import (KeyboardInput, MouseInput, HardwareInput,
make_input_array, send_input_array)

# --------------------------------------------------------------------------
#---------------------------------------------------------------------------

if sys.platform.startswith("win"):
from .windows.clipboard import Clipboard
else:
from .util import Clipboard

# --------------------------------------------------------------------------
#---------------------------------------------------------------------------

from .windows.rectangle import Rectangle, unit
from .windows.point import Point
from .windows import Window, Monitor, monitors

# --------------------------------------------------------------------------
#---------------------------------------------------------------------------
from .language import (Integer, IntegerRef, ShortIntegerRef,
Digits, DigitsRef,
Number, NumberRef)

# --------------------------------------------------------------------------
#---------------------------------------------------------------------------
from .accessibility import (CursorPosition, TextQuery,
get_accessibility_controller,
get_stopping_accessibility_controller)
Expand Down
182 changes: 173 additions & 9 deletions dragonfly/engines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,19 @@

from .base import EngineBase, EngineError, MimicFailure


# ---------------------------------------------------------------------------
#---------------------------------------------------------------------------

# Default engine instance and the registry of engine instances keyed by
# engine name.  Both are written by register_engine_init().
_default_engine = None
_engines_by_name = {}

# Default speaker instance and the registry of speaker instances keyed by
# speaker name.  Both are written by register_speaker_init() and read by
# get_speaker().
_default_speaker = None
_speakers_by_name = {}

# Name strings accepted for the SAPI 5 back-end, followed by the complete
# sets of names that get_engine() and get_speaker() treat as valid.
_sapi5_names = ("sapi5shared", "sapi5inproc", "sapi5")
_valid_engine_names = ("natlink", "kaldi", "sphinx", "text") + _sapi5_names
_valid_speaker_names = ("natlink", "text", "espeak", "flite") + _sapi5_names



def get_engine(name=None, **kwargs):
"""
Expand Down Expand Up @@ -97,7 +104,7 @@ def get_engine(name=None, **kwargs):
if engine:
return engine

# Check if we're on Windows. If we're not on Windows, then we don't
# Check if we're on Windows. If we're not on Windows, then we don't
# evaluate Windows-only engines like natlink.
windows = os.name == 'nt'

Expand Down Expand Up @@ -129,8 +136,7 @@ def get_engine(name=None, **kwargs):
if name:
raise EngineError(message)

sapi5_names = (None, "sapi5shared", "sapi5inproc", "sapi5")
if not engine and windows and name in sapi5_names:
if not engine and windows and name in (None,) + _sapi5_names:
# Attempt to retrieve the sapi5 back-end.
try:
from .backend_sapi5 import is_engine_available
Expand Down Expand Up @@ -184,9 +190,7 @@ def get_engine(name=None, **kwargs):
elif not name:
raise EngineError("No usable engines found.")
else:
valid_names = ["natlink", "kaldi", "sphinx", "sapi5shared",
"sapi5inproc", "sapi5", "text"]
if name not in valid_names:
if name not in _valid_engine_names:
raise EngineError("Requested engine %r is not a valid engine "
"name." % (name,))
else:
Expand Down Expand Up @@ -222,7 +226,7 @@ def get_current_engine():
return _default_engine


# ---------------------------------------------------------------------------
#---------------------------------------------------------------------------

def register_engine_init(engine):
"""
Expand All @@ -238,3 +242,163 @@ def register_engine_init(engine):
_default_engine = engine
if engine and engine.name not in _engines_by_name:
_engines_by_name[engine.name] = engine


#---------------------------------------------------------------------------

def get_speaker(name=None):
    """
        Get the text-to-speech (speaker) implementation.

        This function will initialize and return an instance of an
        available speaker back-end.  If a matching speaker has already
        been initialized, it will be returned instead.

        If no specific speaker back-end is requested and no speaker has
        already been initialized, this function will initialize and return
        an instance of the first available back-end in the following
        order:

        ======================= =========================================
        TTS speaker back-end    Speaker name string(s)
        ======================= =========================================
        1. SAPI 5               ``"sapi5"``, ``"sapi5shared"``,
                                ``"sapi5inproc"``
        2. Dragon/Natlink       ``"natlink"``
        3. eSpeak               ``"espeak"``
        4. CMU Flite            ``"flite"``
        5. Text (stdout)        ``"text"``
        ======================= =========================================

        The first two speaker back-ends are only available on Microsoft
        Windows.  The second requires that Dragon NaturallySpeaking and
        Natlink are installed on the system.

        The third and fourth back-ends, eSpeak and CMU Flite, may be used
        on most platforms.  These require that the appropriate
        command-line programs are installed on the system.

        The last back-end (text) is used as a fallback when no real
        speaker implementation is available.  This back-end writes input
        text to stdout, i.e., prints text to the console.

        **Arguments**:

        :param name: optional human-readable name of the speaker to return.
        :type name: str
        :rtype: SpeakerBase
        :returns: speaker instance
        :raises: EngineError -- if the requested name is not a valid
            speaker name, if the requested speaker is unavailable or fails
            to initialize, or if no usable speaker can be found.
    """
    log = logging.getLogger("speaker")

    # Reuse a previously initialized speaker where possible: the one
    # registered under *name*, or the default one if no name was given.
    existing = _speakers_by_name.get(name) if name else _default_speaker
    if existing:
        return existing

    # Each loader below imports its back-end lazily, so that a missing or
    # broken back-end only matters if that back-end is actually tried.
    # A loader returns None if its back-end reports itself unavailable.
    def load_sapi5():
        from .backend_sapi5 import is_engine_available
        from .backend_sapi5.speaker import Sapi5Speaker
        if is_engine_available(name):
            return Sapi5Speaker()
        return None

    def load_natlink():
        from .backend_natlink import is_engine_available
        from .backend_natlink.speaker import NatlinkSpeaker
        if is_engine_available():
            return NatlinkSpeaker()
        return None

    def load_espeak():
        from .base.speaker_stdin import EspeakSpeaker
        if EspeakSpeaker.is_available():
            return EspeakSpeaker()
        return None

    def load_flite():
        from .base.speaker_stdin import FliteSpeaker
        if FliteSpeaker.is_available():
            return FliteSpeaker()
        return None

    def load_text():
        from .backend_text import is_engine_available
        from .backend_text.speaker import TextSpeaker
        if is_engine_available():
            return TextSpeaker()
        return None

    # Back-ends in order of preference.  Each row holds: whether the
    # back-end may be tried on this platform, the *name* values which
    # select it, its loader, and the message used if loading fails.
    on_windows = os.name == 'nt'
    backends = (
        (on_windows, (None,) + _sapi5_names, load_sapi5,
         "Exception while initializing sapi5 speaker: %s"),
        (on_windows, (None, "natlink"), load_natlink,
         "Exception while initializing natlink speaker: %s"),
        (True, (None, "espeak"), load_espeak,
         "Exception while initializing eSpeak speaker: %s"),
        (True, (None, "flite"), load_flite,
         "Exception while initializing Flite speaker: %s"),
        (True, (None, "text"), load_text,
         "Exception while initializing text speaker: %s"),
    )

    speaker = None
    for platform_ok, accepted_names, loader, error_template in backends:
        if not platform_ok or name not in accepted_names:
            continue
        try:
            candidate = loader()
        except Exception as e:
            message = error_template % (e,)
            log.warning(message)
            # A failure is only fatal if this back-end was explicitly
            # requested; otherwise, fall through to the next back-end.
            if name:
                raise EngineError(message)
            continue
        if candidate is not None:
            speaker = candidate
        if speaker:
            break

    # Return the speaker instance, if one has been initialized, logging
    # which speaker back-end was used.
    if speaker:
        message = "Initialized %r speaker: %r." % (speaker.name, speaker)
        log.info(message)
        return speaker

    # Nothing could be initialized; report the most specific reason.
    if not name:
        raise EngineError("No usable speakers found.")
    if name not in _valid_speaker_names:
        raise EngineError("Requested speaker %r is not a valid speaker "
                          "name." % (name,))
    raise EngineError("Requested speaker %r not available."
                      % (name,))


#---------------------------------------------------------------------------

def register_speaker_init(speaker):
    """
        Register initialization of a speaker.

        The first speaker registered becomes the default speaker.  Each
        speaker is also recorded under its name, unless a speaker with
        that name has already been recorded.

        :param speaker: the speaker instance which was initialized.
    """
    global _default_speaker

    # Keep the existing default, if there is one.
    _default_speaker = _default_speaker or speaker

    if not speaker:
        return

    # The first speaker registered under a given name wins.
    _speakers_by_name.setdefault(speaker.name, speaker)
5 changes: 2 additions & 3 deletions dragonfly/engines/backend_kaldi/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from six.moves import zip
from kaldi_active_grammar import KaldiError, KaldiRule

import dragonfly.engines
from dragonfly.windows.window import Window
from dragonfly.engines.base import (EngineBase,
EngineError,
Expand Down Expand Up @@ -320,9 +321,7 @@ def mimic(self, words):

def speak(self, text):
""" Speak the given *text* using text-to-speech. """
# FIXME
self._log.warning("Text-to-speech is not implemented for this engine; printing text instead.")
print_(text)
dragonfly.engines.get_speaker().speak(text)

def _get_language(self):
return "en"
Expand Down
15 changes: 4 additions & 11 deletions dragonfly/engines/backend_natlink/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@
from locale import getpreferredencoding
from threading import Thread, Event

from six import text_type, binary_type, string_types, PY2
from six import text_type, binary_type, string_types, PY2


from dragonfly.engines.base import (EngineBase, EngineError, MimicFailure,
GrammarWrapperBase)
from dragonfly.engines.backend_natlink.speaker import NatlinkSpeaker
from dragonfly.engines.backend_natlink.compiler import NatlinkCompiler
from dragonfly.engines.backend_natlink.dictation import \
NatlinkDictationContainer
Expand Down Expand Up @@ -140,6 +141,7 @@ def __init__(self, retain_dir=None):
self._timer_manager = NatlinkTimerManager(0.02, self)
self._timer_thread = None
self._retain_dir = None
self._speaker = NatlinkSpeaker()
try:
self.set_retain_directory(retain_dir)
except EngineError as err:
Expand Down Expand Up @@ -351,16 +353,7 @@ def mimic(self, words):

def speak(self, text):
""" Speak the given *text* using text-to-speech. """
# Store the current mic state.
mic_state = self.natlink.getMicState()

# Say the text.
self.natlink.execScript('TTSPlayString "%s"' % text)

# Restore the previous mic state if necessary.
# This is to have consistent behaviour for each version of Dragon.
if mic_state != self.natlink.getMicState():
self.natlink.setMicState(mic_state)
self._speaker.speak(text)

def _get_language(self):
# Get a Windows language identifier from Dragon.
Expand Down

0 comments on commit 330cbca

Please sign in to comment.