-
-
Notifications
You must be signed in to change notification settings - Fork 477
/
text_to_speech.py
256 lines (196 loc) · 8.5 KB
/
text_to_speech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
"""
The Panel TextToSpeech Widget provides functionality for *text to
speech* via the HTML5 SpeechSynthesis API.
See https://developer.mozilla.org/en-US/docs/Web/API/SpeechSynthesis
The term *utterance* is used throughout the API. It is the smallest
unit of speech in spoken language analysis.
"""
from __future__ import annotations
import uuid
from typing import (
TYPE_CHECKING, ClassVar, Mapping, Type,
)
import param
from panel.widgets import Widget
from ..models.text_to_speech import TextToSpeech as _BkTextToSpeech
if TYPE_CHECKING:
from bokeh.model import Model
class Voice(param.Parameterized):
    """
    A single speech synthesis voice offered by the current device.

    The current device (i.e. OS and Browser) provides a list of
    Voices. Each with a unique name and speaking a specific language.

    Wraps the HTML5 SpeechSynthesisVoice API

    See https://developer.mozilla.org/en-US/docs/Web/API/SpeechSynthesisVoice
    """

    default = param.Boolean(constant=True, default=False, doc="""
        A Boolean indicating whether the voice is the default voice
        for the current app language (True), or not (False.)""")

    lang = param.String(constant=True, doc="""
        Returns a BCP 47 language tag indicating the language of the voice.""")

    local_service = param.Boolean(constant=True, doc="""
        A Boolean indicating whether the voice is supplied by a local
        speech synthesizer service (True), or a remote speech
        synthesizer service (False.)""")

    name = param.String(constant=True, doc="""
        Returns a human-readable name that represents the voice.""")

    voice_uri = param.String(constant=True, doc="""
        Returns the type of URI and location of the speech synthesis
        service for this voice.""")

    @staticmethod
    def to_voices_list(voices):
        """Returns a list of Voice objects from the list of dicts provided.

        Each dict is expanded into keyword arguments of the `Voice`
        constructor. `None` or an empty iterable gives an empty list,
        matching how `group_by_lang` treats empty input.
        """
        return [Voice(**spec) for spec in voices or ()]

    @staticmethod
    def group_by_lang(voices):
        """Returns a dictionary where the key is the `lang` and the value is a list of voices
        for that language.

        The keys are inserted in sorted order and every list is sorted by
        voice `name`. Empty or missing input gives an empty dictionary.
        """
        if not voices:
            return {}

        # Create the keys up front, in sorted order, so that the insertion
        # order of the resulting dict is the sorted language order.
        grouped = {lang: [] for lang in sorted({voice.lang for voice in voices})}
        for voice in voices:
            grouped[voice.lang].append(voice)
        for group in grouped.values():
            group.sort(key=lambda voice: voice.name)
        return grouped
class Utterance(param.Parameterized):
    """
    An *utterance* is the smallest unit of speech in spoken language analysis.

    The Utterance Model wraps the HTML5 SpeechSynthesisUtterance API

    See https://developer.mozilla.org/en-US/docs/Web/API/SpeechSynthesisUtterance
    """

    value = param.String(default="", doc="""
        The text that will be synthesised when the utterance is
        spoken. The text may be provided as plain text, or a
        well-formed SSML document.""")

    lang = param.ObjectSelector(default="", doc="""
        The language of the utterance.""")

    pitch = param.Number(default=1.0, bounds=(0.0, 2.0), doc="""
        The pitch at which the utterance will be spoken at expressed
        as a number between 0 and 2.""")

    rate = param.Number(default=1.0, bounds=(0.1, 10.0), doc="""
        The speed at which the utterance will be spoken at expressed
        as a number between 0.1 and 10.""")

    voice = param.ObjectSelector(doc="""
        The voice that will be used to speak the utterance.""")

    volume = param.Number(default=1.0, bounds=(0.0, 1.0), doc=""" The
        volume that the utterance will be spoken at expressed as a
        number between 0 and 1.""")

    def __init__(self, **params):
        """Create the utterance.

        Args:
            **params: Parameter values. An optional `voices` entry (a list
                of `Voice` objects) is removed before the remaining values
                are passed to the parent constructor and is applied
                afterwards through `set_voices`.
        """
        voices = params.pop('voices', [])
        super().__init__(**params)
        # Maps a language tag to the list of voices for that language.
        self._voices_by_language = {}
        self.set_voices(voices)

    def to_dict(self, include_uuid=True):
        """Returns the object parameter values in a dictionary

        Args:
            include_uuid: Whether to add a freshly generated UUID4 string
                under the "uuid" key.

        Returns:
            Dict: The keys "lang", "pitch", "rate", "text" (taken from
            `value`) and "volume". "voice" holds the name of the selected
            voice and is only present when a voice with a non-empty name
            is selected. "uuid" is only present when `include_uuid` is
            true.
        """
        result = {
            "lang": self.lang,
            "pitch": self.pitch,
            "rate": self.rate,
            "text": self.value,
            "volume": self.volume,
        }
        if self.voice and self.voice.name:
            result["voice"] = self.voice.name
        if include_uuid:
            # NOTE(review): presumably makes two otherwise identical
            # utterances distinguishable for the receiver - confirm.
            result["uuid"] = str(uuid.uuid4())
        return result

    def set_voices(self, voices):
        """Updates the `lang` and `voice` parameter objects, default and value

        Args:
            voices: A list of `Voice` objects. When it is empty (or None)
                the language falls back to "en-US" and no voice is
                selected.
        """
        if not voices:
            # Forget the voices of any earlier call *before* touching
            # `lang`. Otherwise the `lang` watcher would look the new
            # language up in the outdated grouping.
            self._voices_by_language = {}
            self.param.lang.objects = ["en-US"]
            self.param.lang.default = "en-US"
            self.lang = "en-US"
            # Same as in the non-empty case below: make sure the voice
            # selection is refreshed even if `lang` already was "en-US".
            self.param.trigger("lang")
            return

        self._voices_by_language = Voice.group_by_lang(voices)
        self.param.lang.objects = list(self._voices_by_language)
        if "en-US" in self._voices_by_language:
            default_lang = "en-US"
        else:
            # No en-US voice available: use the first language of the grouping.
            default_lang = next(iter(self._voices_by_language))
        self.param.lang.default = default_lang
        self.lang = default_lang
        # Run the `lang` watcher explicitly so the voice selection is
        # rebuilt from the new voices even when the language value itself
        # stayed the same.
        self.param.trigger("lang")

    @param.depends("lang", watch=True)
    def _handle_lang_changed(self):
        """Synchronise the `voice` parameter with the current `lang`.

        Without any voice for the current language the voice selection is
        cleared. Otherwise the current voice is kept if it speaks that
        language; if not, the voice flagged as `default` is selected,
        falling back to the first voice of the language.
        """
        # `.get` instead of indexing: a language without registered voices
        # is handled like "no voices" rather than raising a KeyError.
        voices = self._voices_by_language.get(self.lang) if self.lang else None
        if not voices:
            self.param.voice.default = None
            self.voice = None
            self.param.voice.objects = []
            return

        if self.voice and self.voice in voices:
            default_voice = self.voice
        else:
            default_voice = voices[0]
            # If several voices are flagged as default the last one wins.
            for voice in voices:
                if voice.default:
                    default_voice = voice

        # Hand the selector its own copy so it does not share the list
        # stored in `_voices_by_language`.
        self.param.voice.objects = list(voices)
        self.param.voice.default = default_voice
        self.voice = default_voice
class TextToSpeech(Utterance, Widget):
    """
    The `TextToSpeech` widget wraps the HTML5 SpeechSynthesis API

    See https://developer.mozilla.org/en-US/docs/Web/API/SpeechSynthesis

    Reference: https://panel.holoviz.org/reference/widgets/TextToSpeech.html

    :Example:

    >>> TextToSpeech(name="Speech Synthesis", value="Data apps are nice")
    """

    auto_speak = param.Boolean(default=True, doc="""
        Whether or not to automatically speak when the value changes.""")

    cancel = param.Event(doc="""
        Removes all utterances from the utterance queue.""")

    pause = param.Event(doc="""
        Puts the TextToSpeak object into a paused state.""")

    resume = param.Event(doc="""
        Puts the TextToSpeak object into a non-paused state: resumes
        it if it was already paused.""")

    paused = param.Boolean(readonly=True, doc="""
        A Boolean that returns true if the TextToSpeak object is in a
        paused state.""")

    pending = param.Boolean(readonly=True, doc="""
        A Boolean that returns true if the utterance queue contains
        as-yet-unspoken utterances.""")

    speak = param.Event(doc="""
        Speak. I.e. send a new Utterance to the browser""")

    speaking = param.Boolean(readonly=True, doc="""
        A Boolean that returns true if an utterance is currently in
        the process of being spoken — even if TextToSpeak is in a
        paused state.""")

    voices = param.List(readonly=True, doc="""
        Returns a list of Voice objects representing all the available
        voices on the current device.""")

    # Raw voice descriptions (a list of dicts); converted into `Voice`
    # objects by `_update_voices`. Mapped to 'voices' in `_rename` below.
    _voices = param.List()

    _rename: ClassVar[Mapping[str, str | None]] = {
        'auto_speak': None, 'lang': None, 'name': None, 'pitch': None,
        'rate': None, 'speak': None, 'value': None, 'voice': None,
        'voices': None, 'volume': None, '_voices': 'voices',
    }

    _widget_type: ClassVar[Type[Model]] = _BkTextToSpeech

    def _process_param_change(self, msg):
        """Add the utterance to speak to the processed change message.

        An utterance is attached when the incoming message carries a truthy
        'speak' entry, or when it carries 'value' while `auto_speak` is
        enabled.

        Args:
            msg: Mapping of changed parameter names to their new values.

        Returns:
            The message returned by the parent implementation, with 'speak'
            set to `self.to_dict()` when an utterance has to be spoken.
        """
        # Evaluated on the incoming message, before the parent
        # implementation gets to transform it.
        speak = msg.get('speak') or ('value' in msg and self.auto_speak)
        msg = super()._process_param_change(msg)
        if speak:
            msg['speak'] = self.to_dict()
        return msg

    @param.depends('_voices', watch=True)
    def _update_voices(self):
        """Rebuild `voices` and the language/voice selection from `_voices`."""
        # NOTE(review): `voices` is declared readonly; this assignment
        # presumably relies on the watcher running while the read-only
        # protection is lifted - confirm against the caller.
        self.voices = Voice.to_voices_list(self._voices)
        self.set_voices(self.voices)

    def __repr__(self, depth=None):
        # We need to do this because otherwise a error is raised when used in notebook
        # due to infinite recursion
        return f'TextToSpeech(name={self.name!r})'

    def __str__(self):
        return f'TextToSpeech(name={self.name!r})'