Skip to content

Commit

Permalink
Add speech
Browse files Browse the repository at this point in the history
  • Loading branch information
ilesinge committed Oct 16, 2022
1 parent e9eb0e1 commit 29faf7a
Show file tree
Hide file tree
Showing 7 changed files with 404 additions and 66 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ requests = "*"
"flask[async]" = "*"
gunicorn = "*"
typer = {extras = ["all"], version = "*"}
google-cloud-texttospeech = "*"

[dev-packages]
pylint = "*"
Expand Down
289 changes: 234 additions & 55 deletions Pipfile.lock

Large diffs are not rendered by default.

74 changes: 69 additions & 5 deletions shabda/dj.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
from termcolor import colored
from shabda.display import print_error
from shabda.client import Client
from shabda.sampleset import SampleSet
from shabda.sampleset import FREESOUND, SampleSet, TTS
from shabda.sound import Sound
from google.cloud import texttospeech


class Dj:
Expand All @@ -33,7 +34,7 @@ def parse_definition(self, definition):
for section in sections:
parts = section.split(":")
rawword = parts[0]
word = "".join(ch for ch in rawword if ch.isalnum())
word = "".join(ch for ch in rawword if ch.isalnum() or ch == "_")
if len(word) == 0:
raise ValueError("A sample name is required")
number = None
Expand All @@ -51,10 +52,73 @@ def parse_definition(self, definition):
words[word] = number
return words

def list(
    self,
    word,
    max_number=None,
    licenses=None,
    gender=None,
    language=None,
    soundtype=None,
):
    """List the stored sounds for a sample name.

    ``soundtype == "tts"`` selects the speech sample set; any other value
    (including the default ``None``) selects the Freesound set. The other
    arguments are forwarded unchanged to ``SampleSet.list``.
    """
    stype = TTS if soundtype == "tts" else FREESOUND
    sampleset = SampleSet(word, stype)
    return sampleset.list(
        max_number, licenses=licenses, gender=gender, language=language
    )

async def speak(self, word, language, gender):
    """Synthesize a spoken sample for a word with Google Text-to-Speech.

    Underscores in ``word`` are spoken as spaces. A ``gender`` of ``"m"``
    selects a male voice; any other value selects a female one. The audio is
    written as a WAV file into the sample set directory and registered in the
    sample set configuration.

    Returns True once a sample for the requested language and gender exists.
    """
    sampleset = SampleSet(word, TTS)
    # Only a sample recorded with the requested voice counts as a hit;
    # otherwise a word spoken once could never be rendered in another voice.
    if sampleset.list(gender=gender, language=language):
        return True
    # Number the file after the samples already stored so a new voice does not
    # overwrite an earlier recording (the first one is still "<word>_0.wav").
    index = len(sampleset.list())
    word_dir = sampleset.dir()
    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(text=word.replace("_", " "))
    if language == "en-GB" and gender == "f":
        # HACK: pin a specific neural voice for British English female requests.
        voice = texttospeech.VoiceSelectionParams(
            name="en-GB-Neural2-A",
            language_code="en-GB",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        )
    else:
        if gender == "m":
            ssml_gender = texttospeech.SsmlVoiceGender.MALE
        else:
            ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
        voice = texttospeech.VoiceSelectionParams(
            language_code=language,
            ssml_gender=ssml_gender,
        )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
    )
    # NOTE(review): this looks like the synchronous client, so the call
    # presumably blocks the event loop while the request runs - confirm.
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )
    filepath = f"{word_dir}/{word}_{index}.wav"
    with open(filepath, "wb") as out:
        out.write(response.audio_content)
    sound = Sound(
        speechsound={
            "gender": gender,
            "language": language,
            "file": filepath,
        }
    )
    sampleset.add(sound)
    sampleset.saveconfig()
    return True

async def fetch(self, word, num, licenses):
"""Fetch a collection of samples"""
Expand Down
22 changes: 18 additions & 4 deletions shabda/sampleset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,22 @@
from glob import glob
from shabda.sound import Sound

FREESOUND = 1
TTS = 2


class SampleSet:
"""A set of sample files"""

word = None
master_id = None
sounds = []
type = FREESOUND

def __init__(self, word):
def __init__(self, word, soundtype=FREESOUND):
"""Initialize the sample set"""
self.word = word
self.type = soundtype
directory = self.dir()
if not os.path.exists(directory):
os.makedirs(directory)
Expand All @@ -31,15 +36,22 @@ def __init__(self, word):

def dir(self):
    """Return the directory for this sample set.

    Freesound sets live under ``samples/<word>``; TTS sets live under
    ``speech_samples/<word>``.
    """
    directory = "samples/" + self.word
    if self.type == TTS:
        directory = "speech_" + directory
    return directory

def list(self, max_number=None, licenses=None):
def list(self, max_number=None, licenses=None, gender=None, language=None):
"""List sounds for a sample name"""
# accept None as a max_number

sounds = []
for sound in self.sounds:
if licenses is None or sound["license"] in licenses:
if (
(licenses is None or sound["license"] in licenses)
and (gender is None or sound["gender"] == gender)
and (language is None or sound["language"] == language)
):
sounds.append(Sound(configsound=sound))
if max_number is not None:
sounds = sounds[0:max_number]
Expand All @@ -55,6 +67,8 @@ def add(self, sound):
"username": sound.username,
"license": sound.licensename,
"file": sound.file,
"gender": sound.gender,
"language": sound.language,
}
)

Expand Down
8 changes: 7 additions & 1 deletion shabda/sound.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ class Sound:
url = None
licensename = None
file = None
language = None
gender = None

def __init__(self, freesound=None, configsound=None):
def __init__(self, freesound=None, configsound=None, speechsound=None):
if freesound is not None:
self.id = freesound.id
self.username = freesound.username
Expand All @@ -22,6 +24,10 @@ def __init__(self, freesound=None, configsound=None):
self.url = configsound["url"]
self.licensename = configsound["license"]
self.file = configsound["file"]
if speechsound is not None:
self.language = speechsound["language"]
self.gender = speechsound["gender"]
self.file = speechsound["file"]

def _translate_license(self, licenseurl):
"""Translate a license URL into a license name"""
Expand Down
75 changes: 74 additions & 1 deletion shabda/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,76 @@ def remove_file(response):
return send_file(tmpfile, as_attachment=True)


@bp.route("/speech/<definition>")
async def speech(definition):
    """Synthesize speech samples for every word of a definition.

    The ``gender`` (default ``"f"``) and ``language`` (default ``"en-GB"``)
    query parameters select the voice. Responds with a JSON object holding a
    ``status`` (``"ok"`` if at least one word was spoken, otherwise
    ``"empty"``) and the cleaned ``definition``.

    Raises BadRequest when the definition cannot be parsed.
    """
    gender = request.args.get("gender", "f")
    language = request.args.get("language", "en-GB")

    # Spaces become underscores so a multi-word phrase is one sample name.
    definition = definition.replace(" ", "_")
    try:
        words = dj.parse_definition(definition)
    except ValueError as ex:
        raise BadRequest(ex) from ex
    results = await asyncio.gather(
        *(speak_one(word, language, gender) for word in words)
    )
    global_status = "ok" if any(status is True for status in results) else "empty"

    return jsonify(
        {
            "status": global_status,
            "definition": clean_definition(words),
        }
    )


@bp.route("/speech/<definition>.json")
async def speech_json(definition):
    """Return a JSON list describing the spoken samples of a definition.

    The ``gender`` (default ``"f"``) and ``language`` (default ``"en-GB"``)
    query parameters select which samples are synthesized and listed. Each
    entry carries the sample file path as ``url``, the type ``"audio"``, the
    word as ``bank`` and the sample index within that word as ``n``.

    Raises BadRequest when the definition cannot be parsed.
    """
    gender = request.args.get("gender", "f")
    language = request.args.get("language", "en-GB")
    definition = definition.replace(" ", "_")

    # Make sure every word has been synthesized before listing it.
    await speech(definition)

    try:
        words = dj.parse_definition(definition)
    except ValueError as ex:
        raise BadRequest(ex) from ex
    reslist = []
    for word in words:
        samples = dj.list(word, gender=gender, language=language, soundtype="tts")
        reslist.extend(
            {
                "url": sound.file,
                "type": "audio",
                "bank": word,
                "n": sample_num,
            }
            for sample_num, sound in enumerate(samples)
        )

    return jsonify(reslist)


# The rule starts with "/" so it is valid whether or not the blueprint is
# registered with a url_prefix.
@bp.route("/speech/speech_samples/<path:path>")
def serve_sample(path):
    """Serve a speech sample file from the speech_samples directory."""
    return send_from_directory("../speech_samples/", path, as_attachment=False)


@bp.route("/samples/<path:path>")
def serve_speech_sample(path):
    """Send the requested file from the samples directory, inline."""
    samples_root = "../samples/"
    return send_from_directory(samples_root, path, as_attachment=False)

Expand Down Expand Up @@ -160,6 +228,11 @@ def cors_after(response):
return response


async def speak_one(word, language, gender):
    """Ask the DJ to speak one word and return its result."""
    spoken = await dj.speak(word, language, gender)
    return spoken


async def fetch_one(word, number, licenses):
    """Ask the DJ to fetch one sample set and return its result."""
    fetched = await dj.fetch(word, number, licenses)
    return fetched
Expand Down
1 change: 1 addition & 0 deletions speech_samples/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*/

0 comments on commit 29faf7a

Please sign in to comment.