Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0.24.1 #35

Merged
merged 9 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ The following extras allow the installation of optional dependencies:
|------|-------------|
| `all` | All optional dependencies, except `dev` and `docs` |
| `dev` | Development dependencies |
| `dev` | Dependencies for building the documentation |
| `docs` | Dependencies for building the documentation |
| `notebooks` | Dependencies only used in notebooks |
| `server` | Dependencies to run the TTS server |
| `bn` | Bangla G2P |
Expand Down Expand Up @@ -270,11 +270,10 @@ You can find the language ISO codes [here](https://dl.fbaipublicfiles.com/mms/tt
and learn about the Fairseq models [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mms).

```python
# TTS with on the fly voice conversion
# TTS with fairseq models
api = TTS("tts_models/deu/fairseq/vits")
api.tts_with_vc_to_file(
api.tts_to_file(
"Wie sage ich auf Italienisch, dass ich dich liebe?",
speaker_wav="target/speaker.wav",
file_path="output.wav"
)
```
Expand Down
3 changes: 3 additions & 0 deletions TTS/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Expose the package version as ``TTS.__version__``.
# The value is looked up at import time from the metadata of the "coqui-tts"
# distribution instead of being hard-coded in this module.
# NOTE(review): importlib.metadata.version() raises PackageNotFoundError when
# the named distribution is not installed (e.g. a bare source checkout placed
# on sys.path) -- confirm that failing at import time is acceptable.
import importlib.metadata

__version__ = importlib.metadata.version("coqui-tts")
60 changes: 25 additions & 35 deletions TTS/tts/utils/text/phonemizers/espeak_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
import re
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

from packaging.version import Version
Expand Down Expand Up @@ -50,7 +52,7 @@ def get_espeakng_version() -> str:
_DEF_ESPEAK_VER = None


def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]:
def _espeak_exe(espeak_lib: str, args: list) -> list[str]:
"""Run espeak with the given arguments."""
cmd = [
espeak_lib,
Expand All @@ -59,32 +61,18 @@ def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[byte
"1", # UTF8 text encoding
]
cmd.extend(args)
logger.debug("espeakng: executing %s", repr(cmd))

with subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
) as p:
res = iter(p.stdout.readline, b"")
err = iter(p.stderr.readline, b"")
for line in err:
logger.warning("espeakng: %s", line.decode("utf-8").strip())
if not sync:
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
return res
res2 = list(res)
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
p.wait()
return res2
logger.debug("Executing: %s", repr(cmd))

p = subprocess.run(cmd, capture_output=True, encoding="utf8", check=True)
for line in p.stderr.strip().split("\n"):
if line.strip() != "":
logger.warning("%s: %s", espeak_lib, line.strip())
res = []
for line in p.stdout.strip().split("\n"):
if line.strip() != "":
logger.debug("%s: %s", espeak_lib, line.strip())
res.append(line.strip())
return res


class ESpeak(BasePhonemizer):
Expand Down Expand Up @@ -198,12 +186,15 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False
if tie:
args.append("--tie=%s" % tie)

args.append(text)
tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8")
tmp.write(text)
tmp.close()
args.append("-f")
args.append(tmp.name)

# compute phonemes
phonemes = ""
for line in _espeak_exe(self.backend, args, sync=True):
logger.debug("line: %s", repr(line))
ph_decoded = line.decode("utf8").strip()
for line in _espeak_exe(self.backend, args):
# espeak:
# version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"
# espeak-ng:
Expand All @@ -213,9 +204,10 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False
# "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
# phonemize needs to remove the language flags of the returned text:
# "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded)
ph_decoded = re.sub(r"\(.+?\)", "", line)

phonemes += ph_decoded.strip()
Path(tmp.name).unlink()
return phonemes.replace("_", separator)

def _phonemize(self, text: str, separator: str = "") -> str:
Expand All @@ -232,14 +224,12 @@ def supported_languages() -> dict[str, str]:
return {}
args = ["--voices"]
langs = {}
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)):
line = line.decode("utf8").strip()
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)):
if count > 0:
cols = line.split()
lang_code = cols[1]
lang_name = cols[3]
langs[lang_code] = lang_name
logger.debug("line: %s", repr(line))
return langs

def version(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
autodoc_mock_imports = ["soundfile"]

# -- Project information -----------------------------------------------------
project = "TTS"
project = "coqui-tts"
copyright = "2021 Coqui GmbH, 2020 TTS authors"
author = "Coqui GmbH"

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ include = ["TTS*"]

[project]
name = "coqui-tts"
version = "0.24.0"
version = "0.24.1"
description = "Deep learning for Text to Speech."
readme = "README.md"
requires-python = ">=3.9, <3.13"
Expand Down Expand Up @@ -69,7 +69,7 @@ dependencies = [
"gruut[de,es,fr]==2.2.3",
# Tortoise
"einops>=0.6.0",
"transformers>=4.33.0",
"transformers>=4.33.0,<4.41.0",
# Bark
"encodec>=0.1.1",
# XTTS
Expand Down
6 changes: 6 additions & 0 deletions tests/text_tests/test_phonemizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ def setUp(self):
output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt)

# UTF8 characters
text = "źrebię"
gt = "ʑrˈɛbjɛ"
output = ESpeak("pl").phonemize(text, separator="")
self.assertEqual(output, gt)

def test_name(self):
self.assertEqual(self.phonemizer.name(), "espeak")

Expand Down
Loading