Skip to content

Commit

Permalink
Merge pull request #17 from jhudsl/tests
Browse files Browse the repository at this point in the history
Added tests
  • Loading branch information
howardbaek committed May 23, 2023
2 parents a15c77b + bf5668e commit e93b87e
Show file tree
Hide file tree
Showing 22 changed files with 284 additions and 191 deletions.
36 changes: 22 additions & 14 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
Type: Package
Package: text2speech
Title: Text to Speech
Title: Text to Speech Conversion
Description: Converts text into speech using various text-to-speech (TTS) engines and provides a unified interface for accessing their functionality.
With this package, users can easily generate audio files of spoken words, phrases, or sentences from plain text data. The package supports multiple TTS engines,
including Google's 'Cloud Text-to-Speech API', 'Amazon Polly', Microsoft's 'Cognitive Services Text to Speech REST API', and a free TTS engine called 'coqui TTS'.
Version: 0.3.0
Authors@R:
Authors@R: c(
person(given = "John",
family = "Muschelli",
role = c("aut", "cre"),
email = "muschellij2@gmail.com",
comment = c(ORCID = "0000-0001-6469-1750"))
Description: Unifies different text to speech engines, such as
Google, Microsoft, and Amazon. Text synthesis can be done
in any engine with a simple switch of an argument denoting
the service requested. The 'aws.polly' package has been
orphaned and can be found from the CRAN archives.
comment = c(ORCID = "0000-0001-6469-1750")),
person(given = "Howard",
family = "Baek",
role = "ctb",
email = "howardbaek@fredhutch.org")
)
License: GPL-3
Suggests:
aws.polly,
covr,
patrick,
rmarkdown,
testthat (>= 3.0.0),
stringi
stringi,
testthat (>= 3.0.0)
Encoding: UTF-8
LazyData: true
VignetteBuilder: knitr
Expand All @@ -28,14 +32,18 @@ URL: https://github.com/muschellij2/text2speech
BugReports: https://github.com/muschellij2/text2speech/issues
Imports:
aws.signature,
cli,
dplyr,
googleAuthR,
googleLanguageR,
httr,
mscstts (>= 0.5.1),
tuneR,
magrittr,
knitr,
utils
magrittr,
mscstts2,
tidyr,
tuneR,
utils,
withr
Remotes: howardbaek/mscstts2
Roxygen: list(markdown = TRUE)
Config/testthat/edition: 3
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export(tts_amazon_authenticated)
export(tts_amazon_voices)
export(tts_auth)
export(tts_bind_wav)
export(tts_coqui)
export(tts_coqui_installed)
export(tts_coqui_voices)
export(tts_default_voice)
export(tts_google)
Expand All @@ -22,4 +24,7 @@ export(tts_microsoft_authenticated)
export(tts_microsoft_voices)
export(tts_speak_engine)
export(tts_voices)
importFrom(cli,cli_text)
importFrom(magrittr,"%>%")
importFrom(tidyr,separate_wider_delim)
importFrom(withr,with_path)
15 changes: 8 additions & 7 deletions R/aaa_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,20 @@ wav_duration = function(object) {
#'
#' @param path path to the local coqui tts Executable File
#'
#' @details List of possible file path locations for the local coqui tts Executable File
#' @details List of possible file path locations for the local coqui tts
#' Executable File
#' \describe{
#' \item{Linux}{/usr/bin/tts, /usr/local/bin/tts}
#' \item{Mac}{/opt/homebrew/Caskroom/miniforge/base/bin/tts}
#' \item{Windows}{C:\\Program Files\\tts}
#' }
#'
#' @return Returns nothing, function sets the option variable
#' \code{path_to_coqui}.
#' \code{path_to_coqui}.
#' @export
#'
#' @examples \dontrun{
#' set_coqui_path("local/path/to/tts")
#' set_coqui_path("~/path/to/tts")
#' }
set_coqui_path <- function(path) {
stopifnot(is.character(path))
Expand All @@ -81,16 +82,16 @@ set_coqui_path <- function(path) {



# Assert that coqui "tts" exists locally
# Prepare to use coqui "tts" by checking if it exists locally.
# Check option "path_to_coqui". If it's NULL, call coqui_find(), which
# will try to determine the local path to file "tts". If
# coqui_find() is successful, the path to "tts" will be assigned to option
# "path_to_coqui", otherwise an error is thrown.
coqui_assert <- function() {
use_coqui <- function() {
coqui_path <- getOption("path_to_coqui")

if (is.null(coqui_path)) {
coqui_path <- coqui_find()
coqui_path <- find_coqui()
set_coqui_path(coqui_path)
}
}
Expand All @@ -99,7 +100,7 @@ coqui_assert <- function() {
# looking in the known file locations for the current OS. If OS is not Linux,
# OSX, or Windows, an error is thrown. If path to "tts" is not found, an
# error is thrown.
coqui_find <- function() {
find_coqui <- function() {
user_os <- Sys.info()["sysname"]
if (!user_os %in% names(coqui_paths_to_check)) {
stop(coqui_path_missing, call. = FALSE)
Expand Down
9 changes: 9 additions & 0 deletions R/text2speech-package.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#' @keywords internal
"_PACKAGE"

## usethis namespace: start
#' @importFrom cli cli_text
#' @importFrom tidyr separate_wider_delim
#' @importFrom withr with_path
## usethis namespace: end
NULL
6 changes: 3 additions & 3 deletions R/tts_auth.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ tts_amazon_authenticated = function() {
#' @export
tts_microsoft_authenticated = function(...) {
res = try({
mscstts::ms_get_tts_token(...)
mscstts2::ms_get_token(...)
})
if (inherits(res, "try-error")) {
return(FALSE)
}
res = res$request
httr::status_code(res) < 400
res_status_code = res$response$status_code
res_status_code < 400
}
61 changes: 36 additions & 25 deletions R/tts_backend.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
#' Convert Text to Speech using Google Cloud Text-to-Speech API
#' @export

#' @param text A character vector of text to be spoken
#' @param output_format Format of output files: "mp3" or "wav"
#' @param voice A full voice name that can be passed to the service, such as the
#' argument `voice` for `get_synthesis`` from \code{aws.polly}, or or
#' [mscstts::ms_synthesize()] or the `name` argument for
#' [googleLanguageR::gl_talk()]
#' @param bind_audio Should the [text2speech::tts_bind_wav()] be run on after
#' the audio has been created, to ensure that the length of text and the
#' number of rows is consistent? This affects the output format of some audio.
#' description
#' @rdname tts
#' @param voice A full voice name that can be passed to the
#' service, such as the
#' argument `voice` for `get_synthesis` from \code{aws.polly},
#' or [mscstts::ms_synthesize()] or the
#' `name` argument for [googleLanguageR::gl_talk()]
#' @examples
#' tts_default_voice("amazon")
#' tts_default_voice("google")
#' tts_default_voice("microsoft")
#' @export
tts_google = function(
text,
output_format = c("mp3", "wav"),
Expand Down Expand Up @@ -62,6 +64,7 @@ tts_google = function(
}

#' Convert Text to Speech using Amazon Polly
#'
#' @export
#' @rdname tts
#' @examples \dontrun{
Expand Down Expand Up @@ -156,37 +159,29 @@ tts_amazon = function(


#' Convert Text to Speech using Microsoft Cognitive Services API
#'
#' @export
#' @rdname tts
tts_microsoft = function(
text,
output_format = c("mp3", "wav"),
voice = "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)",
audio_type = c("mp3", "wav"),
voice = NULL,
bind_audio = TRUE,
...) {

# Set character limit
limit = 800
output_format = match.arg(output_format)
audio_type = output_format

output_format = switch(
output_format,
"mp3" = "audio-24khz-160kbitrate-mono-mp3",
"wav" = "riff-24khz-16bit-mono-pcm")


res = lapply(text, function(string) {
strings = tts_split_text(string,
limit = limit)

res = vapply(strings, function(tt) {
output = tts_temp_audio(audio_type)
out = mscstts::ms_synthesize(
out = mscstts2::ms_synthesize(
tt,
output_format = output_format,
voice = voice,
...)
writeBin(out$content, con = output)
writeBin(out, con = output)
output
}, FUN.VALUE = character(1L))
names(res) = NULL
Expand Down Expand Up @@ -214,6 +209,23 @@ tts_microsoft = function(
}



#' Convert Text to Speech using Coqui TTS
#'
#' @param text A character vector of text to be spoken
#' @param exec_path System path to Coqui TTS
#' @param output_format Format of output files: "mp3" or "wav"
#' @param model_name Deep Learning model for Text-to-Speech Conversion
#' @param vocoder_name Model that generates audio
#' @param bind_audio Should [text2speech::tts_bind_wav()] be run after
#' the audio has been created, to ensure that the length of text and the
#' number of rows is consistent? This affects the output format of some audio.
#' @param save_local Should the output file be saved locally?
#' @param save_local_dest Destination to save output file, if saved locally
#' @param ... Additional arguments
#'
#' @export
tts_coqui <- function(
text,
exec_path,
Expand Down Expand Up @@ -301,7 +313,6 @@ tts_coqui <- function(
file.copy(normalizePath(res$file), save_local_dest)
}
}

res
}

Expand Down
6 changes: 5 additions & 1 deletion R/tts_coqui_installed.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#' Is coqui TTS installed on local system?
#'
#' @return `TRUE` or `FALSE`
#' @export
tts_coqui_installed <- function() {
coqui_assert()
use_coqui()
coqui_path <- getOption("path_to_coqui")

res <- suppressWarnings(withr::with_path(process_coqui_path(coqui_path),
Expand Down
8 changes: 4 additions & 4 deletions R/tts_microsoft_auth.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#' @rdname tts_auth
#' @export
tts_microsoft_auth = function(key_or_json_file = NULL, ...) {
if (!mscstts::ms_have_tts_key()) {
mscstts::ms_set_tts_key(api_key = key_or_json_file)
res = mscstts::ms_have_tts_key()
if (!mscstts2::ms_exist_key()) {
mscstts2::ms_set_key(api_key = key_or_json_file)
res = mscstts2::ms_exist_key()
}
res = tts_microsoft_authenticated(...)
return(tts_microsoft_authenticated(...))
}
32 changes: 16 additions & 16 deletions R/tts_synthesize.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#' Convert Text to Speech
#'
#' @param text A character vector of text to speak
#' @param output_format Format of output files
#' @param ... Additional arguments to
#' `text2speech::tts_google()`,
#' `text2speech::tts_amazon()`, or
#' `text2speech::tts_microsoft()`
#' @param service service to use
#' @param text A character vector of text
#' @param output_format Format of output files: "mp3" or "wav"
#' @param ... Additional arguments to `text2speech::tts_google()`,
#' `text2speech::tts_amazon()`, `text2speech::tts_microsoft()`, or
#' `text2speech::tts_coqui()`
#' @param service Service to use (Google, Amazon, Microsoft, or Coqui)
#'
#' @note All functions have a `voice`` argument fro a
#' full voice name that can be passed to the
#' service, such as `voice` for `get_synthesis`` from \code{aws.polly}
#' @note `tts_google()`, `tts_amazon()`, and `tts_microsoft()` have a `voice`
#' argument for a full voice name that can be passed to the service, such as
#' `voice` for `get_synthesis` from \code{aws.polly}. `tts_coqui()` has
#' `model_name` and `vocoder_name` arguments, which let you choose the TTS and
#' vocoder models.
#'
#' @param bind_audio Should the [text2speech::tts_bind_wav()]
#' be run on after the audio has been created, to ensure that
#' the length of text and the number of rows is consistent?
#' This affects the output format of some audio.
#' @param bind_audio Should [text2speech::tts_bind_wav()] be run after
#' the audio has been created, to ensure that the length of text and the
#' number of rows is consistent? This affects the output format of some audio.
#'
#'
#' @return A `data.frame` of text and wav files
Expand Down Expand Up @@ -65,12 +65,12 @@ tts = function(
if (service == "microsoft") {
res = tts_microsoft(
text = text,
output_format = output_format,
audio_type = output_format,
bind_audio = bind_audio,
...)
}
if (service == "coqui") {
coqui_assert()
use_coqui()
coqui_path <- getOption("path_to_coqui")

res <- tts_coqui(
Expand Down
25 changes: 13 additions & 12 deletions R/tts_voices.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,14 @@ tts_amazon_voices = function(...) {
#' Get Microsoft Cognitive Services Text to Speech voices
#' @rdname tts_voices
#' @export
tts_microsoft_voices = function(...) {
# tts_microsoft_auth(...)
res = mscstts::ms_locale_df()
tts_microsoft_voices = function(region = "westus") {
res = mscstts2::ms_list_voice()
cn = colnames(res)
cn[ cn == "Gender" ] = "gender"
cn[ cn == "code" ] = "language_code"
cn[ cn == "locale" ] = "voice"
cn[ cn == "language" ] = "language"
colnames(res) = cn
cn[ cn == "Name" ] <- "voice"
cn[ cn == "Locale" ] <- "language_code"
cn[ cn == "LocaleName" ] <- "language"
cn[ cn == "Gender" ] <- "gender"
colnames(res) <- cn
res = res[, c("voice", "language", "language_code", "gender")]
res$service = "microsoft"

Expand Down Expand Up @@ -149,12 +148,14 @@ tts_google_voices = function(...) {
}


#' Get Coqui TTS voices (list models)
#' @rdname tts_voices

#' Get Coqui TTS voices
#'
#' @return A `data.frame` of the language, dataset, and model name.
#' @export
tts_coqui_voices = function() {
# Look for coqui_path
coqui_assert()
use_coqui()
coqui_path <- getOption("path_to_coqui")

# Run command to list models
Expand All @@ -172,6 +173,6 @@ tts_coqui_voices = function() {
delim = "/",
names = c("language", "dataset", "model_name"))

cli::cli_text("Test out different voices on the {.href [CoquiTTS Demo](https://huggingface.co/spaces/coqui/CoquiTTS)}")
cli::cli_alert_info("Test out different voices on the {.href [CoquiTTS Demo](https://huggingface.co/spaces/coqui/CoquiTTS)}")
out
}

0 comments on commit e93b87e

Please sign in to comment.