Merge pull request #17 from jhudsl/tests

Added tests
jhudsl · May 23, 2023 · e93b87e · e93b87e
2 parents a15c77b + bf5668e
commit e93b87e
Show file tree

Hide file tree

Showing 22 changed files with 284 additions and 191 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,25 +1,29 @@
 Type: Package
 Package: text2speech
-Title: Text to Speech
+Title: Text to Speech Conversion
+Description: Converts text into speech using various text-to-speech (TTS) engines and provides a unified interface for accessing their functionality.
+  With this package, users can easily generate audio files of spoken words, phrases, or sentences from plain text data. The package supports multiple TTS engines,
+  including Google's 'Cloud Text-to-Speech API', 'Amazon Polly', Microsoft's 'Cognitive Services Text to Speech REST API', and a free TTS engine called 'coqui TTS'.
 Version: 0.3.0
-Authors@R: 
+Authors@R: c( 
     person(given = "John",
            family = "Muschelli",
            role = c("aut", "cre"),
            email = "muschellij2@gmail.com",
-           comment = c(ORCID = "0000-0001-6469-1750"))
-Description: Unifies different text to speech engines, such as
-    Google, Microsoft, and Amazon.  Text synthesis can be done
-    in any engine with a simple switch of an argument denoting
-    the service requested.  The 'aws.polly' package has been
-    orphaned and can be found from the CRAN archives.
+           comment = c(ORCID = "0000-0001-6469-1750")),
+    person(given = "Howard",
+           family = "Baek",
+           role = "ctb",
+           email = "howardbaek@fredhutch.org")
+           )
 License: GPL-3
 Suggests: 
     aws.polly,
     covr,
+    patrick,
     rmarkdown,
-    testthat (>= 3.0.0),
-    stringi
+    stringi,
+    testthat (>= 3.0.0)
 Encoding: UTF-8
 LazyData: true
 VignetteBuilder: knitr
@@ -28,14 +32,18 @@ URL: https://github.com/muschellij2/text2speech
 BugReports: https://github.com/muschellij2/text2speech/issues
 Imports: 
     aws.signature,
+    cli,
     dplyr,
     googleAuthR,
     googleLanguageR,
     httr,
-    mscstts (>= 0.5.1),
-    tuneR,
-    magrittr,
     knitr,
-    utils
+    magrittr,
+    mscstts2,
+    tidyr,
+    tuneR,
+    utils,
+    withr
+Remotes: howardbaek/mscstts2
 Roxygen: list(markdown = TRUE)
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,8 @@ export(tts_amazon_authenticated)
 export(tts_amazon_voices)
 export(tts_auth)
 export(tts_bind_wav)
+export(tts_coqui)
+export(tts_coqui_installed)
 export(tts_coqui_voices)
 export(tts_default_voice)
 export(tts_google)
@@ -22,4 +24,7 @@ export(tts_microsoft_authenticated)
 export(tts_microsoft_voices)
 export(tts_speak_engine)
 export(tts_voices)
+importFrom(cli,cli_text)
 importFrom(magrittr,"%>%")
+importFrom(tidyr,separate_wider_delim)
+importFrom(withr,with_path)
diff --git a/R/aaa_utils.R b/R/aaa_utils.R
@@ -58,19 +58,20 @@ wav_duration = function(object) {
 #'
 #' @param path path to the local coqui tts Executable File
 #'
-#' @details List of possible file path locations for the local coqui tts Executable File
+#' @details List of possible file path locations for the local coqui tts
+#'   Executable File
 #' \describe{
 #'    \item{Linux}{/usr/bin/tts, /usr/local/bin/tts}
 #'    \item{Mac}{/opt/homebrew/Caskroom/miniforge/base/bin/tts}
 #'    \item{Windows}{C:\\Program Files\\tts}
 #' }
 #'
 #' @return Returns nothing, function sets the option variable
-#'  \code{path_to_coqui}.
+#'   \code{path_to_coqui}.
 #' @export
 #'
 #' @examples \dontrun{
-#' set_coqui_path("local/path/to/tts")
+#' set_coqui_path("~/path/to/tts")
 #' }
 set_coqui_path <- function(path) {
   stopifnot(is.character(path))
@@ -81,16 +82,16 @@ set_coqui_path <- function(path) {
 
 
 
-# Assert that coqui "tts" exists locally
+# Prepare to use coqui "tts" by checking if it exists locally.
 # Check option "path_to_coqui". If it's NULL, call coqui_find(), which
 # will try to determine the local path to file "tts". If
 # coqui_find() is successful, the path to "tts" will be assigned to option
 # "path_to_coqui", otherwise an error is thrown.
-coqui_assert <- function() {
+use_coqui <- function() {
   coqui_path <- getOption("path_to_coqui")
 
   if (is.null(coqui_path)) {
-    coqui_path <- coqui_find()
+    coqui_path <- find_coqui()
     set_coqui_path(coqui_path)
   }
 }
@@ -99,7 +100,7 @@ coqui_assert <- function() {
 # looking in the known file locations for the current OS. If OS is not Linux,
 # OSX, or Windows, an error is thrown. If path to "tts" is not found, an
 # error is thrown.
-coqui_find <- function() {
+find_coqui <- function() {
   user_os <- Sys.info()["sysname"]
   if (!user_os %in% names(coqui_paths_to_check)) {
     stop(coqui_path_missing, call. = FALSE)

diff --git a/R/text2speech-package.R b/R/text2speech-package.R
@@ -0,0 +1,9 @@
+#' @keywords internal
+"_PACKAGE"
+
+## usethis namespace: start
+#' @importFrom cli cli_text
+#' @importFrom tidyr separate_wider_delim
+#' @importFrom withr with_path
+## usethis namespace: end
+NULL
diff --git a/R/tts_auth.R b/R/tts_auth.R
@@ -83,11 +83,11 @@ tts_amazon_authenticated = function() {
 #' @export
 tts_microsoft_authenticated = function(...) {
   res = try({
-    mscstts::ms_get_tts_token(...)
+    mscstts2::ms_get_token(...)
   })
   if (inherits(res, "try-error")) {
     return(FALSE)
   }
-  res = res$request
-  httr::status_code(res) < 400
+  res_status_code = res$response$status_code
+  res_status_code < 400
 }
diff --git a/R/tts_backend.R b/R/tts_backend.R
@@ -1,15 +1,17 @@
 #' Convert Text to Speech using Google Cloud Text-to-Speech API
-#' @export
+#'
+#' @param text A character vector of text to be spoken
+#' @param output_format Format of output files: "mp3" or "wav"
+#' @param voice A full voice name that can be passed to the service, such as the
+#'   argument `voice` for `get_synthesis` from \code{aws.polly}, or
+#'   [mscstts2::ms_synthesize()], or the `name` argument for
+#'   [googleLanguageR::gl_talk()]
+#' @param bind_audio Should [text2speech::tts_bind_wav()] be run after the
+#'   audio has been created, to ensure that the length of text and the number
+#'   of rows is consistent? This affects the output format of some
+#'   audio.
 #' @rdname tts
-#' @param voice A full voice name that can be passed to the
-#' service, such as the
-#' argument `voice` for `get_synthesis`` from \code{aws.polly}, or
-#' or [mscstts::ms_synthesize()] or the
-#' `name` argument for [googleLanguageR::gl_talk()]
-#' @examples
-#' tts_default_voice("amazon")
-#' tts_default_voice("google")
-#' tts_default_voice("microsoft")
+#' @export
 tts_google = function(
     text,
     output_format = c("mp3", "wav"),
@@ -62,6 +64,7 @@ tts_google = function(
 }
 
 #' Convert Text to Speech using Amazon Polly
+#'
 #' @export
 #' @rdname tts
 #' @examples \dontrun{
@@ -156,37 +159,29 @@ tts_amazon = function(
 
 
 #' Convert Text to Speech using Microsoft Cognitive Services API
+#'
 #' @export
 #' @rdname tts
 tts_microsoft = function(
     text,
-    output_format = c("mp3", "wav"),
-    voice = "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)",
+    audio_type = c("mp3", "wav"),
+    voice = NULL,
     bind_audio = TRUE,
     ...) {
-
+  # Set character limit
   limit = 800
-  output_format = match.arg(output_format)
-  audio_type = output_format
-
-  output_format = switch(
-    output_format,
-    "mp3" = "audio-24khz-160kbitrate-mono-mp3",
-    "wav" = "riff-24khz-16bit-mono-pcm")
-
 
   res = lapply(text, function(string) {
     strings = tts_split_text(string,
                              limit = limit)
 
     res = vapply(strings, function(tt) {
       output = tts_temp_audio(audio_type)
-      out = mscstts::ms_synthesize(
+      out = mscstts2::ms_synthesize(
         tt,
-        output_format = output_format,
         voice = voice,
         ...)
-      writeBin(out$content, con = output)
+      writeBin(out, con = output)
       output
     }, FUN.VALUE = character(1L))
     names(res) = NULL
@@ -214,6 +209,23 @@ tts_microsoft = function(
 }
 
 
+
+#' Convert Text to Speech using Coqui TTS
+#'
+#' @param text A character vector of text to be spoken
+#' @param exec_path System path to Coqui TTS
+#' @param output_format Format of output files: "mp3" or "wav"
+#' @param model_name Deep Learning model for Text-to-Speech Conversion
+#' @param vocoder_name Model that generates audio
+#' @param bind_audio Should [text2speech::tts_bind_wav()] be run after the
+#'   audio has been created, to ensure that the length of text and the number
+#'   of rows is consistent? This affects the output format of some
+#'   audio.
+#' @param save_local Should the output file be saved locally?
+#' @param save_local_dest Destination to save output file, if saved locally
+#' @param ... Additional arguments
+#'
+#' @export
 tts_coqui <- function(
     text,
     exec_path,
@@ -301,7 +313,6 @@ tts_coqui <- function(
       file.copy(normalizePath(res$file), save_local_dest)
     }
   }
-
   res
 }
 

diff --git a/R/tts_coqui_installed.R b/R/tts_coqui_installed.R
@@ -1,5 +1,9 @@
+#' Is coqui TTS installed on local system?
+#'
+#' @return `TRUE` or `FALSE`
+#' @export
 tts_coqui_installed <- function() {
-  coqui_assert()
+  use_coqui()
   coqui_path <- getOption("path_to_coqui")
 
   res <- suppressWarnings(withr::with_path(process_coqui_path(coqui_path),

diff --git a/R/tts_microsoft_auth.R b/R/tts_microsoft_auth.R
@@ -2,9 +2,9 @@
 #' @rdname tts_auth
 #' @export
 tts_microsoft_auth = function(key_or_json_file = NULL, ...) {
-  if (!mscstts::ms_have_tts_key()) {
-    mscstts::ms_set_tts_key(api_key = key_or_json_file)
-    res = mscstts::ms_have_tts_key()
+  if (!mscstts2::ms_exist_key()) {
+    mscstts2::ms_set_key(api_key = key_or_json_file)
+    res = mscstts2::ms_exist_key()
   }
-  res = tts_microsoft_authenticated(...)
+  return(tts_microsoft_authenticated(...))
 }
diff --git a/R/tts_synthesize.R b/R/tts_synthesize.R
@@ -1,21 +1,21 @@
 #' Convert Text to Speech
 #'
-#' @param text A character vector of text to speak
-#' @param output_format Format of output files
-#' @param ... Additional arguments to
-#' `text2speech::tts_google()`,
-#' `text2speech::tts_amazon()`, or
-#' `text2speech::tts_microsoft()`
-#' @param service service to use
+#' @param text A character vector of text
+#' @param output_format Format of output files: "mp3" or "wav"
+#' @param ... Additional arguments to `text2speech::tts_google()`,
+#'   `text2speech::tts_amazon()`, `text2speech::tts_microsoft()`, or
+#'   `text2speech::tts_coqui()`
+#' @param service Service to use (Google, Amazon, Microsoft, or Coqui)
 #'
-#' @note All functions have a  `voice`` argument fro a
-#' full voice name that can be passed to the
-#' service, such as `voice` for `get_synthesis`` from \code{aws.polly}
+#' @note `tts_google()`, `tts_amazon()`, and `tts_microsoft()` have a `voice`
+#'   argument for a full voice name that can be passed to the service, such as
+#'   `voice` for `get_synthesis` from \code{aws.polly}. `tts_coqui()` has
+#'   `model_name` and `vocoder_name` arguments, which let you choose the TTS and
+#'   vocoder models.
 #'
-#' @param bind_audio Should the [text2speech::tts_bind_wav()]
-#' be run on after the audio has been created, to ensure that
-#' the length of text and the number of rows is consistent?
-#' This affects the output format of some audio.
+#' @param bind_audio Should [text2speech::tts_bind_wav()] be run after the
+#'   audio has been created, to ensure that the length of text and the number
+#'   of rows is consistent? This affects the output format of some audio.
 #'
 #'
 #' @return A `data.frame` of text and wav files
@@ -65,12 +65,12 @@ tts = function(
   if (service == "microsoft") {
     res = tts_microsoft(
       text = text,
-      output_format = output_format,
+      audio_type = output_format,
       bind_audio = bind_audio,
       ...)
   }
   if (service == "coqui") {
-    coqui_assert()
+    use_coqui()
     coqui_path <- getOption("path_to_coqui")
 
     res <- tts_coqui(

diff --git a/R/tts_voices.R b/R/tts_voices.R
@@ -109,15 +109,14 @@ tts_amazon_voices = function(...) {
 #' Get Microsoft Cognitive Services Text to Speech voices
 #' @rdname tts_voices
 #' @export
-tts_microsoft_voices = function(...) {
-  # tts_microsoft_auth(...)
-  res = mscstts::ms_locale_df()
+tts_microsoft_voices = function(region = "westus") {
+  res = mscstts2::ms_list_voice()
   cn = colnames(res)
-  cn[ cn == "Gender" ] = "gender"
-  cn[ cn == "code" ] = "language_code"
-  cn[ cn == "locale" ] = "voice"
-  cn[ cn == "language" ] = "language"
-  colnames(res) = cn
+  cn[ cn == "Name" ] <- "voice"
+  cn[ cn == "Locale" ] <- "language_code"
+  cn[ cn == "LocaleName" ] <- "language"
+  cn[ cn == "Gender" ] <- "gender"
+  colnames(res) <- cn
   res = res[, c("voice", "language", "language_code", "gender")]
   res$service = "microsoft"
 
@@ -149,12 +148,14 @@ tts_google_voices = function(...) {
 }
 
 
-#' Get Coqui TTS voices (list models)
-#' @rdname tts_voices
+
+#' Get Coqui TTS voices
+#'
+#' @return A `data.frame` of the language, dataset, and model name.
 #' @export
 tts_coqui_voices = function() {
   # Look for coqui_path
-  coqui_assert()
+  use_coqui()
   coqui_path <- getOption("path_to_coqui")
 
   # Run command to list models
@@ -172,6 +173,6 @@ tts_coqui_voices = function() {
                                              delim = "/",
                                              names = c("language", "dataset", "model_name"))
 
-  cli::cli_text("Test out different voices on the {.href [CoquiTTS Demo](https://huggingface.co/spaces/coqui/CoquiTTS)}")
+  cli::cli_alert_info("Test out different voices on the {.href [CoquiTTS Demo](https://huggingface.co/spaces/coqui/CoquiTTS)}")
   out
 }