From e51105f3f0c494703a05106b5817d6907bc209be Mon Sep 17 00:00:00 2001 From: howardbaek Date: Wed, 3 May 2023 10:24:19 -0700 Subject: [PATCH 01/19] Add myself to Author --- DESCRIPTION | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4b1ea19..5e6d401 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,11 @@ Authors@R: family = "Muschelli", role = c("aut", "cre"), email = "muschellij2@gmail.com", - comment = c(ORCID = "0000-0001-6469-1750")) + comment = c(ORCID = "0000-0001-6469-1750")), + person(given = "Howard", + family = "Baek", + role = "ctb", + email = "howardbaek@fredhutch.org") Description: Unifies different text to speech engines, such as Google, Microsoft, and Amazon. Text synthesis can be done in any engine with a simple switch of an argument denoting From 23954ccea5fdecf4e0cf6b90a1996648d5800c17 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Wed, 3 May 2023 10:38:17 -0700 Subject: [PATCH 02/19] Rename functions --- R/aaa_utils.R | 6 +++--- R/tts_coqui_installed.R | 2 +- R/tts_synthesize.R | 2 +- R/tts_voices.R | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/aaa_utils.R b/R/aaa_utils.R index 8a00a69..5ae3d02 100644 --- a/R/aaa_utils.R +++ b/R/aaa_utils.R @@ -86,11 +86,11 @@ set_coqui_path <- function(path) { # will try to determine the local path to file "tts". If # coqui_find() is successful, the path to "tts" will be assigned to option # "path_to_coqui", otherwise an error is thrown. -coqui_assert <- function() { +use_coqui <- function() { coqui_path <- getOption("path_to_coqui") if (is.null(coqui_path)) { - coqui_path <- coqui_find() + coqui_path <- find_coqui() set_coqui_path(coqui_path) } } @@ -99,7 +99,7 @@ coqui_assert <- function() { # looking in the known file locations for the current OS. If OS is not Linux, # OSX, or Windows, an error is thrown. If path to "tts" is not found, an # error is thrown. -coqui_find <- function() { +find_coqui <- function() { user_os <- Sys.info()["sysname"] if (!user_os %in% names(coqui_paths_to_check)) { stop(coqui_path_missing, call. = FALSE) diff --git a/R/tts_coqui_installed.R b/R/tts_coqui_installed.R index cd63b0d..7c71412 100644 --- a/R/tts_coqui_installed.R +++ b/R/tts_coqui_installed.R @@ -1,5 +1,5 @@ tts_coqui_installed <- function() { - coqui_assert() + use_coqui() coqui_path <- getOption("path_to_coqui") res <- suppressWarnings(withr::with_path(process_coqui_path(coqui_path), diff --git a/R/tts_synthesize.R b/R/tts_synthesize.R index 9cf7383..62656f9 100644 --- a/R/tts_synthesize.R +++ b/R/tts_synthesize.R @@ -70,7 +70,7 @@ tts = function( ...) } if (service == "coqui") { - coqui_assert() + use_coqui() coqui_path <- getOption("path_to_coqui") res <- tts_coqui( diff --git a/R/tts_voices.R b/R/tts_voices.R index 4faba37..23575d7 100644 --- a/R/tts_voices.R +++ b/R/tts_voices.R @@ -154,7 +154,7 @@ tts_google_voices = function(...) { #' @export tts_coqui_voices = function() { # Look for coqui_path - coqui_assert() + use_coqui() coqui_path <- getOption("path_to_coqui") # Run command to list models From 26306991188df7fc5b400f32f92fe24643a59b32 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Wed, 3 May 2023 11:19:14 -0700 Subject: [PATCH 03/19] Include Description of package --- DESCRIPTION | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5e6d401..8da23e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,10 @@ Type: Package Package: text2speech -Title: Text to Speech +Title: Text to Speech Conversion +Description: Converts text into speech using various text-to-speech (TTS) engines and provides an unified interface for accessing their functionality. + With this package, users can easily generate audio files of spoken words, phrases, or sentences from plain text data. + The package supports multiple TTS engines, including Google's 'Cloud Text-to-Speech API', 'Amazon Polly', Microsoft's 'Cognitive Services Text to Speech REST API', + and a free TTS engine, coqui TTS. Version: 0.3.0 Authors@R: person(given = "John", From a059727fae6051316e469b9634e42f40a5365b24 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Wed, 3 May 2023 11:20:05 -0700 Subject: [PATCH 04/19] Revise description --- DESCRIPTION | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8da23e7..2936a1d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,9 +2,8 @@ Type: Package Package: text2speech Title: Text to Speech Conversion Description: Converts text into speech using various text-to-speech (TTS) engines and provides an unified interface for accessing their functionality. - With this package, users can easily generate audio files of spoken words, phrases, or sentences from plain text data. - The package supports multiple TTS engines, including Google's 'Cloud Text-to-Speech API', 'Amazon Polly', Microsoft's 'Cognitive Services Text to Speech REST API', - and a free TTS engine, coqui TTS. + With this package, users can easily generate audio files of spoken words, phrases, or sentences from plain text data. The package supports multiple TTS engines, + including Google's 'Cloud Text-to-Speech API', 'Amazon Polly', Microsoft's 'Cognitive Services Text to Speech REST API', and a free TTS engine called 'coqui TTS'. Version: 0.3.0 Authors@R: person(given = "John", From c9f820e38c0113232b7d2304e94d662a5c28ca68 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Wed, 3 May 2023 15:41:22 -0700 Subject: [PATCH 05/19] Finished documentation --- DESCRIPTION | 17 +++++++-------- NAMESPACE | 5 +++++ R/aaa_utils.R | 9 ++++---- R/text2speech-package.R | 9 ++++++++ R/tts_backend.R | 42 +++++++++++++++++++++++++++---------- R/tts_coqui_installed.R | 4 ++++ R/tts_synthesize.R | 28 ++++++++++++------------- R/tts_voices.R | 9 +++++--- man/set_coqui_path.Rd | 5 +++-- man/text2speech-package.Rd | 28 +++++++++++++++++++++++++ man/tts.Rd | 35 +++++++++++++++---------------- man/tts_coqui.Rd | 43 ++++++++++++++++++++++++++++++++++++++ man/tts_coqui_installed.Rd | 14 +++++++++++++ man/tts_coqui_voices.Rd | 14 +++++++++++++ man/tts_voices.Rd | 5 ----- 15 files changed, 201 insertions(+), 66 deletions(-) create mode 100644 R/text2speech-package.R create mode 100644 man/text2speech-package.Rd create mode 100644 man/tts_coqui.Rd create mode 100644 man/tts_coqui_installed.Rd create mode 100644 man/tts_coqui_voices.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 2936a1d..21e8fd8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Description: Converts text into speech using various text-to-speech (TTS) engine With this package, users can easily generate audio files of spoken words, phrases, or sentences from plain text data. The package supports multiple TTS engines, including Google's 'Cloud Text-to-Speech API', 'Amazon Polly', Microsoft's 'Cognitive Services Text to Speech REST API', and a free TTS engine called 'coqui TTS'. Version: 0.3.0 -Authors@R: +Authors@R: c( person(given = "John", family = "Muschelli", role = c("aut", "cre"), @@ -15,11 +15,7 @@ Authors@R: family = "Baek", role = "ctb", email = "howardbaek@fredhutch.org") -Description: Unifies different text to speech engines, such as - Google, Microsoft, and Amazon. Text synthesis can be done - in any engine with a simple switch of an argument denoting - the service requested. The 'aws.polly' package has been - orphaned and can be found from the CRAN archives. + ) License: GPL-3 Suggests: aws.polly, @@ -35,14 +31,17 @@ URL: https://github.com/muschellij2/text2speech BugReports: https://github.com/muschellij2/text2speech/issues Imports: aws.signature, + cli, dplyr, googleAuthR, googleLanguageR, httr, + knitr, + magrittr, mscstts (>= 0.5.1), + tidyr, tuneR, - magrittr, - knitr, - utils + utils, + withr Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 325d350..f804642 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,8 @@ export(tts_amazon_authenticated) export(tts_amazon_voices) export(tts_auth) export(tts_bind_wav) +export(tts_coqui) +export(tts_coqui_installed) export(tts_coqui_voices) export(tts_default_voice) export(tts_google) @@ -22,4 +24,7 @@ export(tts_microsoft_authenticated) export(tts_microsoft_voices) export(tts_speak_engine) export(tts_voices) +importFrom(cli,cli_text) importFrom(magrittr,"%>%") +importFrom(tidyr,separate_wider_delim) +importFrom(withr,with_path) diff --git a/R/aaa_utils.R b/R/aaa_utils.R index 5ae3d02..ab3ca44 100644 --- a/R/aaa_utils.R +++ b/R/aaa_utils.R @@ -58,7 +58,8 @@ wav_duration = function(object) { #' #' @param path path to the local coqui tts Executable File #' -#' @details List of possible file path locations for the local coqui tts Executable File +#' @details List of possible file path locations for the local coqui tts +#' Executable File #' \describe{ #' \item{Linux}{/usr/bin/tts, /usr/local/bin/tts} #' \item{Mac}{/opt/homebrew/Caskroom/miniforge/base/bin/tts} @@ -66,11 +67,11 @@ wav_duration = function(object) { #' } #' #' @return Returns nothing, function sets the option variable -#' \code{path_to_coqui}. +#' \code{path_to_coqui}. #' @export #' #' @examples \dontrun{ -#' set_coqui_path("local/path/to/tts") +#' set_coqui_path("~/path/to/tts") #' } set_coqui_path <- function(path) { stopifnot(is.character(path)) @@ -81,7 +82,7 @@ set_coqui_path <- function(path) { -# Assert that coqui "tts" exists locally +# Prepare to use coqui "tts" by checking if it exists locally. # Check option "path_to_coqui". If it's NULL, call coqui_find(), which # will try to determine the local path to file "tts". If # coqui_find() is successful, the path to "tts" will be assigned to option diff --git a/R/text2speech-package.R b/R/text2speech-package.R new file mode 100644 index 0000000..45f3bb8 --- /dev/null +++ b/R/text2speech-package.R @@ -0,0 +1,9 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom cli cli_text +#' @importFrom tidyr separate_wider_delim +#' @importFrom withr with_path +## usethis namespace: end +NULL diff --git a/R/tts_backend.R b/R/tts_backend.R index 4425f72..df8f107 100644 --- a/R/tts_backend.R +++ b/R/tts_backend.R @@ -1,15 +1,17 @@ #' Convert Text to Speech using Google Cloud Text-to-Speech API -#' @export + +#' @param text A character vector of text to be spoken +#' @param output_format Format of output files: "mp3" or "wav" +#' @param voice A full voice name that can be passed to the service, such as the +#' argument `voice` for `get_synthesis`` from \code{aws.polly}, or or +#' [mscstts::ms_synthesize()] or the `name` argument for +#' [googleLanguageR::gl_talk()] +#' @param bind_audio Should the [text2speech::tts_bind_wav()] be run on after +#' the audio has been created, to ensure that the length of text and the +#' number of rows is consistent? This affects the output format of some audio. +#' description #' @rdname tts -#' @param voice A full voice name that can be passed to the -#' service, such as the -#' argument `voice` for `get_synthesis`` from \code{aws.polly}, or -#' or [mscstts::ms_synthesize()] or the -#' `name` argument for [googleLanguageR::gl_talk()] -#' @examples -#' tts_default_voice("amazon") -#' tts_default_voice("google") -#' tts_default_voice("microsoft") +#' @export tts_google = function( text, output_format = c("mp3", "wav"), @@ -62,6 +64,7 @@ tts_google = function( } #' Convert Text to Speech using Amazon Polly +#' #' @export #' @rdname tts #' @examples \dontrun{ @@ -156,6 +159,7 @@ tts_amazon = function( #' Convert Text to Speech using Microsoft Cognitive Services API +#' #' @export #' @rdname tts tts_microsoft = function( @@ -214,6 +218,23 @@ tts_microsoft = function( } + +#' Convert Text to Speech using Coqui TTS +#' +#' @param text A character vector of text to be spoken +#' @param exec_path System path to Coqui TTS +#' @param output_format Format of output files: "mp3" or "wav" +#' @param model_name Deep Learning model for Text-to-Speech Conversion +#' @param vocoder_name Model that generates audio +#' @param bind_audio Should the [text2speech::tts_bind_wav()] be run on after +#' the audio has been created, to ensure that the length of text and the +#' number of rows is consistent? This affects the output format of some audio. +#' description +#' @param save_local Should the output file be saved locally? +#' @param save_local_dest Destination to save output file, if saved locally +#' @param ... Additional arguments +#' +#' @export tts_coqui <- function( text, exec_path, @@ -301,7 +322,6 @@ tts_coqui <- function( file.copy(normalizePath(res$file), save_local_dest) } } - res } diff --git a/R/tts_coqui_installed.R b/R/tts_coqui_installed.R index 7c71412..a4eacd4 100644 --- a/R/tts_coqui_installed.R +++ b/R/tts_coqui_installed.R @@ -1,3 +1,7 @@ +#' Is coqui TTS installed on local system? +#' +#' @return `TRUE` or `FALSE` +#' @export tts_coqui_installed <- function() { use_coqui() coqui_path <- getOption("path_to_coqui") diff --git a/R/tts_synthesize.R b/R/tts_synthesize.R index 62656f9..63b982d 100644 --- a/R/tts_synthesize.R +++ b/R/tts_synthesize.R @@ -1,21 +1,21 @@ #' Convert Text to Speech #' -#' @param text A character vector of text to speak -#' @param output_format Format of output files -#' @param ... Additional arguments to -#' `text2speech::tts_google()`, -#' `text2speech::tts_amazon()`, or -#' `text2speech::tts_microsoft()` -#' @param service service to use +#' @param text A character vector of text +#' @param output_format Format of output files: "mp3" or "wav" +#' @param ... Additional arguments to `text2speech::tts_google()`, +#' `text2speech::tts_amazon()`, `text2speech::tts_microsoft()`, or +#' `text2speech::tts_coqui()` +#' @param service Service to use (Google, Amazon, Microsoft, or Coqui) #' -#' @note All functions have a `voice`` argument fro a -#' full voice name that can be passed to the -#' service, such as `voice` for `get_synthesis`` from \code{aws.polly} +#' @note `tts_google()`, `tts_amazon()`, and `tts_microsoft()` have a `voice` +#' argument for a full voice name that can be passed to the service, such as +#' `voice` for `get_synthesis` from \code{aws.polly}. `tts_coqui()` has a +#' `model_name` and `vocoder_name` argument which lets you choose the tts and +#' vocoder model. #' -#' @param bind_audio Should the [text2speech::tts_bind_wav()] -#' be run on after the audio has been created, to ensure that -#' the length of text and the number of rows is consistent? -#' This affects the output format of some audio. +#' @param bind_audio Should the [text2speech::tts_bind_wav()] be run on after +#' the audio has been created, to ensure that the length of text and the +#' number of rows is consistent? This affects the output format of some audio. #' #' #' @return A `data.frame` of text and wav files diff --git a/R/tts_voices.R b/R/tts_voices.R index 23575d7..f99eabc 100644 --- a/R/tts_voices.R +++ b/R/tts_voices.R @@ -149,8 +149,10 @@ tts_google_voices = function(...) { } -#' Get Coqui TTS voices (list models) -#' @rdname tts_voices + +#' Get Coqui TTS voices +#' +#' @return A `data.frame` of the language, dataset, and model name. #' @export tts_coqui_voices = function() { # Look for coqui_path @@ -172,6 +174,7 @@ tts_coqui_voices = function() { delim = "/", names = c("language", "dataset", "model_name")) - cli::cli_text("Test out different voices on the {.href [CoquiTTS Demo](https://huggingface.co/spaces/coqui/CoquiTTS)}") + cli::cli_text("Test out different voices on the + {.href [CoquiTTS Demo](https://huggingface.co/spaces/coqui/CoquiTTS)}") out } diff --git a/man/set_coqui_path.Rd b/man/set_coqui_path.Rd index 0024a3e..791fdbc 100644 --- a/man/set_coqui_path.Rd +++ b/man/set_coqui_path.Rd @@ -18,7 +18,8 @@ Function to set an option that points to the local coqui tts Executable File \code{tts}. } \details{ -List of possible file path locations for the local coqui tts Executable File +List of possible file path locations for the local coqui tts +Executable File \describe{ \item{Linux}{/usr/bin/tts, /usr/local/bin/tts} \item{Mac}{/opt/homebrew/Caskroom/miniforge/base/bin/tts} @@ -27,6 +28,6 @@ List of possible file path locations for the local coqui tts Executable File } \examples{ \dontrun{ -set_coqui_path("local/path/to/tts") +set_coqui_path("~/path/to/tts") } } diff --git a/man/text2speech-package.Rd b/man/text2speech-package.Rd new file mode 100644 index 0000000..8b329de --- /dev/null +++ b/man/text2speech-package.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/text2speech-package.R +\docType{package} +\name{text2speech-package} +\alias{text2speech} +\alias{text2speech-package} +\title{text2speech: Text to Speech Conversion} +\description{ +Converts text into speech using various text-to-speech (TTS) engines and provides an unified interface for accessing their functionality. With this package, users can easily generate audio files of spoken words, phrases, or sentences from plain text data. The package supports multiple TTS engines, including Google's 'Cloud Text-to-Speech API', 'Amazon Polly', Microsoft's 'Cognitive Services Text to Speech REST API', and a free TTS engine called 'coqui TTS'. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/muschellij2/text2speech} + \item Report bugs at \url{https://github.com/muschellij2/text2speech/issues} +} + +} +\author{ +\strong{Maintainer}: John Muschelli \email{muschellij2@gmail.com} (\href{https://orcid.org/0000-0001-6469-1750}{ORCID}) + +Other contributors: +\itemize{ + \item Howard Baek \email{howardbaek@fredhutch.org} [contributor] +} + +} +\keyword{internal} diff --git a/man/tts.Rd b/man/tts.Rd index 34a2e2e..c76c30f 100644 --- a/man/tts.Rd +++ b/man/tts.Rd @@ -43,25 +43,23 @@ tts( ) } \arguments{ -\item{text}{A character vector of text to speak} +\item{text}{A character vector of text} -\item{output_format}{Format of output files} +\item{output_format}{Format of output files: "mp3" or "wav"} -\item{voice}{A full voice name that can be passed to the -service, such as the -argument \code{voice} for \verb{get_synthesis`` from \code{aws.polly}, or or [mscstts::ms_synthesize()] or the }name` argument for \code{\link[googleLanguageR:gl_talk]{googleLanguageR::gl_talk()}}} +\item{voice}{A full voice name that can be passed to the service, such as the +argument \code{voice} for \verb{get_synthesis`` from \code{aws.polly}, or or [mscstts::ms_synthesize()] or the }name` argument for +\code{\link[googleLanguageR:gl_talk]{googleLanguageR::gl_talk()}}} -\item{bind_audio}{Should the \code{\link[=tts_bind_wav]{tts_bind_wav()}} -be run on after the audio has been created, to ensure that -the length of text and the number of rows is consistent? -This affects the output format of some audio.} +\item{bind_audio}{Should the \code{\link[=tts_bind_wav]{tts_bind_wav()}} be run on after +the audio has been created, to ensure that the length of text and the +number of rows is consistent? This affects the output format of some audio.} -\item{...}{Additional arguments to -\code{text2speech::tts_google()}, -\code{text2speech::tts_amazon()}, or -\code{text2speech::tts_microsoft()}} +\item{...}{Additional arguments to \code{text2speech::tts_google()}, +\code{text2speech::tts_amazon()}, \code{text2speech::tts_microsoft()}, or +\code{text2speech::tts_coqui()}} -\item{service}{service to use} +\item{service}{Service to use (Google, Amazon, Microsoft, or Coqui)} } \value{ A \code{data.frame} of text and wav files @@ -76,12 +74,13 @@ Convert Text to Speech using Microsoft Cognitive Services API Convert Text to Speech } \note{ -All functions have a \verb{voice`` argument fro a full voice name that can be passed to the service, such as }voice\code{for}get_synthesis`` from \code{aws.polly} +\code{tts_google()}, \code{tts_amazon()}, and \code{tts_microsoft()} have a \code{voice} +argument for a full voice name that can be passed to the service, such as +\code{voice} for \code{get_synthesis} from \code{aws.polly}. \code{tts_coqui()} has a +\code{model_name} and \code{vocoder_name} argument which lets you choose the tts and +vocoder model. } \examples{ -tts_default_voice("amazon") -tts_default_voice("google") -tts_default_voice("microsoft") \dontrun{ text=' He was caught up in the game. In the middle of the diff --git a/man/tts_coqui.Rd b/man/tts_coqui.Rd new file mode 100644 index 0000000..e19edf1 --- /dev/null +++ b/man/tts_coqui.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tts_backend.R +\name{tts_coqui} +\alias{tts_coqui} +\title{Convert Text to Speech using Coqui TTS} +\usage{ +tts_coqui( + text, + exec_path, + output_format = c("wav", "mp3"), + model_name = "tacotron2-DDC_ph", + vocoder_name = "ljspeech/univnet", + bind_audio = TRUE, + save_local = FALSE, + save_local_dest = NULL, + ... +) +} +\arguments{ +\item{text}{A character vector of text to be spoken} + +\item{exec_path}{System path to Coqui TTS} + +\item{output_format}{Format of output files: "mp3" or "wav"} + +\item{model_name}{Deep Learning model for Text-to-Speech Conversion} + +\item{vocoder_name}{Model that generates audio} + +\item{bind_audio}{Should the \code{\link[=tts_bind_wav]{tts_bind_wav()}} be run on after +the audio has been created, to ensure that the length of text and the +number of rows is consistent? This affects the output format of some audio. +description} + +\item{save_local}{Should the output file be saved locally?} + +\item{save_local_dest}{Destination to save output file, if saved locally} + +\item{...}{Additional arguments} +} +\description{ +Convert Text to Speech using Coqui TTS +} diff --git a/man/tts_coqui_installed.Rd b/man/tts_coqui_installed.Rd new file mode 100644 index 0000000..5c88a02 --- /dev/null +++ b/man/tts_coqui_installed.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tts_coqui_installed.R +\name{tts_coqui_installed} +\alias{tts_coqui_installed} +\title{Is coqui TTS installed on local system?} +\usage{ +tts_coqui_installed() +} +\value{ +\code{TRUE} or \code{FALSE} +} +\description{ +Is coqui TTS installed on local system? +} diff --git a/man/tts_coqui_voices.Rd b/man/tts_coqui_voices.Rd new file mode 100644 index 0000000..d76b932 --- /dev/null +++ b/man/tts_coqui_voices.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tts_voices.R +\name{tts_coqui_voices} +\alias{tts_coqui_voices} +\title{Get Coqui TTS voices} +\usage{ +tts_coqui_voices() +} +\value{ +A \code{data.frame} of the language, dataset, and model name. +} +\description{ +Get Coqui TTS voices +} diff --git a/man/tts_voices.Rd b/man/tts_voices.Rd index 90735ea..f04c06f 100644 --- a/man/tts_voices.Rd +++ b/man/tts_voices.Rd @@ -5,7 +5,6 @@ \alias{tts_amazon_voices} \alias{tts_microsoft_voices} \alias{tts_google_voices} -\alias{tts_coqui_voices} \title{Text to Speech Voices} \usage{ tts_voices(service = c("amazon", "google", "microsoft", "coqui"), ...) @@ -15,8 +14,6 @@ tts_amazon_voices(...) tts_microsoft_voices(...) tts_google_voices(...) - -tts_coqui_voices() } \arguments{ \item{service}{service to use} @@ -35,8 +32,6 @@ Get Amazon Polly TTS voices Get Microsoft Cognitive Services Text to Speech voices Get Google Cloud TTS voices - -Get Coqui TTS voices (list models) } \examples{ if (tts_microsoft_auth()) { From 9d04c01fe2cee2ddd11d424fd3dae3356b5c1b43 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Wed, 3 May 2023 16:13:28 -0700 Subject: [PATCH 06/19] Remove `testthat::context()` --- tests/testthat/test-translate.R | 2 -- tests/testthat/test-tts_coqui.R | 4 ++-- tests/testthat/test-voices.R | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-translate.R b/tests/testthat/test-translate.R index 4bb92c8..54faada 100644 --- a/tests/testthat/test-translate.R +++ b/tests/testthat/test-translate.R @@ -1,5 +1,3 @@ -testthat::context("Translate") - fixed_names = c("index", "original_text", "text", "wav", "file", "audio_type", "duration", diff --git a/tests/testthat/test-tts_coqui.R b/tests/testthat/test-tts_coqui.R index f494c0e..9c2c3ad 100644 --- a/tests/testthat/test-tts_coqui.R +++ b/tests/testthat/test-tts_coqui.R @@ -18,12 +18,12 @@ testthat::test_that("coqui TTS works with wav as output_format", { response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", service = "coqui", - output_format = "wav") + output_format = "mp3") testthat::expect_s3_class(response_df, "data.frame") testthat::expect_named(response_df, fixed_names) testthat::expect_s4_class(response_df$wav[[1]], "Wave") # Check if audio_type is mp3 - testthat::expect_equal(response_df$audio_type, "wav") + testthat::expect_equal(response_df$audio_type, "mp3") } ) diff --git a/tests/testthat/test-voices.R b/tests/testthat/test-voices.R index 156b56a..fceaff2 100644 --- a/tests/testthat/test-voices.R +++ b/tests/testthat/test-voices.R @@ -1,5 +1,3 @@ -testthat::context("List the Voices") - fixed_names = c("voice", "language", "language_code", "gender", "service") From 76cd513d9520e19c4427add4c586863aacbafcaf Mon Sep 17 00:00:00 2001 From: howardbaek Date: Thu, 4 May 2023 11:30:10 -0700 Subject: [PATCH 07/19] Rename files to "test-tts_XXX.R" --- R/tts_microsoft_auth.R | 2 +- tests/testthat/test-translate.R | 17 ----------------- tests/testthat/test-tts_amazon.R | 17 +++++++++++++++++ tests/testthat/test-tts_coqui.R | 18 +++++++++--------- tests/testthat/test-tts_google.R | 17 +++++++++++++++++ tests/testthat/test-tts_microsoft.R | 17 +++++++++++++++++ 6 files changed, 61 insertions(+), 27 deletions(-) delete mode 100644 tests/testthat/test-translate.R create mode 100644 tests/testthat/test-tts_amazon.R create mode 100644 tests/testthat/test-tts_google.R create mode 100644 tests/testthat/test-tts_microsoft.R diff --git a/R/tts_microsoft_auth.R b/R/tts_microsoft_auth.R index 3859fd9..4d8bc72 100644 --- a/R/tts_microsoft_auth.R +++ b/R/tts_microsoft_auth.R @@ -6,5 +6,5 @@ tts_microsoft_auth = function(key_or_json_file = NULL, ...) { mscstts::ms_set_tts_key(api_key = key_or_json_file) res = mscstts::ms_have_tts_key() } - res = tts_microsoft_authenticated(...) + return(tts_microsoft_authenticated(...)) } diff --git a/tests/testthat/test-translate.R b/tests/testthat/test-translate.R deleted file mode 100644 index 54faada..0000000 --- a/tests/testthat/test-translate.R +++ /dev/null @@ -1,17 +0,0 @@ -fixed_names = c("index", "original_text", "text", "wav", - "file", "audio_type", - "duration", - "service") - -testthat::test_that( - "Google Translation", { - if (tts_google_auth()) { - df = tts("hey what's up?", service = "google") - testthat::expect_is(df, "data.frame") - testthat::expect_named(df, fixed_names) - testthat::expect_is(df$wav[[1]], "Wave") - wav = df$wav[[1]] - testthat::expect_true(length(wav)/wav@samp.rate >= 0.5) - } - } -) diff --git a/tests/testthat/test-tts_amazon.R b/tests/testthat/test-tts_amazon.R new file mode 100644 index 0000000..c0b18f8 --- /dev/null +++ b/tests/testthat/test-tts_amazon.R @@ -0,0 +1,17 @@ +fixed_names = c("index", "original_text", "text", "wav", + "file", "audio_type", + "duration", + "service") + +test_that( + "Amazon Polly Translation", { + if (tts_google_auth()) { + response_df = tts("Algorithmic complexity is a key consideration when + designing efficient solutions for large-scale data processing", + service = "amazon") + expect_s3_class(response_df, "data.frame") + expect_named(response_df, fixed_names) + expect_s4_class(response_df$wav[[1]], "Wave") + } + } +) diff --git a/tests/testthat/test-tts_coqui.R b/tests/testthat/test-tts_coqui.R index 9c2c3ad..8ef94c5 100644 --- a/tests/testthat/test-tts_coqui.R +++ b/tests/testthat/test-tts_coqui.R @@ -1,29 +1,29 @@ fixed_names = c("index", "original_text", "text", "wav", "file", "audio_type", "duration", "service") -testthat::test_that("Vanilla coqui TTS works", { +test_that("Vanilla coqui TTS works", { response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", service = "coqui") # Check x is a data.frame - testthat::expect_s3_class(response_df, "data.frame") + expect_s3_class(response_df, "data.frame") # Check column names - testthat::expect_named(response_df, fixed_names) + expect_named(response_df, fixed_names) # Check Wave - testthat::expect_s4_class(response_df$wav[[1]], "Wave") + expect_s4_class(response_df$wav[[1]], "Wave") } ) -testthat::test_that("coqui TTS works with wav as output_format", { +test_that("coqui TTS works with wav as output_format", { response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", service = "coqui", output_format = "mp3") - testthat::expect_s3_class(response_df, "data.frame") - testthat::expect_named(response_df, fixed_names) - testthat::expect_s4_class(response_df$wav[[1]], "Wave") + expect_s3_class(response_df, "data.frame") + expect_named(response_df, fixed_names) + expect_s4_class(response_df$wav[[1]], "Wave") # Check if audio_type is mp3 - testthat::expect_equal(response_df$audio_type, "mp3") + expect_equal(response_df$audio_type, "mp3") } ) diff --git a/tests/testthat/test-tts_google.R b/tests/testthat/test-tts_google.R new file mode 100644 index 0000000..6224420 --- /dev/null +++ b/tests/testthat/test-tts_google.R @@ -0,0 +1,17 @@ +fixed_names = c("index", "original_text", "text", "wav", + "file", "audio_type", + "duration", + "service") + +test_that( + "Google Cloud Text-to-Speech", { + if (tts_google_auth()) { + response_df = tts("Algorithmic complexity is a key consideration when + designing efficient solutions for large-scale data processing", + service = "google") + expect_s3_class(response_df, "data.frame") + expect_named(response_df, fixed_names) + expect_s4_class(response_df$wav[[1]], "Wave") + } + } +) diff --git a/tests/testthat/test-tts_microsoft.R b/tests/testthat/test-tts_microsoft.R new file mode 100644 index 0000000..df8e1cd --- /dev/null +++ b/tests/testthat/test-tts_microsoft.R @@ -0,0 +1,17 @@ +fixed_names = c("index", "original_text", "text", "wav", + "file", "audio_type", + "duration", + "service") + +test_that( + "Microsoft Cognitive Services Translation", { + if (tts_microsoft_auth()) { + response_df = tts("Algorithmic complexity is a key consideration when + designing efficient solutions for large-scale data processing", + service = "microsoft") + expect_s3_class(response_df, "data.frame") + expect_named(response_df, fixed_names) + expect_s4_class(response_df$wav[[1]], "Wave") + } + } +) From 3118f2574137646b6c59748a0b7c66f6e5d57574 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Wed, 10 May 2023 15:21:10 -0700 Subject: [PATCH 08/19] Remove tests for Microsoft TTS --- tests/testthat/test-tts_amazon.R | 2 +- tests/testthat/test-tts_coqui.R | 4 ++-- tests/testthat/test-tts_microsoft.R | 17 ----------------- 3 files changed, 3 insertions(+), 20 deletions(-) delete mode 100644 tests/testthat/test-tts_microsoft.R diff --git a/tests/testthat/test-tts_amazon.R b/tests/testthat/test-tts_amazon.R index c0b18f8..827d033 100644 --- a/tests/testthat/test-tts_amazon.R +++ b/tests/testthat/test-tts_amazon.R @@ -5,7 +5,7 @@ fixed_names = c("index", "original_text", "text", "wav", test_that( "Amazon Polly Translation", { - if (tts_google_auth()) { + if (tts_amazon_auth()) { response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", service = "amazon") diff --git a/tests/testthat/test-tts_coqui.R b/tests/testthat/test-tts_coqui.R index 8ef94c5..f71f8ac 100644 --- a/tests/testthat/test-tts_coqui.R +++ b/tests/testthat/test-tts_coqui.R @@ -18,12 +18,12 @@ test_that("coqui TTS works with wav as output_format", { response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", service = "coqui", - output_format = "mp3") + output_format = "wav") expect_s3_class(response_df, "data.frame") expect_named(response_df, fixed_names) expect_s4_class(response_df$wav[[1]], "Wave") # Check if audio_type is mp3 - expect_equal(response_df$audio_type, "mp3") + expect_equal(response_df$audio_type, "wav") } ) diff --git a/tests/testthat/test-tts_microsoft.R b/tests/testthat/test-tts_microsoft.R deleted file mode 100644 index df8e1cd..0000000 --- a/tests/testthat/test-tts_microsoft.R +++ /dev/null @@ -1,17 +0,0 @@ -fixed_names = c("index", "original_text", "text", "wav", - "file", "audio_type", - "duration", - "service") - -test_that( - "Microsoft Cognitive Services Translation", { - if (tts_microsoft_auth()) { - response_df = tts("Algorithmic complexity is a key consideration when - designing efficient solutions for large-scale data processing", - service = "microsoft") - expect_s3_class(response_df, "data.frame") - expect_named(response_df, fixed_names) - expect_s4_class(response_df$wav[[1]], "Wave") - } - } -) From 5bd5ac96c92fed233ed8844fe8ebc2ca26604c0e Mon Sep 17 00:00:00 2001 From: howardbaek Date: Thu, 11 May 2023 14:20:36 -0700 Subject: [PATCH 09/19] Get rid of testing for Microsoft voices --- tests/testthat/test-voices.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/testthat/test-voices.R b/tests/testthat/test-voices.R index fceaff2..349dfef 100644 --- a/tests/testthat/test-voices.R +++ b/tests/testthat/test-voices.R @@ -1,14 +1,6 @@ fixed_names = c("voice", "language", "language_code", "gender", "service") -testthat::test_that( - "Microsoft Voices", { - df = tts_voices(service = "microsoft") - testthat::expect_is(df, "data.frame") - testthat::expect_named(df, fixed_names) - } -) - testthat::test_that( "Google Voices", { if (tts_google_auth()) { From 5659e6159680d594e1bfa0902ccfbcd0b940e350 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Thu, 11 May 2023 15:48:44 -0700 Subject: [PATCH 10/19] Parameterize testing --- DESCRIPTION | 1 + tests/testthat/test-tts_backend.R | 47 +++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 tests/testthat/test-tts_backend.R diff --git a/DESCRIPTION b/DESCRIPTION index 21e8fd8..fc6ac18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,6 +23,7 @@ Suggests: rmarkdown, testthat (>= 3.0.0), stringi + patrick Encoding: UTF-8 LazyData: true VignetteBuilder: knitr diff --git a/tests/testthat/test-tts_backend.R b/tests/testthat/test-tts_backend.R new file mode 100644 index 0000000..e6e3bce --- /dev/null +++ b/tests/testthat/test-tts_backend.R @@ -0,0 +1,47 @@ +fixed_names = c("index", "original_text", "text", "wav", + "file", "audio_type", + "duration", + "service") + +patrick::with_parameters_test_that("Google Cloud Text-to-Speech / Amazon Polly Translation", + { + if (tts_auth) { + response_df = tts("Algorithmic complexity is a key consideration + when designing efficient solutions for large-scale data processing", + service = company) + expect_s3_class(response_df, char_value) + } + }, + tts_auth = c(tts_amazon_auth(), tts_google_auth()), + company = c("amazon", "google"), + char_value = "data.frame" +) + +patrick::with_parameters_test_that("Google Cloud Text-to-Speech / Amazon Polly Translation", + { + if (tts_auth) { + response_df = tts("Algorithmic complexity is a key consideration + when designing efficient solutions for large-scale data processing", + service = company) + expect_equal(response_df$service, char_value) + } + }, + tts_auth = c(tts_amazon_auth(), tts_google_auth()), + company = c("amazon", "google"), + char_value = c("amazon", "google") +) + +patrick::with_parameters_test_that("Google Cloud Text-to-Speech / Amazon Polly Translation", + { + if (tts_auth) { + response_df = tts("Algorithmic complexity is a key consideration + when designing efficient solutions for large-scale data processing", + service = company) + audio_value = response_df$wav[[1]] + expect_s4_class(audio_value, char_value) + } + }, + tts_auth = c(tts_amazon_auth(), tts_google_auth()), + company = c("amazon", "google"), + char_value = "Wave" +) From 8acac076824a6aba0424f4682c52ecd801bd08ff Mon Sep 17 00:00:00 2001 From: howardbaek Date: Thu, 11 May 2023 15:53:29 -0700 Subject: [PATCH 11/19] Alphabetize Suggests field --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fc6ac18..291d533 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,10 +20,10 @@ License: GPL-3 Suggests: aws.polly, covr, + patrick, rmarkdown, - testthat (>= 3.0.0), stringi - patrick + testthat (>= 3.0.0) Encoding: UTF-8 LazyData: true VignetteBuilder: knitr From adcc9eb454a7821539a2d2fe7619964ba409ab2f Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 22 May 2023 14:39:21 -0700 Subject: [PATCH 12/19] mscstts --> mscstts2 --- R/tts_auth.R | 6 +++--- R/tts_backend.R | 19 +++++-------------- R/tts_microsoft_auth.R | 6 +++--- R/tts_synthesize.R | 2 +- 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/R/tts_auth.R b/R/tts_auth.R index e15dfb4..0fda3f4 100644 --- a/R/tts_auth.R +++ b/R/tts_auth.R @@ -83,11 +83,11 @@ tts_amazon_authenticated = function() { #' @export tts_microsoft_authenticated = function(...) { res = try({ - mscstts::ms_get_tts_token(...) + mscstts2::ms_get_token(...) }) if (inherits(res, "try-error")) { return(FALSE) } - res = res$request - httr::status_code(res) < 400 + res_status_code = res$response$status_code + res_status_code < 400 } diff --git a/R/tts_backend.R b/R/tts_backend.R index df8f107..1b59fd5 100644 --- a/R/tts_backend.R +++ b/R/tts_backend.R @@ -164,20 +164,12 @@ tts_amazon = function( #' @rdname tts tts_microsoft = function( text, - output_format = c("mp3", "wav"), - voice = "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)", + audio_type = c("mp3", "wav"), + voice = NULL, bind_audio = TRUE, ...) { - + # Set character limit limit = 800 - output_format = match.arg(output_format) - audio_type = output_format - - output_format = switch( - output_format, - "mp3" = "audio-24khz-160kbitrate-mono-mp3", - "wav" = "riff-24khz-16bit-mono-pcm") - res = lapply(text, function(string) { strings = tts_split_text(string, @@ -185,12 +177,11 @@ tts_microsoft = function( res = vapply(strings, function(tt) { output = tts_temp_audio(audio_type) - out = mscstts::ms_synthesize( + out = mscstts2::ms_synthesize( tt, - output_format = output_format, voice = voice, ...) - writeBin(out$content, con = output) + writeBin(out, con = output) output }, FUN.VALUE = character(1L)) names(res) = NULL diff --git a/R/tts_microsoft_auth.R b/R/tts_microsoft_auth.R index 4d8bc72..d73508b 100644 --- a/R/tts_microsoft_auth.R +++ b/R/tts_microsoft_auth.R @@ -2,9 +2,9 @@ #' @rdname tts_auth #' @export tts_microsoft_auth = function(key_or_json_file = NULL, ...) { - if (!mscstts::ms_have_tts_key()) { - mscstts::ms_set_tts_key(api_key = key_or_json_file) - res = mscstts::ms_have_tts_key() + if (!mscstts2::ms_exist_key()) { + mscstts2::ms_set_key(api_key = key_or_json_file) + res = mscstts2::ms_exist_key() } return(tts_microsoft_authenticated(...)) } diff --git a/R/tts_synthesize.R b/R/tts_synthesize.R index 63b982d..2172239 100644 --- a/R/tts_synthesize.R +++ b/R/tts_synthesize.R @@ -65,7 +65,7 @@ tts = function( if (service == "microsoft") { res = tts_microsoft( text = text, - output_format = output_format, + audio_type = output_format, bind_audio = bind_audio, ...) } From e0cdd50fb5e03e772e5f8ccc13caaab0e540e982 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 22 May 2023 14:39:33 -0700 Subject: [PATCH 13/19] Finalize tests for tts --- tests/testthat/test-tts_amazon.R | 17 ----- tests/testthat/test-tts_backend.R | 73 +++++++++---------- tests/testthat/test-tts_google.R | 17 ----- .../{test-voices.R => test-tts_voices.R} | 0 4 files changed, 34 insertions(+), 73 deletions(-) delete mode 100644 tests/testthat/test-tts_amazon.R delete mode 100644 tests/testthat/test-tts_google.R rename tests/testthat/{test-voices.R => test-tts_voices.R} (100%) diff --git a/tests/testthat/test-tts_amazon.R b/tests/testthat/test-tts_amazon.R deleted file mode 100644 index 827d033..0000000 --- a/tests/testthat/test-tts_amazon.R +++ /dev/null @@ -1,17 +0,0 @@ -fixed_names = c("index", "original_text", "text", "wav", - "file", "audio_type", - "duration", - "service") - -test_that( - "Amazon Polly Translation", { - if (tts_amazon_auth()) { - response_df = tts("Algorithmic complexity is a key consideration when - designing efficient solutions for large-scale data processing", - service = "amazon") - expect_s3_class(response_df, "data.frame") - expect_named(response_df, fixed_names) - expect_s4_class(response_df$wav[[1]], "Wave") - } - } -) diff --git a/tests/testthat/test-tts_backend.R b/tests/testthat/test-tts_backend.R index e6e3bce..889225f 100644 --- a/tests/testthat/test-tts_backend.R +++ b/tests/testthat/test-tts_backend.R @@ -1,47 +1,42 @@ -fixed_names = c("index", "original_text", "text", "wav", - "file", "audio_type", - "duration", - "service") - -patrick::with_parameters_test_that("Google Cloud Text-to-Speech / Amazon Polly Translation", - { - if (tts_auth) { - response_df = tts("Algorithmic complexity is a key consideration +patrick::with_parameters_test_that("tts() returns a data.frame", + { + if (tts_auth) { + response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", - service = company) - expect_s3_class(response_df, char_value) - } - }, - tts_auth = c(tts_amazon_auth(), tts_google_auth()), - company = c("amazon", "google"), - char_value = "data.frame" + service = company) + testthat::expect_s3_class(response_df, char_value) + } + }, + tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus")), + company = c("amazon", "google", "microsoft"), + char_value = "data.frame" ) -patrick::with_parameters_test_that("Google Cloud Text-to-Speech / Amazon Polly Translation", - { - if (tts_auth) { - response_df = tts("Algorithmic complexity is a key consideration +patrick::with_parameters_test_that("tts() returns a data.frame with an Wave object", + { + if (tts_auth) { + response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", - service = company) - expect_equal(response_df$service, char_value) - } - }, - tts_auth = c(tts_amazon_auth(), tts_google_auth()), - company = c("amazon", "google"), - char_value = c("amazon", "google") + service = company) + audio_value = response_df$wav[[1]] + testthat::expect_s4_class(audio_value, char_value) + } + }, + tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus")), + company = c("amazon", "google", "microsoft"), + char_value = "Wave" ) -patrick::with_parameters_test_that("Google Cloud Text-to-Speech / Amazon Polly Translation", - { - if (tts_auth) { - response_df = tts("Algorithmic complexity is a key consideration +patrick::with_parameters_test_that("tts() successfully created an audio output in a file path", + { + if (tts_auth) { + response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", - service = company) - audio_value = response_df$wav[[1]] - expect_s4_class(audio_value, char_value) - } - }, - tts_auth = c(tts_amazon_auth(), tts_google_auth()), - company = c("amazon", "google"), - char_value = "Wave" + service = company) + audio_path = response_df$file[[1]] + testthat::expect_equal(file.exists(audio_path), TRUE) + } + }, + tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus")), + company = c("amazon", "google", "microsoft") ) diff --git a/tests/testthat/test-tts_google.R b/tests/testthat/test-tts_google.R deleted file mode 100644 index 6224420..0000000 --- a/tests/testthat/test-tts_google.R +++ /dev/null @@ -1,17 +0,0 @@ -fixed_names = c("index", "original_text", "text", "wav", - "file", "audio_type", - "duration", - "service") - -test_that( - "Google Cloud Text-to-Speech", { - if (tts_google_auth()) { - response_df = tts("Algorithmic complexity is a key consideration when - designing efficient solutions for large-scale data processing", - service = "google") - expect_s3_class(response_df, "data.frame") - expect_named(response_df, fixed_names) - expect_s4_class(response_df$wav[[1]], "Wave") - } - } -) diff --git a/tests/testthat/test-voices.R b/tests/testthat/test-tts_voices.R similarity index 100% rename from tests/testthat/test-voices.R rename to tests/testthat/test-tts_voices.R From 5d7273467fdcae8e39323f6e6d968eeb5c234910 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 22 May 2023 14:40:23 -0700 Subject: [PATCH 14/19] Delete unecessary file --- tests/testthat/test-tts_coqui.R | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 tests/testthat/test-tts_coqui.R diff --git a/tests/testthat/test-tts_coqui.R b/tests/testthat/test-tts_coqui.R deleted file mode 100644 index f71f8ac..0000000 --- a/tests/testthat/test-tts_coqui.R +++ /dev/null @@ -1,29 +0,0 @@ -fixed_names = c("index", "original_text", "text", "wav", - "file", "audio_type", "duration", "service") - -test_that("Vanilla coqui TTS works", { - response_df = tts("Algorithmic complexity is a key consideration when - designing efficient solutions for large-scale data processing", - service = "coqui") - # Check x is a data.frame - expect_s3_class(response_df, "data.frame") - # Check column names - expect_named(response_df, fixed_names) - # Check Wave - expect_s4_class(response_df$wav[[1]], "Wave") -} -) - -test_that("coqui TTS works with wav as output_format", { - response_df = tts("Algorithmic complexity is a key consideration when - designing efficient solutions for large-scale data processing", - service = "coqui", - output_format = "wav") - expect_s3_class(response_df, "data.frame") - expect_named(response_df, fixed_names) - expect_s4_class(response_df$wav[[1]], "Wave") - - # Check if audio_type is mp3 - expect_equal(response_df$audio_type, "wav") -} -) From 67d915b06664f3cb4614a00d29bffb681d3193d3 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 22 May 2023 14:44:06 -0700 Subject: [PATCH 15/19] Fix documentation --- DESCRIPTION | 2 +- man/tts.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 291d533..1fec9fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,7 @@ Imports: httr, knitr, magrittr, - mscstts (>= 0.5.1), + mscstts2, tidyr, tuneR, utils, diff --git a/man/tts.Rd b/man/tts.Rd index c76c30f..7e5a4da 100644 --- a/man/tts.Rd +++ b/man/tts.Rd @@ -26,8 +26,8 @@ tts_amazon( tts_microsoft( text, - output_format = c("mp3", "wav"), - voice = "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)", + audio_type = c("mp3", "wav"), + voice = NULL, bind_audio = TRUE, ... ) From dad28954240e203a5b9f3e2654084e5722d85278 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 22 May 2023 15:56:31 -0700 Subject: [PATCH 16/19] Write tests for `tts_voices()` --- R/tts_voices.R | 18 +++++------- tests/testthat/test-tts_backend.R | 49 +++++++++++++------------------ tests/testthat/test-tts_voices.R | 37 +++++++++++------------ 3 files changed, 47 insertions(+), 57 deletions(-) diff --git a/R/tts_voices.R b/R/tts_voices.R index f99eabc..d5d0730 100644 --- a/R/tts_voices.R +++ b/R/tts_voices.R @@ -109,15 +109,14 @@ tts_amazon_voices = function(...) { #' Get Microsoft Cognitive Services Text to Speech voices #' @rdname tts_voices #' @export -tts_microsoft_voices = function(...) { - # tts_microsoft_auth(...) - res = mscstts::ms_locale_df() +tts_microsoft_voices = function(region = "westus") { + res = mscstts2::ms_list_voice() cn = colnames(res) - cn[ cn == "Gender" ] = "gender" - cn[ cn == "code" ] = "language_code" - cn[ cn == "locale" ] = "voice" - cn[ cn == "language" ] = "language" - colnames(res) = cn + cn[ cn == "Name" ] <- "voice" + cn[ cn == "Locale" ] <- "language_code" + cn[ cn == "LocaleName" ] <- "language" + cn[ cn == "Gender" ] <- "gender" + colnames(res) <- cn res = res[, c("voice", "language", "language_code", "gender")] res$service = "microsoft" @@ -174,7 +173,6 @@ tts_coqui_voices = function() { delim = "/", names = c("language", "dataset", "model_name")) - cli::cli_text("Test out different voices on the - {.href [CoquiTTS Demo](https://huggingface.co/spaces/coqui/CoquiTTS)}") + cli::cli_alert_info("Test out different voices on the {.href [CoquiTTS Demo](https://huggingface.co/spaces/coqui/CoquiTTS)}") out } diff --git a/tests/testthat/test-tts_backend.R b/tests/testthat/test-tts_backend.R index 889225f..5701c84 100644 --- a/tests/testthat/test-tts_backend.R +++ b/tests/testthat/test-tts_backend.R @@ -1,42 +1,33 @@ -patrick::with_parameters_test_that("tts() returns a data.frame", - { - if (tts_auth) { - response_df = tts("Algorithmic complexity is a key consideration - when designing efficient solutions for large-scale data processing", - service = company) - testthat::expect_s3_class(response_df, char_value) - } - }, - tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus")), - company = c("amazon", "google", "microsoft"), - char_value = "data.frame" -) - -patrick::with_parameters_test_that("tts() returns a data.frame with an Wave object", +patrick::with_parameters_test_that("test tts() on Amazon, Google, and Microsoft engines) ", { if (tts_auth) { response_df = tts("Algorithmic complexity is a key consideration when designing efficient solutions for large-scale data processing", service = company) audio_value = response_df$wav[[1]] - testthat::expect_s4_class(audio_value, char_value) - } - }, - tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus")), - company = c("amazon", "google", "microsoft"), - char_value = "Wave" -) - -patrick::with_parameters_test_that("tts() successfully created an audio output in a file path", - { - if (tts_auth) { - response_df = tts("Algorithmic complexity is a key consideration - when designing efficient solutions for large-scale data processing", - service = company) audio_path = response_df$file[[1]] + + testthat::expect_s3_class(response_df, "data.frame") + testthat::expect_s4_class(audio_value, "Wave") testthat::expect_equal(file.exists(audio_path), TRUE) } }, tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus")), company = c("amazon", "google", "microsoft") ) + +fixed_names = c("index", "original_text", "text", "wav", + "file", "audio_type", "duration", "service") + +test_that("test tts() on Coqui engine", { + response_df = tts("Algorithmic complexity is a key consideration when + designing efficient solutions for large-scale data processing", + service = "coqui") + # Check x is a data.frame + expect_s3_class(response_df, "data.frame") + # Check column names + expect_named(response_df, fixed_names) + # Check Wave + expect_s4_class(response_df$wav[[1]], "Wave") +} +) diff --git a/tests/testthat/test-tts_voices.R b/tests/testthat/test-tts_voices.R index 349dfef..cbe9e78 100644 --- a/tests/testthat/test-tts_voices.R +++ b/tests/testthat/test-tts_voices.R @@ -1,24 +1,25 @@ fixed_names = c("voice", "language", "language_code", "gender", "service") -testthat::test_that( - "Google Voices", { - if (tts_google_auth()) { - df = tts_voices(service = "google") - testthat::expect_is(df, "data.frame") - testthat::expect_named(df, fixed_names) - } - } +patrick::with_parameters_test_that("test tts_voices() on Amazon, Google, and Microsoft engines", + { + if (tts_auth) { + response_df = tts_voices(service = company) + testthat::expect_s3_class(response_df, "data.frame") + testthat::expect_named(response_df, fixed_names) + } + }, + tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus")), + company = c("amazon", "google", "microsoft") ) -if (requireNamespace("aws.polly", quietly = TRUE)) { - testthat::test_that( - "Amazon Voices", { - if (tts_amazon_auth()) { - df = tts_voices(service = "amazon") - testthat::expect_is(df, "data.frame") - testthat::expect_named(df, fixed_names) - } - } - ) +fixed_names_coqui <- c("language", "dataset", "model_name") + +test_that("test tts_voices() on Coqui engine", { + response_df = tts_voices(service = "coqui") + # Check x is a data.frame + expect_s3_class(response_df, "data.frame") + # Check column names + expect_named(response_df, fixed_names_coqui) } +) From 6e6629ecf3d555e9acfbd2a00ed744f4909f6ed6 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Tue, 23 May 2023 10:04:22 -0700 Subject: [PATCH 17/19] Add comma to DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1fec9fd..b5aa173 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ Suggests: covr, patrick, rmarkdown, - stringi + stringi, testthat (>= 3.0.0) Encoding: UTF-8 LazyData: true From c524a760e486a820a6364b542731d893d65b371a Mon Sep 17 00:00:00 2001 From: howardbaek Date: Tue, 23 May 2023 10:21:38 -0700 Subject: [PATCH 18/19] Add Remotes Field --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index b5aa173..c0e3afd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,5 +44,6 @@ Imports: tuneR, utils, withr +Remotes: howardbaek/mscstts2 Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 From bf5668e934b9d6d4436036be9ccf3529f5c3c8d5 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Tue, 23 May 2023 11:57:54 -0700 Subject: [PATCH 19/19] Put tts coqui engine inside parametrized test --- tests/testthat/test-tts_backend.R | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/tests/testthat/test-tts_backend.R b/tests/testthat/test-tts_backend.R index 5701c84..4bdd852 100644 --- a/tests/testthat/test-tts_backend.R +++ b/tests/testthat/test-tts_backend.R @@ -12,22 +12,6 @@ patrick::with_parameters_test_that("test tts() on Amazon, Google, and Microsoft testthat::expect_equal(file.exists(audio_path), TRUE) } }, - tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus")), - company = c("amazon", "google", "microsoft") -) - -fixed_names = c("index", "original_text", "text", "wav", - "file", "audio_type", "duration", "service") - -test_that("test tts() on Coqui engine", { - response_df = tts("Algorithmic complexity is a key consideration when - designing efficient solutions for large-scale data processing", - service = "coqui") - # Check x is a data.frame - expect_s3_class(response_df, "data.frame") - # Check column names - expect_named(response_df, fixed_names) - # Check Wave - expect_s4_class(response_df$wav[[1]], "Wave") -} + tts_auth = c(tts_amazon_auth(), tts_google_auth(), tts_microsoft_auth(region = "westus"), TRUE), + company = c("amazon", "google", "microsoft", "coqui") )