-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e8f700d
commit c77e478
Showing
20 changed files
with
710 additions
and
775 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,19 @@ | ||
3e198e2101752d7c0de7551407f39cde *DESCRIPTION | ||
657fb94b84fa73d5314fbbfb350e87b0 *DESCRIPTION | ||
ee3fce724fcee2b608b6541d1f005f34 *LICENSE | ||
982c34dc8c426c8dc84dbea20ca258a2 *NAMESPACE | ||
03a659a4dcc2e5d6c26349baf4a52c4c *R/pinyin.R | ||
9973a04402c7b707d5e715c0a5615ac7 *R/translate.R | ||
aa495bd4bf21fe8e649ded6989e96272 *R/translate_view.R | ||
099973f5cb3b8e1324ff88a24b808b8d *README.md | ||
f108f73346fb1522c2f49bd061f50860 *build/vignette.rds | ||
69bdd1329628be724d7c584f30d88b2c *inst/doc/ecce.R | ||
4c4f83ab5f48fb552f833d21fee0e121 *inst/doc/ecce.Rmd | ||
ca9fb55070dde7e2886c19bef6294d15 *inst/doc/ecce.html | ||
1db377d2dbde77cb37b9a302950eec9b *R/pinyin.R | ||
b9fd99092a900e4c0f3470c5ec2867a2 *R/translate.R | ||
cff90d00ee615b7469dac0324d0c3b67 *R/translate_view.R | ||
cc20f4ad6c43c6eec60236a3f2874fa7 *README.md | ||
813eac56e50f4fbcf2e3e17e3bb74bac *build/vignette.rds | ||
3585301a963a524c342c30f62e041121 *inst/doc/ecce.R | ||
c38defd2febb67857e2b8b6d2798d62c *inst/doc/ecce.Rmd | ||
4a563fca86aefc833831d5e7d6645e19 *inst/doc/ecce.html | ||
5da18116c0dfdffe9f72608d3958b9dc *man/figures/logo.png | ||
961fd2e570079d3359112b2621edc096 *man/pinyin.Rd | ||
7ac16a960a44ee1fa8fe5aa92ef88123 *man/translate.Rd | ||
ac0754baf915ac0ac98f289ff4af3e08 *man/translate_view.Rd | ||
2119e256272b9f0df05489cc3d78390b *man/pinyin.Rd | ||
dd7bdf853078cb2d41b2ffc999b1f7c0 *man/translate.Rd | ||
942c3309e2784e6fb64eda0e20edf92f *man/translate_view.Rd | ||
7dc66ea6fcf56b47bddc498e627c5bac *tests/testthat.R | ||
1c1aa7840557e71482f5ca62c20cb2f0 *tests/testthat/test-pinyin.R | ||
06cfe0e4f11299a2c313512ce7d81c2a *tests/testthat/test-translate.R | ||
4c4f83ab5f48fb552f833d21fee0e121 *vignettes/ecce.Rmd | ||
c38defd2febb67857e2b8b6d2798d62c *vignettes/ecce.Rmd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,166 +1,167 @@ | ||
#' @title Obtain and label Chinese pinyin
#'
#' @description
#' Sends Chinese text to the Youdao pinyin API, turns the numbered syllables
#' found in the response (a syllable followed by a tone digit, e.g. "hao3")
#' into pinyin written with tone marks, and prints the pinyin line above the
#' characters it belongs to.
#'
#' @details
#' The Youdao credentials are read from the environment variables `app_key`
#' and `app_secret`; the function stops if either is empty. A network
#' connection is required.
#'
#' @param input A string consisting of Chinese character or sentences.
#'
#' @return Invisibly, a single string holding the pinyin line, a line break
#' and the input characters, i.e. the same text that is printed.
#'
#' @examples
#' # Example(Not run)
#' # pinyin("type Chinese character")
#'
#' @export

#------------------------------------------------------------------------------#

pinyin = function(input) {

  # Fail on a bad argument before any network traffic happens
  if (!is.character(input) || length(input) != 1L || is.na(input)) {
    stop('`input` must be a single, non-missing character string.')
  }
  q = input

  # Check network connection
  if (!curl::has_internet()) {
    stop('No network connection found...')
  }

  # Set Youdao pinyin upload API address
  youdao_url_upload = 'https://openapi.youdao.com/getPinYin'

  # Get the ID and PASSWORD for the Youdao API
  app_key = Sys.getenv("app_key")
  app_secret = Sys.getenv("app_secret")
  if (app_key == "" || app_secret == "") {
    stop('You need to provide the ID and PASSWORD of the Youdao API.')
  }

  # Set a unique universal identifier
  salt = as.character(uuid::UUIDgenerate())

  # Seconds since the epoch, sent as a string
  curtime = as.character(
    as.integer(
      as.POSIXct(Sys.time(), tz = "UTC")
    )
  )

  # Shorten the text used in the signature: inputs longer than 20 characters
  # become first 10 characters + total length + last 10 characters
  truncate = function(x) {
    if (is.null(x)) {
      return(NULL)
    }
    x = iconv(x, to = 'UTF-8')
    size = nchar(x)
    if (size <= 20) {
      return(x)
    }
    paste0(
      substr(x, 1, 10),
      size,
      substr(x, size - 9, size)
    )
  }

  # Tone-marked variants of each vowel; position 1-4 is the tone, position 5
  # is the unmarked vowel
  tone_table = c(
    a = "\u0101\u00e1\u01ce\u00e0a",
    e = "\u0113\u00e9\u011b\u00e8e",
    i = "\u012b\u00ed\u01d0\u00eci",
    o = "\u014d\u00f3\u01d2\u00f2o",
    u = "\u016b\u00fa\u01d4\u00f9u"
  )

  # Convert one numbered syllable (e.g. "hao3") into tone-marked pinyin
  add_tone_mark = function(syllable) {
    # "_" is passed through; an entry without a pinyin part is shown the
    # same way so the pinyin line stays aligned with the characters
    if (is.na(syllable) || syllable == "_") {
      return("_")
    }
    base = gsub("[0-9]", "", syllable)
    tone = as.integer(gsub("[^0-9]", "", syllable))
    # Without a usable tone digit there is nothing to mark
    if (is.na(tone) || tone < 1L || tone > 5L) {
      return(base)
    }
    # Standard placement: "a" or "e" carries the mark, then the "o" of
    # "ou", otherwise the last vowel of the syllable
    if (grepl("[ae]", base)) {
      pos = as.integer(regexpr("[ae]", base))
    } else if (grepl("ou", base, fixed = TRUE)) {
      pos = as.integer(regexpr("ou", base, fixed = TRUE))
    } else {
      hits = as.integer(gregexpr("[iou]", base)[[1]])
      pos = hits[length(hits)]
    }
    # NOTE(review): syllables without a/e/i/o/u (e.g. u-umlaut written as
    # "v") are returned without a mark -- confirm how the API encodes them
    if (pos < 1L) {
      return(base)
    }
    vowel = substr(base, pos, pos)
    paste0(
      substr(base, 1L, pos - 1L),
      substr(tone_table[[vowel]], tone, tone),
      substr(base, pos + 1L, nchar(base))
    )
  }

  # Processing signature information
  sign = tolower(
    digest::digest(
      paste0(app_key, truncate(q), salt, curtime, app_secret),
      algo = "sha256",
      serialize = FALSE
    )
  )

  # Generate parameter list ("v3" is the signature type)
  params = list(
    q = q,
    appKey = app_key,
    salt = salt,
    sign = sign,
    signType = "v3",
    curtime = curtime
  )

  response = httr::POST(
    url = youdao_url_upload,
    body = params,
    encode = "form"
  )
  # Turn HTTP-level failures into an R error instead of parsing an error page
  httr::stop_for_status(response)

  convert_data = httr::content(
    response, "text", encoding = "UTF-8"
  )
  json_data = jsonlite::fromJSON(convert_data)

  # The entries are expected at data$data$pinyins; anything else (for
  # example an error payload) is reported explicitly
  pinyins = tryCatch(
    json_data$data$data$pinyins,
    error = function(e) NULL
  )
  if (!is.character(pinyins) || length(pinyins) == 0L) {
    stop('The Youdao API response does not contain any pinyin data.')
  }

  # Each entry is split at ":" into the input character and its pinyin
  split_pinyin = strsplit(pinyins, ":")
  input_info = vapply(
    split_pinyin, function(x) x[1], character(1), USE.NAMES = FALSE
  )
  pinyin_info = vapply(
    split_pinyin, function(x) x[2], character(1), USE.NAMES = FALSE
  )

  # One output syllable per entry, in the original order
  marked = vapply(
    pinyin_info, add_tone_mark, character(1), USE.NAMES = FALSE
  )

  words = paste(input_info, collapse = " ")
  pinyin_line = paste(marked, collapse = " ")

  cat(pinyin_line, "\n", words)

  # Return what was printed so the result can also be used programmatically
  invisible(paste(pinyin_line, "\n", words))

}
|
||
#------------------------------------------------------------------------------# | ||
#' @title Obtain and label Chinese pinyin
#'
#' @description
#' Sends Chinese text to the Youdao pinyin API, turns the numbered syllables
#' found in the response (a syllable followed by a tone digit, e.g. "hao3")
#' into pinyin written with tone marks, and prints the pinyin line above the
#' characters it belongs to.
#'
#' @details
#' The Youdao credentials are read from the environment variables `app_key`
#' and `app_secret`; the function stops if either is empty. A network
#' connection is required.
#'
#' @param input A string consisting of Chinese character or sentences.
#'
#' @return Invisibly, a single string holding the pinyin line, a line break
#' and the input characters, i.e. the same text that is printed.
#'
#' @examples
#' \dontrun{
#' pinyin("type Chinese character")
#' }
#'
#' @export

#------------------------------------------------------------------------------#

pinyin = function(input) {

  # Fail on a bad argument before any network traffic happens
  if (!is.character(input) || length(input) != 1L || is.na(input)) {
    stop('`input` must be a single, non-missing character string.')
  }
  q = input

  # Check network connection
  if (!curl::has_internet()) {
    stop('No network connection found...')
  }

  # Set Youdao pinyin upload API address
  youdao_url_upload = 'https://openapi.youdao.com/getPinYin'

  # Get the ID and PASSWORD for the Youdao API
  app_key = Sys.getenv("app_key")
  app_secret = Sys.getenv("app_secret")
  if (app_key == "" || app_secret == "") {
    stop('You need to provide the ID and PASSWORD of the Youdao API.')
  }

  # Set a unique universal identifier
  salt = as.character(uuid::UUIDgenerate())

  # Seconds since the epoch, sent as a string
  curtime = as.character(
    as.integer(
      as.POSIXct(Sys.time(), tz = "UTC")
    )
  )

  # Shorten the text used in the signature: inputs longer than 20 characters
  # become first 10 characters + total length + last 10 characters
  truncate = function(x) {
    if (is.null(x)) {
      return(NULL)
    }
    x = iconv(x, to = 'UTF-8')
    size = nchar(x)
    if (size <= 20) {
      return(x)
    }
    paste0(
      substr(x, 1, 10),
      size,
      substr(x, size - 9, size)
    )
  }

  # Tone-marked variants of each vowel; position 1-4 is the tone, position 5
  # is the unmarked vowel
  tone_table = c(
    a = "\u0101\u00e1\u01ce\u00e0a",
    e = "\u0113\u00e9\u011b\u00e8e",
    i = "\u012b\u00ed\u01d0\u00eci",
    o = "\u014d\u00f3\u01d2\u00f2o",
    u = "\u016b\u00fa\u01d4\u00f9u"
  )

  # Convert one numbered syllable (e.g. "hao3") into tone-marked pinyin
  add_tone_mark = function(syllable) {
    # "_" is passed through; an entry without a pinyin part is shown the
    # same way so the pinyin line stays aligned with the characters
    if (is.na(syllable) || syllable == "_") {
      return("_")
    }
    base = gsub("[0-9]", "", syllable)
    tone = as.integer(gsub("[^0-9]", "", syllable))
    # Without a usable tone digit there is nothing to mark
    if (is.na(tone) || tone < 1L || tone > 5L) {
      return(base)
    }
    # Standard placement: "a" or "e" carries the mark, then the "o" of
    # "ou", otherwise the last vowel of the syllable
    if (grepl("[ae]", base)) {
      pos = as.integer(regexpr("[ae]", base))
    } else if (grepl("ou", base, fixed = TRUE)) {
      pos = as.integer(regexpr("ou", base, fixed = TRUE))
    } else {
      hits = as.integer(gregexpr("[iou]", base)[[1]])
      pos = hits[length(hits)]
    }
    # NOTE(review): syllables without a/e/i/o/u (e.g. u-umlaut written as
    # "v") are returned without a mark -- confirm how the API encodes them
    if (pos < 1L) {
      return(base)
    }
    vowel = substr(base, pos, pos)
    paste0(
      substr(base, 1L, pos - 1L),
      substr(tone_table[[vowel]], tone, tone),
      substr(base, pos + 1L, nchar(base))
    )
  }

  # Processing signature information
  sign = tolower(
    digest::digest(
      paste0(app_key, truncate(q), salt, curtime, app_secret),
      algo = "sha256",
      serialize = FALSE
    )
  )

  # Generate parameter list ("v3" is the signature type)
  params = list(
    q = q,
    appKey = app_key,
    salt = salt,
    sign = sign,
    signType = "v3",
    curtime = curtime
  )

  response = httr::POST(
    url = youdao_url_upload,
    body = params,
    encode = "form"
  )
  # Turn HTTP-level failures into an R error instead of parsing an error page
  httr::stop_for_status(response)

  convert_data = httr::content(
    response, "text", encoding = "UTF-8"
  )
  json_data = jsonlite::fromJSON(convert_data)

  # The entries are expected at data$data$pinyins; anything else (for
  # example an error payload) is reported explicitly
  pinyins = tryCatch(
    json_data$data$data$pinyins,
    error = function(e) NULL
  )
  if (!is.character(pinyins) || length(pinyins) == 0L) {
    stop('The Youdao API response does not contain any pinyin data.')
  }

  # Each entry is split at ":" into the input character and its pinyin
  split_pinyin = strsplit(pinyins, ":")
  input_info = vapply(
    split_pinyin, function(x) x[1], character(1), USE.NAMES = FALSE
  )
  pinyin_info = vapply(
    split_pinyin, function(x) x[2], character(1), USE.NAMES = FALSE
  )

  # One output syllable per entry, in the original order
  marked = vapply(
    pinyin_info, add_tone_mark, character(1), USE.NAMES = FALSE
  )

  words = paste(input_info, collapse = " ")
  pinyin_line = paste(marked, collapse = " ")

  cat(pinyin_line, "\n", words)

  # Return what was printed so the result can also be used programmatically
  invisible(paste(pinyin_line, "\n", words))

}
|
||
#------------------------------------------------------------------------------# |
Oops, something went wrong.