diff --git a/NAMESPACE b/NAMESPACE
index 22e314b7e..f8022f4bf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,8 @@
-export("%+%")
 export("%==%")
+export("%+%")
+export(stri_charcategories)
 export(stri_char_getcategoryid)
 export(stri_char_getpropertyid)
-export(stri_charcategories)
 export(stri_chartype)
 export(stri_dup)
 export(stri_encinfo)
@@ -12,6 +12,7 @@ export(stri_encset)
 export(stri_flatten)
 export(stri_info)
 export(stri_join)
+export(stri_length)
 export(stri_localeinfo)
 export(stri_localelist)
 export(stri_localeset)
@@ -20,8 +21,8 @@ export(stri_locate_first_class)
 export(stri_locate_last_class)
 export(stri_nfc)
 export(stri_nfd)
-export(stri_nfkc_casefold)
 export(stri_nfkc)
+export(stri_nfkc_casefold)
 export(stri_nfkd)
 export(stri_numbytes)
 export(stri_split)
diff --git a/R/length.R b/R/length.R
index 7bd85b3e9..2477e97bc 100644
--- a/R/length.R
+++ b/R/length.R
@@ -27,3 +27,15 @@ stri_numbytes <- function(str) {
    # prepare_arg done internally
    .Call("stri_numbytes", str, PACKAGE="stringi")
 }
+
+
+#' The number of characters in a string
+#'
+#' @param str character vector
+#' @return integer vector giving the number of code points in each element
+#'    of \code{str}; missing strings (\code{NA}) yield \code{NA}
+#' @export
+stri_length <- function(str) {
+   # prepare_arg done internally
+   .Call("stri_length", str, PACKAGE="stringi")
+}
diff --git a/src/length.cpp b/src/length.cpp
index 99b11df11..162678bc2 100644
--- a/src/length.cpp
+++ b/src/length.cpp
@@ -66,3 +66,45 @@ SEXP stri_numbytes(SEXP s)
    UNPROTECT(1);
    return ret;
 }
+
+/**
+ * Count the number of Unicode code points in each string
+ *
+ * Each string's bytes are scanned as UTF-8 with U8_NEXT; every U8_NEXT step
+ * counts as one code point, whatever it decodes.
+ * NOTE(review): the declared encoding of each CHARSXP is not checked here --
+ * confirm that non-UTF-8 input is converted upstream.
+ *
+ * @param s character vector (passed through stri_prepare_arg_string)
+ * @return integer vector with one count per element of s; NA for NA strings
+ */
+SEXP stri_length(SEXP s)
+{
+   // The prepared vector may not be the caller-protected input, so keep it
+   // protected across the allocation below.
+   PROTECT(s = stri_prepare_arg_string(s));
+   int ns = LENGTH(s);
+
+   SEXP ret;
+   PROTECT(ret = allocVector(INTSXP, ns));
+   int* counts = INTEGER(ret);
+
+   for (int k = 0; k < ns; k++) {
+      SEXP q = STRING_ELT(s, k);
+      if (q == NA_STRING) {
+         counts[k] = NA_INTEGER;
+         continue;
+      }
+
+      const char* bytes = CHAR(q);
+      int nq = LENGTH(q);
+      int j = 0; // number of code points
+      UChar32 c; // decoded code point; required by U8_NEXT, otherwise unused
+      for (int i = 0; i < nq; j++)
+         U8_NEXT(bytes, i, nq, c);
+      counts[k] = j;
+   }
+
+   UNPROTECT(2);
+   return ret;
+}
diff --git a/src/stringi.cpp b/src/stringi.cpp
index 2660f71dd..21dc73165 100644
--- a/src/stringi.cpp
+++ b/src/stringi.cpp
@@ -44,6 +44,7 @@ static const R_CallMethodDef cCallMethods[] = {
    {"stri_info", (DL_FUNC)&stri_info, 0},
    {"stri_join", (DL_FUNC)&stri_join, 1},
    {"stri_join2", (DL_FUNC)&stri_join2, 2},
+   {"stri_length", (DL_FUNC)&stri_length, 1},
    {"stri_localeinfo", (DL_FUNC)&stri_localeinfo, 1},
    {"stri_localelist", (DL_FUNC)&stri_localelist, 0},
    {"stri_localeset", (DL_FUNC)&stri_localeset, 1},
diff --git a/src/stringi.h b/src/stringi.h
index 318dceed4..6244cef46 100644
--- a/src/stringi.h
+++ b/src/stringi.h
@@ -164,6 +164,7 @@ SEXP stri_char_getpropertyid(SEXP x);
 // length.cpp
 SEXP stri_numbytes(SEXP s);
 R_len_t stri__numbytes_max(SEXP s);
+SEXP stri_length(SEXP s);
 
 // wrap.cpp
 SEXP stri_wrap_greedy(SEXP count, SEXP width, SEXP spacecost);