Skip to content

Commit

Permalink
version 0.0.2
Browse files Browse the repository at this point in the history
  • Loading branch information
Shusei Eshima authored and cran-robot committed Jun 8, 2023
1 parent 78d295a commit 6ac0d7b
Show file tree
Hide file tree
Showing 18 changed files with 133 additions and 106 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: RcppJagger
Title: An R Wrapper for Jagger
Version: 0.0.1
Version: 0.0.2
Authors@R: c(
person("Shusei", "Eshima", email = "shuseieshima@gmail.com", role = c("aut","cre"), comment = c(ORCID = "0000-0003-3613-4046")),
person("Naoki", "Yoshinaga", role = c("ctb"))
Expand All @@ -21,9 +21,9 @@ Suggests: dplyr (>= 1.1.0), testthat (>= 3.1.5), tibble
Config/testthat/edition: 3
LazyData: TRUE
NeedsCompilation: yes
Packaged: 2023-06-04 13:28:58 UTC; shusei
Packaged: 2023-06-08 19:03:16 UTC; shusei
Author: Shusei Eshima [aut, cre] (<https://orcid.org/0000-0003-3613-4046>),
Naoki Yoshinaga [ctb]
Maintainer: Shusei Eshima <shuseieshima@gmail.com>
Repository: CRAN
Date/Publication: 2023-06-06 07:00:06 UTC
Date/Publication: 2023-06-08 22:22:56 UTC
34 changes: 17 additions & 17 deletions MD5
@@ -1,38 +1,38 @@
e4ab7751d95cc499025f796dd5835db0 *DESCRIPTION
a6bb715255a378bab04efe6c7071c538 *DESCRIPTION
8bb48ef08475ae0cb3e0852924bd54ee *NAMESPACE
72068cd54fcf95d6eae221b46120eb8c *NEWS.md
f9f7af660472a402911b86dab482e9b2 *R/RcppExports.R
40b0fd51a66ff0f4fe0e07c08e997208 *NEWS.md
1eab0d1046382d4e44cb01d8bce7ec97 *R/RcppExports.R
dc6f159d77512c9b39bb9df2a6211966 *R/RcppJagger.R
49ab70a4e1432ae366ac3f1b3ed573a8 *R/data.R
a1123c56bfe1d024bd9b4177d2a03e0f *R/lemmatizer.R
9666151174d17bfd01a2463aa09644d6 *R/pos.R
00ca98c3b4ce1f0136935b29699e3d79 *R/tokenizer.R
02a4b2b92d572134845d668ae12aaa69 *R/lemmatizer.R
ba70802ce80891d3a8977e3cb718257a *R/pos.R
3b4efa985838a5b4e3718edfe556766c *R/tokenizer.R
80fcba092843dccd645184befeb5c35f *R/util.R
fa7b592dc30adfd4d804f6b365c728a3 *data/sentence_example.rda
6429b333f6879da564935edf00802f7c *inst/include/ccedar_core.h
83aa264c4503d476805cae347dfcf226 *inst/include/jagger.cc
1ff928aad2d761f6580473fda8bba1d2 *inst/include/jagger.h
bb5e4ec24ecaed6c71be0d76836eedba *inst/include/mman.h
b296159a609862232c8fc2136140e08d *man/lemmatize.Rd
a5abcde7cded3808464f4e3c118fc7ef *man/lemmatize.Rd
9cfd165a7b6b42bc8928c7bc49a51ec8 *man/lemmatize_cpp_vec.Rd
329a9e0c25db5fc2b250ef78661736a4 *man/lemmatize_tbl.Rd
7c59709815d427b02ffaf6247cb6198c *man/lemmatize_tbl.Rd
9cfc71ce88c41c1194b2426fe7e0fef3 *man/pos.Rd
d41d1ec436d7777c0dd5477a6a26982f *man/pos_cpp_vec.Rd
b6d398dc9ed399dd1cf16d4eab24c1b6 *man/pos_cpp_vec.Rd
910b500dd766df96908f2908c035fa8f *man/pos_simple.Rd
698357db8eb2731dabf6f6578dfe4e88 *man/pos_simple_cpp_vec.Rd
859e55c9f42c04894903b45d5315074a *man/pos_simple_cpp_vec.Rd
f026c0964eac7216fbe1650737573bae *man/sentence_example.Rd
7abe7d690f6471024dea9ee17a898ee1 *man/tokenize.Rd
1c20a20a1ac4b8a95febac0e75fc6dbe *man/tokenize.Rd
a3f87da0a42e58fe8b2bb48ce563913d *man/tokenize_cpp_vec.Rd
5ad3d74d8e56c53ad324184d55cf28bf *man/tokenize_tbl.Rd
c1b4a373e6630576f922d4391312cb5b *src/RcppExports.cpp
b020b7503215ce3884f7d19e4516ab5f *src/lemmatizer.h
8fd27335b165bca5f2a1aa7f2ca99863 *src/main.cpp
a8ed2fb3d8b00a7c1c70298d904b662e *src/pos.h
01a357bad718092e0beb2a561afebdc4 *src/RcppExports.cpp
192d9de22d0dd43048cf3f60ed2e3323 *src/lemmatizer.h
f6ee5d871a44fc6fa1e007bbc7c244fb *src/main.cpp
9272de4b3ee203551d47a5cfea916a71 *src/pos.h
a34ca3124494098634b0a434998a8106 *src/pos_simple.h
dd9a9a7825a8e4764ea26282793a1616 *src/reader.h
2294072a6814593ae4b60c6f4d948f65 *src/tokenizer.h
1199cbae84522095d5b3c49bebbadfb9 *tests/testthat.R
e77d910f914dea04b374c5759f2c9b0f *tests/testthat/test-lemmatizer.R
e5dd6c1eacfb9c5f52fd05ad18429538 *tests/testthat/test-pos.R
1ccbc80765a129be582e2d3db1e677cc *tests/testthat/test-sanity.R
9dd4dd39f70331fa5a933d328af45328 *tests/testthat/test-pos.R
7416e5a9070a9c6bfa6a59fc713cc411 *tests/testthat/test-sanity.R
91b0056a14954bcfb9d4be16bc1fa48a *tests/testthat/test-tokenizer.R
5 changes: 4 additions & 1 deletion NEWS.md
@@ -1,2 +1,5 @@
# RcppJagger 0.0.2
* Improvement of performance and stability.

# RcppJagger 0.0.1
* First submission to CRAN
* First submission to CRAN.
8 changes: 4 additions & 4 deletions R/RcppExports.R
Expand Up @@ -4,15 +4,15 @@
#' POS tagging in C++
#'
#' @keywords internal
pos_cpp_vec <- function(inputs, model_path) {
.Call('_RcppJagger_pos_cpp_vec', PACKAGE = 'RcppJagger', inputs, model_path)
pos_cpp_vec <- function(inputs, model_path, keep_vec, keep_all) {
.Call('_RcppJagger_pos_cpp_vec', PACKAGE = 'RcppJagger', inputs, model_path, keep_vec, keep_all)
}

#' POS tagging in C++ (only token and pos)
#'
#' @keywords internal
pos_simple_cpp_vec <- function(inputs, model_path) {
.Call('_RcppJagger_pos_simple_cpp_vec', PACKAGE = 'RcppJagger', inputs, model_path)
pos_simple_cpp_vec <- function(inputs, model_path, keep_vec, keep_all) {
.Call('_RcppJagger_pos_simple_cpp_vec', PACKAGE = 'RcppJagger', inputs, model_path, keep_vec, keep_all)
}

#' Tokenizer (a vector input)
Expand Down
4 changes: 2 additions & 2 deletions R/lemmatizer.R
Expand Up @@ -4,7 +4,7 @@
#' @param model_path a path to the model.
#' @param keep a vector of POS(s) to keep. Default is `NULL`.
#' @param concat logical. If TRUE, the function returns a concatenated string. Default is `TRUE`.
#' @return a list.
#' @return a vector (if `concat = TRUE`) or a list (if `concat = FALSE`).
#' @examples
#' data(sentence_example)
#' res_lemmatize <- lemmatize(sentence_example$text)
Expand Down Expand Up @@ -40,7 +40,7 @@ lemmatize <- function(input, model_path = NULL, keep = NULL, concat = TRUE) {
#' @param column a column name of the tibble to lemmatize.
#' @param model_path a path to the model.
#' @param keep a vector of POS(s) to keep. Default is `NULL`.
#' @return a vector.
#' @return a tibble.
#' @examples
#' data(sentence_example)
#' res_lemmatize <- lemmatize_tbl(tibble::as_tibble(sentence_example), "text")
Expand Down
33 changes: 14 additions & 19 deletions R/pos.R
Expand Up @@ -21,19 +21,15 @@ pos <- function(input, model_path = NULL, keep = NULL, format = c("list", "data.
}
format <- rlang::arg_match(format)

result <- pos_cpp_vec(input, model_path)

if (!is.null(keep)) {
result <- purrr::map(result, function(x) {
idx <- x$pos %in% keep
x$token <- x$token[idx]
x$lemma <- x$lemma[idx]
x$subtype <- x$subtype[idx]
x$pos <- x$pos[idx]
return(x)
})
if (is.null(keep)) {
keep_all <- TRUE
keep <- c("")
} else {
keep_all <- FALSE
}

result <- pos_cpp_vec(input, model_path, keep, keep_all)

if (format == "data.frame") {
result <- purrr::map(result, function(x) {
return(data.frame(
Expand Down Expand Up @@ -70,17 +66,16 @@ pos_simple <- function(input, model_path = NULL, keep = NULL, format = c("list",
}

format <- rlang::arg_match(format)
result <- pos_simple_cpp_vec(input, model_path)

if (!is.null(keep)) {
result <- purrr::map(result, function(x) {
idx <- x$pos %in% keep
x$token <- x$token[idx]
x$pos <- x$pos[idx]
return(x)
})
if (is.null(keep)) {
keep_all <- TRUE
keep <- c("")
} else {
keep_all <- FALSE
}

result <- pos_simple_cpp_vec(input, model_path, keep, keep_all)

if (format == "data.frame") {
result <- purrr::map(result, function(x) {
return(data.frame(
Expand Down
2 changes: 1 addition & 1 deletion R/tokenizer.R
Expand Up @@ -4,7 +4,7 @@
#' @param model_path a path to the model.
#' @param keep a vector of POS(s) to keep. Default is `NULL`.
#' @param concat logical. If TRUE, the function returns a concatenated string. Default is `TRUE`.
#' @return a list.
#' @return a vector (if `concat = TRUE`) or a list (if `concat = FALSE`).
#' @examples
#' data(sentence_example)
#' res_tokenize <- tokenize(sentence_example$text)
Expand Down
2 changes: 1 addition & 1 deletion man/lemmatize.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/lemmatize_tbl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pos_cpp_vec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pos_simple_cpp_vec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/tokenize.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 12 additions & 8 deletions src/RcppExports.cpp
Expand Up @@ -11,26 +11,30 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// pos_cpp_vec
List pos_cpp_vec(StringVector& inputs, std::string model_path);
RcppExport SEXP _RcppJagger_pos_cpp_vec(SEXP inputsSEXP, SEXP model_pathSEXP) {
List pos_cpp_vec(StringVector& inputs, std::string model_path, StringVector& keep_vec, bool keep_all);
RcppExport SEXP _RcppJagger_pos_cpp_vec(SEXP inputsSEXP, SEXP model_pathSEXP, SEXP keep_vecSEXP, SEXP keep_allSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< StringVector& >::type inputs(inputsSEXP);
Rcpp::traits::input_parameter< std::string >::type model_path(model_pathSEXP);
rcpp_result_gen = Rcpp::wrap(pos_cpp_vec(inputs, model_path));
Rcpp::traits::input_parameter< StringVector& >::type keep_vec(keep_vecSEXP);
Rcpp::traits::input_parameter< bool >::type keep_all(keep_allSEXP);
rcpp_result_gen = Rcpp::wrap(pos_cpp_vec(inputs, model_path, keep_vec, keep_all));
return rcpp_result_gen;
END_RCPP
}
// pos_simple_cpp_vec
List pos_simple_cpp_vec(StringVector& inputs, std::string model_path);
RcppExport SEXP _RcppJagger_pos_simple_cpp_vec(SEXP inputsSEXP, SEXP model_pathSEXP) {
List pos_simple_cpp_vec(StringVector& inputs, std::string model_path, StringVector& keep_vec, bool keep_all);
RcppExport SEXP _RcppJagger_pos_simple_cpp_vec(SEXP inputsSEXP, SEXP model_pathSEXP, SEXP keep_vecSEXP, SEXP keep_allSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< StringVector& >::type inputs(inputsSEXP);
Rcpp::traits::input_parameter< std::string >::type model_path(model_pathSEXP);
rcpp_result_gen = Rcpp::wrap(pos_simple_cpp_vec(inputs, model_path));
Rcpp::traits::input_parameter< StringVector& >::type keep_vec(keep_vecSEXP);
Rcpp::traits::input_parameter< bool >::type keep_all(keep_allSEXP);
rcpp_result_gen = Rcpp::wrap(pos_simple_cpp_vec(inputs, model_path, keep_vec, keep_all));
return rcpp_result_gen;
END_RCPP
}
Expand Down Expand Up @@ -64,8 +68,8 @@ END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_RcppJagger_pos_cpp_vec", (DL_FUNC) &_RcppJagger_pos_cpp_vec, 2},
{"_RcppJagger_pos_simple_cpp_vec", (DL_FUNC) &_RcppJagger_pos_simple_cpp_vec, 2},
{"_RcppJagger_pos_cpp_vec", (DL_FUNC) &_RcppJagger_pos_cpp_vec, 4},
{"_RcppJagger_pos_simple_cpp_vec", (DL_FUNC) &_RcppJagger_pos_simple_cpp_vec, 4},
{"_RcppJagger_tokenize_cpp_vec", (DL_FUNC) &_RcppJagger_tokenize_cpp_vec, 4},
{"_RcppJagger_lemmatize_cpp_vec", (DL_FUNC) &_RcppJagger_lemmatize_cpp_vec, 4},
{NULL, NULL, 0}
Expand Down
2 changes: 1 addition & 1 deletion src/lemmatizer.h
Expand Up @@ -81,7 +81,7 @@ class RcppJaggerLemmatize : public jagger::tagger {
// Add the final part after the last comma to `parts`.
parts.emplace_back(pos_info.substr(start));

if (parts[0] != "*" && parts.size() >= 7) { // first appearance of the token (i.e. not a concatenation)
if (parts[0] != "*" && parts.size() >= 6) { // first appearance of the token (i.e. not a concatenation)
pos_vec.emplace_back(parts[0]);
lemma_vec.emplace_back(parts[parts.size() - 3]);
} else if (parts[0] != "*" && parts.size() == 4) { // concatenation
Expand Down

0 comments on commit 6ac0d7b

Please sign in to comment.