- Context
- Benchmarking
stringr::str_replace_all()
stringr::str_trim()
stringr::str_dup()
stringr::str_split()
stringr::str_sub()
- Variant
replacement
str_insert()
instead of str_replace()
stringr::str_detect()
stringr::str_wrap()
stringr::str_pad()
stringr::str_count()
stringr::str_extract_all()
stringr::str_locate()
stringr::str_locate_all()
stringr::str_match_all()
As part of #1549, we removed the stringr dependency by replacing all functions used with their base R equivalents.
This document aims to be a benchmark of each replacement. We’ll use bench to run the benchmarks, and compare each stringr function with its replacement. Each benchmark is done in the context of the function's initial usage in knitr.
xfun::session_info(c("knitr", "bench", "stringr"))
## R version 4.2.0 (2022-04-22 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
##
## Locale:
## LC_COLLATE=French_France.utf8 LC_CTYPE=French_France.utf8
## LC_MONETARY=French_France.utf8 LC_NUMERIC=C
## LC_TIME=French_France.utf8
##
## Package version:
## bench_1.1.2 cli_3.6.0 evaluate_0.20 fansi_1.0.3
## glue_1.6.2 graphics_4.2.0 grDevices_4.2.0 highr_0.10
## knitr_1.41.9 lifecycle_1.0.3 magrittr_2.0.3 methods_4.2.0
## pillar_1.8.1 pkgconfig_2.0.3 profmem_0.6.0 rlang_1.0.6
## stats_4.2.0 stringi_1.7.12 stringr_1.5.0 tibble_3.1.8
## tools_4.2.0 utf8_1.2.2 utils_4.2.0 vctrs_0.5.1
## xfun_0.36 yaml_2.3.6
# Build the path to a file under the `examples` directory of the installed
# knitr package; `...` holds the path components below that directory.
knitr_example = function(...) {
  system.file('examples', ..., package = 'knitr')
}
Commit | PR | File |
---|---|---|
9c92eff1 | #2174 | utils-vignettes.R#L158 |
Before:
x[!i] = stringr::str_replace_all(x[!i], p$inline.code, '') # remove inline code
After:
x[!i] = gsub(p$inline.code, '', x[!i], perl = TRUE) # remove inline code
file = knitr_example('knitr-minimal.Rnw')
x = xfun::read_utf8(file)
p = knitr:::detect_pattern(x, tolower(xfun::file_ext(file)))
p = knitr:::all_patterns[[p]]
p1 = p$chunk.begin; p2 = p$chunk.end
m = knitr:::group_indices(grepl(p1, x), grepl(p2, x))
i = m %% 2 == 0
res = bench::mark(
stringr = stringr::str_replace_all(x[!i], p$inline.code, ''),
new = gsub(p$inline.code, '', x[!i], perl = TRUE),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1.29 1.38 1 1226. 5.99
## 2 new 1 1 1.34 1 1
ggplot2::autoplot(res)
## Le chargement a nécessité le package : tidyr
Commit | PR | File |
---|---|---|
8fa7d17 | #2177 |
Before:
labels = stringr::str_trim(gsub(lab, '\\3', sapply(groups, `[`, 1)))
After:
labels = trimws(gsub(lab, '\\3', sapply(groups, `[`, 1))) # trim whitespace around the extracted labels
path = "https://raw.githubusercontent.com/yihui/knitr-examples/46c8d1db0cf0c9ab04432444079927324c4c3688/113-foo.R"
lines = xfun::read_utf8(path)
lab = knitr:::.sep.label
idx = cumsum(grepl(lab, lines))
groups = unname(split(lines, idx))
content = gsub(lab, '\\3', sapply(groups, `[`, 1))
res = bench::mark(
stringr = stringr::str_trim(content),
new = trimws(content),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 2.33 Inf 7.76
## 2 new 2.48 2.54 1 NaN 1
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
8fa7d17 | #2177 | utils.R#L77 |
Before:
stringr::str_trim(stringr::str_split(string, ';|,')[[1]])
After:
trimws(stringr::str_split(string, ';|,')[[1]])
string = ' .5,.6 , .7; .9 '
splitted = stringr::str_split(string, ';|,')[[1]]
res = bench::mark(
stringr = stringr::str_trim(splitted),
new = trimws(splitted),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 2.32 Inf 7.74
## 2 new 2.47 2.48 1 NaN 1
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
1ce8286 | #2186 | parser.R#L348 | |
Before:
cat(' ', stringr::str_dup('~', getOption('width') - 10L), '\n')
After:
rep_str = function(x, n, sep = '') paste(rep(x, n), collapse = sep)
cat(' ', rep_str('~', getOption('width') - 10L), '\n')
res = bench::mark(
stringr = stringr::str_dup('~', getOption('width') - 10L),
new = paste(rep('~', getOption('width') - 10L), collapse = ''),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 1.20 5.96 Inf
## 2 new 1.17 1.14 1 1 NaN
ggplot2::autoplot(res)
Later, usage of stringr::str_dup()
and its replacement rep_str()
was removed completely in
(https://github.com/yihui/knitr/commit/0efb7914a1be93460e510bb283f9d4d72c49b360)
as code was simplified in
https://github.com/yihui/knitr/commit/07bf3adf49f67bd528be4e6a60b3284a7ea6f8a9
and
https://github.com/yihui/knitr/commit/7ef7be00711de949fcf2a299d6381189669bb753
so no more impact.
Commit | PR | File |
---|---|---|
67b973 | #2187 |
Before:
x = stringr::str_split(x, '\n')
After:
# patch strsplit() to split '' into '' instead of character(0)
str_split = function(x, split, ...) {
y = strsplit(x, split, ...)
y[x == ''] = list('')
y
}
x = str_split(x, '\n')
con = url("https://raw.githubusercontent.com/yihui/knitr-examples/46c8d1db0cf0c9ab04432444079927324c4c3688/084-pandoc.pandoc")
x = read.dcf(con)
close(con)
x = x[!is.na(x)]
# Split each element of `x` on `split` with strsplit(), but make empty strings
# yield '' instead of character(0).
str_split = function(x, split, ...) {
  pieces = strsplit(x, split, ...)
  is_empty = x == ''
  pieces[is_empty] = list('')
  pieces
}
res = bench::mark(
stringr = stringr::str_split(x, '\n'),
new = str_split(x, '\n'),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 2.48 2.48 1 2.38 1
## 2 new 1 1 2.38 1 1.19
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
5a2cb72 | #2195 | utils-string.R block.R#L564 utils.R#L92-L93 |
Before:
stringr::str_sub(input, loc[i, 1], loc[i, 2]) = if (length(res)) {
paste(hook(res), collapse = '')
} else ''
After:
# replace parts of a string with new values; `pos` is a matrix of positions and
# each row is a pair of [start, end]
str_replace = function(x, pos, value) {
if (length(x) != 1) stop("Only a character scalar is supported.")
# extract parts of the string that are outside [start, end]
m = rbind(pos[, 1] - 1, pos[, 2] + 1)
m = matrix(c(1, m, nchar(x)), nrow = 2)
y = substring(x, m[1, ], m[2, ])
paste(rbind(y, c(value, '')), collapse = '')
}
str_replace(input, block$location, ans)
input = "inline `r (function() 1)()`"
location = matrix(c(8,27), ncol = 2, byrow = TRUE)
ans = "1"
# Replace the spans of a single string `x` given by the rows of `pos` (each row
# is a [start, end] pair) with the corresponding elements of `value`.
str_replace = function(x, pos, value) {
  if (length(x) != 1) stop("Only a character scalar is supported.")
  # the pieces to keep sit between the spans: each one begins right after a
  # span's end (or at position 1) and stops right before the next span's start
  # (or at the end of the string)
  starts = c(1, pos[, 2] + 1)
  ends = c(pos[, 1] - 1, nchar(x))
  kept = substring(x, starts, ends)
  # interleave kept pieces with the replacements; the padding '' pairs with the
  # trailing kept piece
  paste(rbind(kept, c(value, '')), collapse = '')
}
# wrapper to approximate what is done internally
old_replace = function(input, loc, ans) {
stringr::str_sub(input, loc[1, 1], loc[1, 2]) = ans
return(input)
}
res = bench::mark(
stringr = old_replace(input, location, ans),
new = str_replace(input, location, ans),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 1.21 9.19 1.21
## 2 new 1.25 1.20 1 1 1
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
5a2cb72 | #2195 | header.R#L59-L60 header.R#L90-L91 |
Before:
tmp = stringr::str_sub(doc[i], l[, 1], l[, 2])
stringr::str_sub(doc[i], l[,1], l[,2]) = paste0(tmp, '\n', make_header_html())
After :
tmp = substr(doc[i], l[, 1], l[, 2])
doc[i] = str_replace(doc[i], l, paste0(tmp, '\n', make_header_html()))
b = knitr::all_patterns$html$header.begin
doc = c("<head>", "<!--content-->", "</head>", "<body>", "</body>")
i = grep(b, doc)
l = stringr::str_locate(doc[i], b)
res = bench::mark(
stringr = stringr::str_sub(doc[i], l[, 1], l[, 2]),
new = substr(doc[i], l[, 1], l[, 2]),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 3.48 3.45 1 Inf 1
## 2 new 1 1 3.59 NaN 3.59
ggplot2::autoplot(res)
tmp = stringr::str_sub(doc[i], l[, 1], l[, 2])
header = knitr:::make_header_html()
old_fun = function(doc, l, tmp, header) {
stringr::str_sub(doc[i], l[,1], l[,2]) = paste0(tmp, header)
doc
}
new_fun = function(doc, l, tmp, header) {
doc[i] = str_replace(doc[i], l, paste0(tmp, header))
doc
}
res = bench::mark(
stringr = old_fun(doc, l, tmp, header),
new = new_fun(doc, l, tmp, header),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 1.13 Inf 1
## 2 new 1.19 1.17 1 NaN 1.33
ggplot2::autoplot(res)
Before:
tmp = stringr::str_sub(doc[i], l[, 1], l[, 2])
stringr::str_sub(doc[i], l[,1], l[,2]) = paste0(tmp, make_header_latex(doc))
After :
tmp = substr(doc[i], l[, 1], l[, 2])
doc[i] = str_replace(doc[i], l, paste0(tmp, make_header_latex(doc)))
b = knitr::all_patterns$tex$header.begin
doc = c("\\documentclass[opt]{article}", "some tex content")
i = grep(b, doc)
l = stringr::str_locate(doc[i], b)
res = bench::mark(
stringr = stringr::str_sub(doc[i], l[, 1], l[, 2]),
new = substr(doc[i], l[, 1], l[, 2]),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 3.55 3.39 1 Inf 1
## 2 new 1 1 3.42 NaN 3.42
ggplot2::autoplot(res)
tmp = stringr::str_sub(doc[i], l[, 1], l[, 2])
header = knitr:::make_header_latex(doc)
old_fun = function(doc, l, tmp, header) {
stringr::str_sub(doc[i], l[,1], l[,2]) = paste0(tmp, header)
doc
}
new_fun = function(doc, l, tmp, header) {
doc[i] = str_replace(doc[i], l, paste0(tmp, header))
doc
}
res = bench::mark(
stringr = old_fun(doc, l, tmp, header),
new = new_fun(doc, l, tmp, header),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 1.20 Inf 1
## 2 new 1.25 1.25 1 NaN 1.25
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
5a2cb72 | #2195 | header.R#L59-L60 header.R#L90-L91 |
Before:
tmp = stringr::str_sub(doc[i], l[, 1], l[, 2])
stringr::str_sub(doc[i], l[,1], l[,2]) = paste0(tmp, '\n', make_header_html())
After :
doc[i] = str_insert(doc[i], l[, 2], paste0('\n', make_header_html()))
b = knitr::all_patterns$html$header.begin
doc = c("<head>", "<!--content-->", "</head>", "<body>", "</body>")
i = grep(b, doc)
l = stringr::str_locate(doc[i], b)
header = knitr:::make_header_html()
# Insert `value` into the string `x` right after character position `i`.
# A non-positive `i` prepends; an `i` at or past the end (or an empty `x`)
# appends.
str_insert = function(x, i, value) {
  if (i <= 0) {
    return(paste0(value, x))
  }
  len = nchar(x)
  at_end = len == 0 || i >= len
  if (at_end) {
    paste0(x, value)
  } else {
    paste0(substr(x, 1, i), value, substr(x, i + 1, len))
  }
}
# Benchmark wrapper for the stringr approach: read the span [l[, 1], l[, 2]] of
# doc[i] and write it back followed by `header`.
# `i` is not an argument; it is looked up in the enclosing environment.
# NOTE(review): no '\n' is put between the span and `header`, unlike
# new_str_insert() below, so the two results differ by that newline.
old_str_sub = function(doc, l, header) {
tmp = stringr::str_sub(doc[i], l[, 1], l[, 2])
stringr::str_sub(doc[i], l[,1], l[,2]) = paste0(tmp, header)
doc
}
# Same operation as old_str_sub() but with substr() + str_replace(); it also
# pastes `header` without a '\n' separator.
new_str_replace = function(doc, l, header) {
tmp = substr(doc[i], l[, 1], l[, 2])
doc[i] = str_replace(doc[i], l, paste0(tmp, header))
doc
}
# Insert '\n' followed by `header` into doc[i] right after position l[, 2].
new_str_insert = function(doc, l, header) {
doc[i] = str_insert(doc[i], l[, 2], paste0('\n', header))
doc
}
res = bench::mark(
stringr = old_str_sub(doc, l, header),
str_replace = new_str_replace(doc, l, header),
str_insert = new_str_insert(doc, l, header),
min_time = Inf,
# we get a small difference in output
check = FALSE
)
summary(res, relative = TRUE)
## # A tibble: 3 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 2.81 2.74 1 Inf 1.28
## 2 str_replace 2.47 2.41 1.11 NaN 1.89
## 3 str_insert 1 1 2.34 NaN 1
ggplot2::autoplot(res)
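Special edge case here: the benchmarked variants are not completely equivalent, because the stringr and str_replace wrappers above paste the header without the '\n' separator that the str_insert wrapper (like the original knitr code) adds.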
- New version gives
<head>\n<style type="text/css">\n\n</style>
- stringr version gives
<head><style type="text/css">\n\n</style>
Before:
tmp = stringr::str_sub(doc[i], l[, 1], l[, 2])
stringr::str_sub(doc[i], l[,1], l[,2]) = paste0(tmp, make_header_latex(doc))
After :
doc[i] = str_insert(doc[i], l[, 2], make_header_latex(doc))
b = knitr::all_patterns$tex$header.begin
doc = c("\\documentclass[opt]{article}", "some tex content")
i = grep(b, doc)
l = stringr::str_locate(doc[i], b)
header = knitr:::make_header_latex(doc)
old_str_sub = function(doc, l, header) {
tmp = stringr::str_sub(doc[i], l[, 1], l[, 2])
stringr::str_sub(doc[i], l[,1], l[,2]) = paste0(tmp, header)
doc
}
new_str_replace = function(doc, l, header) {
tmp = substr(doc[i], l[, 1], l[, 2])
doc[i] = str_replace(doc[i], l, paste0(tmp, header))
doc
}
new_str_insert = function(doc, l, header) {
doc[i] = str_insert(doc[i], l[, 2], header)
doc
}
res = bench::mark(
stringr = old_str_sub(doc, l, header),
str_replace = new_str_replace(doc, l, header),
str_insert = new_str_insert(doc, l, header),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 3 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 3.34 3.31 1 Inf Inf
## 2 str_replace 3.05 3.02 1.14 NaN Inf
## 3 str_insert 1 1 3.44 NaN NaN
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
1a0f2cc | #2202 | pandoc.R#L125 |
Before:
if (length(pat) && any(stringr::str_detect(text, pat))) return(p)
After:
if (length(pat) && any(grepl(pat, text, perl = TRUE))) return(p)
pat = knitr:::all_patterns$md$chunk.begin
text = xfun::read_utf8(knitr_example("knitr-spin.Rmd"))
res = bench::mark(
stringr = stringr::str_detect(text, pat),
new = grepl(pat, text, perl = TRUE),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 2.13 76.1 Inf
## 2 new 2.20 2.24 1 1 NaN
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
1a0f2cc | #2202 | citation.R#L135 |
Before:
b[-idx] = stringr::str_wrap(b[-idx], width, 2, 4)
After:
str_wrap = function(...) {
res = strwrap(..., simplify = FALSE)
unlist(lapply(res, one_string))
}
b[-idx] = str_wrap(b[-idx], width, 2, 4)
x = "knitr"
lib.loc = NULL
tweak = TRUE
prefix = 'R-'
.tweak.bib = knitr:::.tweak.bib
citation = function(...) utils::citation(..., lib.loc = lib.loc)
bib = sapply(x, function(pkg) {
cite = citation(pkg, auto = if (pkg != 'base') {
meta = packageDescription(pkg, lib.loc = lib.loc)
# don't use the CRAN URL if the package has provided its own URL
if (identical(meta$Repository, 'CRAN') && !is.null(meta$URL)) {
# however, the package may have provided multiple URLs, in which case we
# still use the CRAN URL
if (!grepl('[, ]', meta$URL)) meta$Repository = NULL
}
meta
})
entry = toBibtex(cite)
entry[1] = sub('\\{,$', sprintf('{%s%s,', prefix, pkg), entry[1])
entry
}, simplify = FALSE)
str_wrap = function(...) {
res = strwrap(..., simplify = FALSE)
unlist(lapply(res, knitr:::one_string))
}
res = bench::mark(
stringr = stringr::str_wrap(bib$knitr, 20, 2, 4),
new = str_wrap(bib$knitr, 20, 2, 4),
min_time = Inf,
check = FALSE # functions are not completely equivalent
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 5.20 1.74 4.87
## 2 new 5.12 5.48 1 1 1
ggplot2::autoplot(res)
Special edge case here: Our replacement function is not completely equivalent.
- New version gives
note = {R package\n version\n 1.41.9},
- stringr version gives
note = {R package\n version 1.41.9},
Commit | PR | File |
---|---|---|
1a0f2cc | #2202 | output.R#L503 | |
Before:
stringr::str_wrap(message, width = getOption('width'))
After:
str_wrap = function(...) {
res = strwrap(..., simplify = FALSE)
unlist(lapply(res, one_string))
}
str_wrap(message, width = getOption('width'))
warn_msg = "Warning function: This is a warning message quite loooooooooooong"
str_wrap = function(...) {
res = strwrap(..., simplify = FALSE)
unlist(lapply(res, knitr:::one_string))
}
res = bench::mark(
stringr = stringr::str_wrap(warn_msg, 10),
new = str_wrap(warn_msg, 10),
min_time = Inf,
check = FALSE # functions are not completely equivalent
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 2.12 Inf 2.33
## 2 new 2.14 2.23 1 NaN 1
ggplot2::autoplot(res)
Special edge case here: Our replacement function is not completely equivalent.
- New version gives
Warning\nfunction:\nThis is\na warning\nmessage\nquite\nloooooooooooong
- stringr version gives
Warning\nfunction:\nThis is a\nwarning\nmessage\nquite\nloooooooooooong
Commit | PR | File |
---|---|---|
1a0f2cc | #2202 | block.R#L619 |
Before:
paste0('## ----', stringr::str_pad(label, max(getOption('width') - 11L, 0L), 'right', '-'), '----', code)
After:
paste0('## ----', label, strrep('-', max(getOption('width') - 11L - nchar(label), 0L)), '----', code)
label = "my-first-chunk"
res = bench::mark(
stringr = paste0('## ----', stringr::str_pad(label, max(getOption('width') - 11L, 0L), 'right', '-'), '----'),
new = paste0('## ----', label, strrep('-', max(getOption('width') - 11L - nchar(label), 0L)), '----'),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 13.0 12.9 1 Inf 1
## 2 new 1 1 13.9 NaN 1.16
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
1a0f2cc | #2202 | utils.R#L583-L590 |
Before:
line_count_str = function(x) stringr::str_count(x, '\n') + 1L
After:
# Count the lines in each element of `x`: the number of '\n' occurrences plus
# one. Returns one count per element.
line_count = function(x) {
# fixed = TRUE: search for the literal newline, not a regular expression
res = gregexpr('\n', x, fixed = TRUE)
unlist(lapply(res, function(x) {
n = length(x)
# gregexpr() reports "no match" as a single -1, which must count as zero
if (n == 1 && x == -1) n = 0
n + 1
}))
}
text = xfun::read_utf8(knitr_example("knitr-spin.Rmd"))
res = bench::mark(
stringr = line_count_str(text),
new = line_count(text),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 2.83 1 1
## 2 new 2.27 2.97 1 23.7 18.4
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
cc3b92a | #2205 | block.R#L587 |
Before:
eval(parse_only(unlist(stringr::str_extract_all(code, 'read_chunk\\(([^)]+)\\)'))))
After:
str_extract = function(x, pattern) {
m = gregexpr(pattern, x, perl = TRUE)
regmatches(x, m)
}
eval(parse_only(unlist(str_extract(code, 'read_chunk\\(([^)]+)\\)'))))
code = "knitr::read_chunk('113-foo.R')"
# Extract every match of the Perl-compatible regex `pattern` from each element
# of `x`; returns a list with one character vector of matches per element.
str_extract = function(x, pattern) {
  regmatches(x, gregexpr(pattern, x, perl = TRUE))
}
res = bench::mark(
stringr = stringr::str_extract_all(code, 'read_chunk\\(([^)]+)\\)'),
new = str_extract(code, 'read_chunk\\(([^)]+)\\)'),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 3.75 1 1.50
## 2 new 3.81 3.98 1 4.83 1
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
cc3b92a | #2205 | template.R#L123 |
Before:
mat = stringr::str_extract_all(txt, delim)[[1L]]
After:
str_extract = function(x, pattern) {
m = gregexpr(pattern, x, perl = TRUE)
regmatches(x, m)
}
mat = str_extract(txt, delim)[[1L]]
txt = 'This is the value of `x`: {{x}}'
delim = c('{{', '}}')
delim = gsub('([.|()\\^{}+$*?]|\\[|\\])', '\\\\\\1', delim)
delim = paste0(delim[1L], '((.|\n)+?)', delim[2L])
str_extract = function(x, pattern) {
m = gregexpr(pattern, x, perl = TRUE)
regmatches(x, m)
}
res = bench::mark(
stringr = stringr::str_extract_all(txt, delim),
new = str_extract(txt, delim),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 4.06 1 1.35
## 2 new 4.01 4.30 1 93.8 1
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
cc3b92a | #2205 | header.R#L58 header.R#L89 |
Before:
stringr::str_locate(doc[i], b)
After:
location = function(x) {
len = attr(x, 'match.length')
if (length(x) == 1 && x == -1) x = integer()
cbind(start = x, end = x + len - 1L)
}
str_locate = function(x, pattern, all = TRUE) {
out = (if (all) gregexpr else regexpr)(pattern, x, perl = TRUE)
if (all) lapply(out, location) else location(out)
}
str_locate(doc[i], b, FALSE)
b = knitr::all_patterns$html$header.begin
doc = c("<head>", "<!--content-->", "</head>", "<body>", "</body>")
i = grep(b, doc)
# Turn the result of regexpr() (or one element of a gregexpr() result) into a
# two-column matrix of match positions with columns `start` and `end`.
location = function(x) {
  width = attr(x, 'match.length')
  # a single -1 means "no match": produce a matrix with zero rows
  no_match = length(x) == 1 && x == -1
  if (no_match) x = integer()
  cbind(start = x, end = x + width - 1L)
}
# Locate matches of the Perl-compatible regex `pattern` in `x`. With
# `all = TRUE`, return a list of position matrices (all matches per element);
# otherwise return one matrix holding the first match of each element.
str_locate = function(x, pattern, all = TRUE) {
  if (all) {
    lapply(gregexpr(pattern, x, perl = TRUE), location)
  } else {
    location(regexpr(pattern, x, perl = TRUE))
  }
}
res = bench::mark(
stringr = stringr::str_locate(doc[i], b),
new = str_locate(doc[i], b, FALSE),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 3.59 1 7.17
## 2 new 3.55 3.74 1 75.8 1
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
cc3b92a | #2205 | parser.R#L358 |
Before:
loc = stringr::str_locate_all(input, inline.code)[[1]]
After:
location = function(x) {
len = attr(x, 'match.length')
if (length(x) == 1 && x == -1) x = integer()
cbind(start = x, end = x + len - 1L)
}
str_locate = function(x, pattern, all = TRUE) {
out = (if (all) gregexpr else regexpr)(pattern, x, perl = TRUE)
if (all) lapply(out, location) else location(out)
}
loc = str_locate(input, inline.code)[[1]]
input = 'Inline expressions such as `r "the following"`'
inline.code = knitr:::all_patterns$md$inline.code
res = bench::mark(
stringr = stringr::str_locate_all(input, inline.code),
new = str_locate(input, inline.code),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 3.05 1 1
## 2 new 3.22 3.30 1 1.63 2.30
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
cc3b92a | #2205 | utils.R#L94 |
Before:
idx = stringr::str_locate(txt, hb) # locate documentclass
After:
location = function(x) {
len = attr(x, 'match.length')
if (length(x) == 1 && x == -1) x = integer()
cbind(start = x, end = x + len - 1L)
}
str_locate = function(x, pattern, all = TRUE) {
out = (if (all) gregexpr else regexpr)(pattern, x, perl = TRUE)
if (all) lapply(out, location) else location(out)
}
idx = str_locate(txt, hb, FALSE) # locate documentclass
input = xfun::read_utf8(knitr_example("child/knitr-main.Rnw"))
patterns = knitr:::all_patterns$tex
db = patterns$document.begin
hb = patterns$header.begin
idx2 = grep(db, input)[1]
idx1 = grep(hb, input)[1]
txt = knitr:::one_string(input[idx1:(idx2 - 1L)])
location = function(x) {
len = attr(x, 'match.length')
if (length(x) == 1 && x == -1) x = integer()
cbind(start = x, end = x + len - 1L)
}
str_locate = function(x, pattern, all = TRUE) {
out = (if (all) gregexpr else regexpr)(pattern, x, perl = TRUE)
if (all) lapply(out, location) else location(out)
}
res = bench::mark(
stringr = stringr::str_locate(txt, hb),
new = str_locate(txt, hb, FALSE),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 3.22 Inf 6.44
## 2 new 3.17 3.36 1 NaN 1
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
cc3b92a | #2205 | template.R#L121 |
Before:
loc = stringr::str_locate_all(txt, delim)[[1L]]
After:
location = function(x) {
len = attr(x, 'match.length')
if (length(x) == 1 && x == -1) x = integer()
cbind(start = x, end = x + len - 1L)
}
str_locate = function(x, pattern, all = TRUE) {
out = (if (all) gregexpr else regexpr)(pattern, x, perl = TRUE)
if (all) lapply(out, location) else location(out)
}
loc = str_locate(txt, delim)[[1L]]
txt = 'This is the value of `x`: {{x}}'
delim = c('{{', '}}')
delim = gsub('([.|()\\^{}+$*?]|\\[|\\])', '\\\\\\1', delim)
delim = paste0(delim[1L], '((.|\n)+?)', delim[2L])
location = function(x) {
len = attr(x, 'match.length')
if (length(x) == 1 && x == -1) x = integer()
cbind(start = x, end = x + len - 1L)
}
str_locate = function(x, pattern, all = TRUE) {
out = (if (all) gregexpr else regexpr)(pattern, x, perl = TRUE)
if (all) lapply(out, location) else location(out)
}
res = bench::mark(
stringr = stringr::str_locate_all(txt, delim),
new = str_locate(txt, delim),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 3.81 1 1
## 2 new 3.85 3.96 1 93.8 1.05
ggplot2::autoplot(res)
Commit | PR | File |
---|---|---|
cc3b92a | #2205 | parser.R#L360 |
Before:
code = stringr::str_match_all(input, inline.code)[[1L]]
After:
str_match = function(x, pattern) {
# gregexec() was added in R 4.1.0; for lower versions of R, use fallback
if (is.function(gregexec <- baseenv()[['gregexec']])) {
m = gregexec(pattern, x, perl = TRUE)
} else {
x = unlist(str_extract(x, pattern))
m = regexec(pattern, x, perl = TRUE)
}
do.call(cbind, regmatches(x, m))
}
code = t(str_match(input, inline.code))
input = 'Inline expressions such as `r "the following"`'
inline.code = knitr:::all_patterns$md$inline.code
# Match the Perl-compatible regex `pattern` against `x` and column-bind the
# matches (full match and capture groups) into one character matrix.
str_match = function(x, pattern) {
  # gregexec() only exists in newer versions of R, so look it up at run time
  gregexec = baseenv()[['gregexec']]
  if (is.function(gregexec)) {
    m = gregexec(pattern, x, perl = TRUE)
  } else {
    # fallback: extract every full match first, then split each into groups
    x = unlist(str_extract(x, pattern))
    m = regexec(pattern, x, perl = TRUE)
  }
  do.call(cbind, regmatches(x, m))
}
res = bench::mark(
stringr = {
code = stringr::str_match_all(input, inline.code)[[1L]]
code[is.na(code)] = ''
code
},
new = t(str_match(input, inline.code)),
min_time = Inf
)
summary(res, relative = TRUE)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stringr 1 1 4.45 1 1.11
## 2 new 4.26 4.75 1 6.75 1
ggplot2::autoplot(res)