Skip to content

Commit

Permalink
version 0.5-3
Browse files Browse the repository at this point in the history
  • Loading branch information
ggrothendieck authored and gaborcsardi committed Jun 12, 2010
1 parent 853fd68 commit f82a016
Show file tree
Hide file tree
Showing 13 changed files with 171 additions and 47 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: gsubfn
Version: 0.5-2
Date: 2010-03-23
Version: 0.5-3
Date: 2010-06-12
Title: Utilities for strings and function arguments.
Author: G. Grothendieck
Maintainer: G. Grothendieck <ggrothendieck@gmail.com>
Expand All @@ -22,6 +22,6 @@ Suggests: boot, chron, doBy, grid, lattice, quantreg, reshape, tcltk,
zoo
License: GPL (>= 2)
URL: http://gsubfn.googlecode.com
Packaged: 2010-03-23 16:04:02 UTC; Louis
Packaged: 2010-06-12 23:22:39 UTC; Louis
Repository: CRAN
Date/Publication: 2010-03-23 19:33:14
Date/Publication: 2010-06-13 08:22:04
3 changes: 3 additions & 0 deletions R/fn.R
Expand Up @@ -9,6 +9,9 @@ eval.with.vis <- function (expr) {

as.function.formula <- function(x, ...) {
vars <- setdiff(all.vars(x[[2]]), c("letters", "LETTERS", "pi"))
dotdot <- grepl("^[.][.][1-9.]$", vars)
if (any(dotdot)) vars <- c(setdiff(vars[!dotdot], "..."), "...")
if ("&" %in% vars) vars <- c("&", setdiff(vars, c("...", "&")), "...")
if (length(vars) == 0) {
f0 <- function() {}
body(f0) <- x[[length(x)]]
Expand Down
121 changes: 91 additions & 30 deletions R/gsubfn.R
Expand Up @@ -21,16 +21,37 @@
# makes all letters except first in word lower case
#
gsubfn <- function(pattern, replacement, x, backref, USE.NAMES = FALSE,
ignore.case = FALSE, engine = getOption("gsubfn.engine"),
env = parent.frame(), ...)
{

if (missing(engine) || is.null(engine))
engine <- if (isTRUE(capabilities()[["tcltk"]])) "tcl" else "R"
engine <- match.arg(engine, c("tcl", "R"))
if (engine == "tcl") stopifnot(require(tcltk))

here <- environment()

if (missing(replacement)) here$replacement <- function(...)
eval(parse(text = paste(..., sep = "")), env)

if (is.character(replacement))
return(base::gsub(pattern, replacement, x, ...))
if (is.character(replacement)) {
if (engine == "R")
return(base::gsub(pattern, replacement, x, ...))
else {
f <- function(x) {
tcl("set", "pattern", pattern)
tcl("set", "replacement", replacement)
tcl("set", "x", x)
s <- if (ignore.case) {
'set r [regsub -all -nocase -- $pattern $x $replacement]'
} else 'set r [regsub -all -- $pattern $x $replacement]'
tclvalue(.Tcl(s))
}
x[] <- sapply(x, f)
return(x)
}
}

if (is.list(replacement)) {
values.replacement <- replacement
Expand All @@ -45,18 +66,8 @@ gsubfn <- function(pattern, replacement, x, backref, USE.NAMES = FALSE,
# if (inherits(replacement, "formula")) replacement <- as.function(replacement)
if (missing(pattern)) pattern <- "[$]([[:alpha:]][[:alnum:].]*)|`([^`]+)`"
pattern <- as.character(pattern)
# i is 1 if the entire match is passed and 2 otherwise.
# j is 1 plus the number of backreferences
if (missing(backref) || is.null(backref)) {
noparen <- base::gsub("\\\\.", "", pattern)
noparen <- base::gsub("\\[[^\\]]*\\]", "", noparen)
j <- nchar(base::gsub("[^(]","", noparen))+1
i <- min(2, j)
} else {
i <- as.numeric(backref < 0) + 1
j <- abs(backref)+1
}

# proto object as replacement
e <- NULL
if (!inherits(replacement, "formula") && !is.function(replacement)) {
e <- replacement
Expand All @@ -77,50 +88,92 @@ gsubfn <- function(pattern, replacement, x, backref, USE.NAMES = FALSE,
}
here$replacement <- e$replacement
}

here$replacement <- match.funfn(replacement)

if (missing(backref) || is.null(backref)) {
noparen <- base::gsub("\\\\.", "", pattern)
noparen <- base::gsub("\\[[^\\]]*\\]", "", noparen)
backref <- nchar(base::gsub("[^(]","", noparen))
}

# if `&` is an argument then force backref to be 0 or negative
if (names(formals(here$replacement))[[1]] == "&") {
backref <- - abs(backref)
if (!is.null(e)) e$backref <- backref
}

# i is 1 if the entire match is passed and 2 otherwise.
# an extra set of parens are inserted if engine is R and backref <= 0
# no paren is the number of parentheses excluding escaped parentheses
j <- (engine == "R" && !is.null(backref) && backref <= 0) + abs(backref)
# i <- min(1, j)
i <- if (engine == "tcl" && backref <= 0) 0 else 1
# check if this next line is actually needed
j <- max(i, j)

stopifnot(is.character(pattern), is.character(x), is.function(replacement))
force(env)
gsub.function <- function(x) {
# x <- base::gsub('"', '\\\\"', x)
# x <- chartr('"', '\b', x)
# pattern <- chartr('"', '\b', pattern)
pattern <- paste("(", pattern, ")", sep = "")
if (engine == "R" && !is.null(backref) && backref <=0) {
pattern <- paste("(", pattern, ")", sep = "")
}
if (!is.null(e)) {
e$count <- 0
if ("pre" %in% ls(e)) e$pre()
}
# replace match with \1\2 \\1 \2 \\2 \2 ... \1
# replace each backref in regexp with
# \1\2 followed by backrefs separated by \2 all followed by \1
# replace each substring of x that matches pattern with
# \1\2 followed by backrefs separated by \2 all followed by \1.
# Note \\1 refers to entire match, \\2 to 1st backref, \\3 to 2nd etc.
# Using that create a string \1\2 first backref \2 second ... \1
# and perform replacement
# and perform replacement.
# For example, z <- gsub("((.)/(.))", "\001\002\\2\002\\3\001", "5/6 8/9")
# gives z = "\001\0025\0026\001 \001\0028\0029\001"
# and then split z on \1

repl <- function(i,j) {
rs <- paste('\\', seq(i,j), collapse = "\2", sep = "")
rs <- paste('\1\2', rs, '\1', sep = "")
# if backref= is too large, reduce by 1 and try again
tryCatch(base::gsub(pattern, rs, x, ...),
error = function(x) if (j > i) repl(i,j-1) else stop(x))
if (engine == "R")
tryCatch(base::gsub(pattern, rs, x, ...),
error = function(x) if (j > i) repl(i,j-1) else stop(x))
else {
tcl("set", "pattern", pattern)
tcl("set", "replacement", rs)
tcl("set", "x", x)
s <- if (ignore.case) {
'set r [regsub -all -nocase -- $pattern $x $replacement]'
} else 'set r [regsub -all -- $pattern $x $replacement]'
tryCatch(tclvalue(.Tcl(s)),
error = function(x) if (j > i) repl(i,j-1) else stop(x))
}
}
z <- repl(i,j)
z <- strsplit(z, "\1")[[1]]
# f splits string s into back references passing them to replacement fn
f <- function(s) {
if (nchar(s) > 0 && substring(s,1,1) == "\2") {
s <- sub("\2$", "\2\2", s)
L <- as.list(strsplit(s, "\2")[[1]][-1])
if (nchar(s) > 0 && substring(s,1,1) == "\2") {
s <- sub("\2$", "\2\2", s)
L <- as.list(strsplit(s, "\2")[[1]][-1])
# if (!is.null(e)) L <- c(list(e), L)
do.call(replacement, L)
do.call(replacement, L)
} else s
}
z <- paste(sapply(z, f), collapse = "")
if (!is.null(e) && "post" %in% ls(e)) e$post()
z
# gsub('\b', '\\\\"', z)
}
# debug(gsub.function)
sapply(x, gsub.function, USE.NAMES = USE.NAMES)
}

ostrapply <-
function (X, pattern, FUN = function(x, ...) x, ...,
function (X, pattern, FUN = function(x, ...) x, ..., empty = NULL,
simplify = FALSE, USE.NAMES = FALSE, combine = c) {
here <- environment()
combine <- match.funfn(combine)
Expand Down Expand Up @@ -179,7 +232,7 @@ function (X, pattern, FUN = function(x, ...) x, ...,
}
)
}
ff <- function(x) { gsubfn(pattern, p, x, ...); p$v }
ff <- function(x) { gsubfn(pattern, p, x, engine = "R", ...); p$v }
result <- sapply(X, ff,
simplify = is.logical(simplify) && simplify, USE.NAMES = USE.NAMES)
if (is.logical(simplify)) result else {
Expand All @@ -189,15 +242,21 @@ function (X, pattern, FUN = function(x, ...) x, ...,

strapply <-
function (X, pattern, FUN = function(x, ...) x, backref = NULL, ...,
ignore.case = FALSE, perl = FALSE,
engine = if (isTRUE(capabilities()[["tcltk"]])) "tcl" else "R",
empty = NULL,
ignore.case = FALSE, perl = FALSE, engine = getOption("gsubfn.engine"),
simplify = FALSE, USE.NAMES = FALSE, combine = c) {
combine <- match.funfn(combine)
stopifnot(!missing(pattern))
pattern <- as.character(pattern)

if (missing(engine) || is.null(engine))
engine <- if (isTRUE(capabilities()[["tcltk"]])) "tcl" else "R"
engine <- match.arg(engine, c("tcl", "R"))
if (engine == "tcl") stopifnot(require(tcltk))

if (engine == "R" || is.proto(FUN) || perl) return(ostrapply(X = X,
pattern = pattern, FUN = FUN, backref = backref,
..., perl = perl, simplify = simplify, USE.NAMES = USE.NAMES,
..., empty = empty, perl = perl, simplify = simplify, USE.NAMES = USE.NAMES,
combine = combine))
stopifnot(engine == "tcl", require(tcltk))
if (is.proto(FUN)) {
Expand All @@ -216,8 +275,10 @@ function (X, pattern, FUN = function(x, ...) x, backref = NULL, ...,
} else {
FUN <- match.funfn(FUN)
}
# ff is called for each component of the vector of strings
ff <- function(x) {
s <- strapply1(x, pattern, backref, ignore.case)
if (length(s) == 0 && !is.null(empty)) s <- matrix(empty, 1)
L <- lapply(seq_len(ncol(s)), function(j) {
combine(do.call(FUN, as.list(s[, j]))) })
# combine(do.call(FUN, list(s[, j]))) })
Expand Down Expand Up @@ -248,7 +309,7 @@ strapply1 <- function(x, e, backref, ignore.case = FALSE) {
if (about > 1) out[-1,, drop = FALSE]
else out
} else {
mn <- 1 + backref > 0
mn <- 1 + (backref < 0)
mx <- min(abs(backref) + 1, about)
out[seq(mn, mx),, drop = FALSE]
}
Expand Down
24 changes: 24 additions & 0 deletions inst/NEWS
@@ -1,3 +1,27 @@
Changes in 0.5-3

o new argument, empty, on strapply

o gsubfn now has an engine argument. This was already the case
with strapply.

o new global option "gsubfn.engine" whose value can be "tcl" or "R". It is
used as the default value for the gsubfn and strappy engine argument. If
the option is not set then gsubfn and strapply default to the "tcl"
regular expression engine if the current R installation has tcltk
capability and "R" otherwise.

o gsubfn pattern is no longer parenthesized internally if (1) engine = "tcl"
or if (2) engine = "R" and backref > 0. See the example of replacing
double characters in ?gsubfn .

o if the function in gsubfn or strapply has an `&` argument then backref
will be taken to be negative and the entire match passed through that arg.

o in as.function.formula if any of ..1 through ..9 are found as free
variables then ... is used as a trailng argument instead. If `&` is a
free variable then it is placed at the beginning of the argument list.

Changes in 0.5-2

o bug fixes
Expand Down
Binary file modified inst/doc/Rplots.pdf
Binary file not shown.
Binary file added inst/doc/gsubfn-24.05.2010--0.02.16.66.bck.pdf
Binary file not shown.
1 change: 0 additions & 1 deletion inst/doc/gsubfn.R
Expand Up @@ -179,7 +179,6 @@ set.seed(1)
X <- data.frame(X = rnorm(24), W = runif(24), A = gl(2, 1, 24), B = gl(2, 2, 24))
fn$aggregate(1:nrow(X), X[3:4], i ~ weighted.mean(X[i,1], X[i,2]))

fn$by(X, X[3:4], ~ data.frame(wmean = weighted.mean(x[1], x[2]), x[1, 3:4]), simplify = rbind)


###################################################
Expand Down
2 changes: 1 addition & 1 deletion inst/doc/gsubfn.Rnw
Expand Up @@ -477,7 +477,7 @@ passing it to \code{paste0}.
It uses the zero-lookahead perl style pattern matching expression.
<<gsubfn-paste0>>=
strapply(' a b c d e f ', ' [a-z](?=( [a-z] ))', paste0, perl = TRUE)[[1]]
strapply(' a b c d e f ', ' [a-z](?=( [a-z] ))', paste0)[[1]]
@
Expand Down
Binary file modified inst/doc/gsubfn.pdf
Binary file not shown.
13 changes: 13 additions & 0 deletions man/as.function.formula.Rd
Expand Up @@ -19,6 +19,13 @@
no left hand side the free variables on the right, in the
order encountered are used as the arguments. \code{letters},
\code{LETTERS} and \code{pi} are ignored and not used as arguments.
If there is no left hand side and
any of \code{..1}, \code{..2}, ..., \code{..9} are found
as free variables then they are not used as arguments but \code{...}
is used as a trailing argument instead.
If there is no left hand side and \code{`&`} is found as a free variable
then that variable is used as the first argument and \code{...} is added
as the last argument.
If the left hand side is \code{0} then the function is created as
a zero argument function.
}
Expand All @@ -27,8 +34,13 @@ all have lower operator precdence than \code{~} so function bodies that
contain them typically must be surrounded with \code{{...}}.}
\seealso{ \code{\link[base]{Syntax}}.}
\examples{
old.options <- options(keep.source = FALSE)
as.function(~ as.numeric(x) + as.numeric(y))
as.function(x + y ~ as.numeric(x) + as.numeric(y)) # same
as.function(~ ..1 + ..2)
# the replacement function in gsubfn uses as.function.formula to
# interpret formulas as functions. Here we insert ! after each digit.
gsubfn("[0-9]", ~ paste0(`&`, "!"), "ab4cd5")
\dontrun{
# example where function body must be surrounded with {...}
# due to use of <<-. See warning section above.
Expand All @@ -37,5 +49,6 @@ fn$tryCatch( warning("a warning"),
warning = w ~ { mywarn <<- conditionMessage(w)})
print(mywarn)
}
options(old.options)
}
\keyword{ character }
4 changes: 0 additions & 4 deletions man/gsubfn-package.Rd
Expand Up @@ -71,10 +71,6 @@ gsubfn("[[:digit:]]+", function(x) as.numeric(x)+1, "(10 20)(100 30)")
# same
gsubfn("[[:digit:]]+", ~ as.numeric(x)+1, "(10 20)(100 30)")

# place {1} after first word, {2} after second word
p <- proto(fun = function(this, x) paste0(x, "{", count, "}" ))
gsubfn("\\\\w+", p, "hello world")

# replace each number with its cumulative sum
pcumsum <- proto(pre = function(this) this$sum <- 0,
fun = function(this, x) { sum <<- sum + as.numeric(x) }
Expand Down

0 comments on commit f82a016

Please sign in to comment.