Skip to content

Commit

Permalink
version 0.6-2
Browse files Browse the repository at this point in the history
  • Loading branch information
ggrothendieck authored and gaborcsardi committed Mar 23, 2012
1 parent 33d33b6 commit e4f7c88
Show file tree
Hide file tree
Showing 20 changed files with 369 additions and 265 deletions.
15 changes: 8 additions & 7 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: gsubfn
Version: 0.5-7
Date: 2011-06-06
Version: 0.6-2
Date: 2012-03-23
Title: Utilities for strings and function arguments.
Author: G. Grothendieck
Maintainer: G. Grothendieck <ggrothendieck@gmail.com>
Expand All @@ -17,11 +17,12 @@ Description: gsubfn is like gsub but can take a replacement function or
that expects another function as an input argument or functions
like cat or sql calls that may involve strings where
substitution is desirable.
Depends: R (>= 2.5.0), proto
Suggests: boot, chron, doBy, grid, lattice, quantreg, reshape2, tcltk,
zoo
ByteCompile: true
Depends: R (>= 2.14.0), proto
Suggests: boot, chron, doBy, grid, lattice, quantreg, reshape, svUnit,
tcltk, zoo
License: GPL (>= 2)
URL: http://gsubfn.googlecode.com
Packaged: 2011-06-06 21:08:59 UTC; Louis
Packaged: 2012-03-28 17:20:17 UTC; Louis
Repository: CRAN
Date/Publication: 2011-06-07 06:35:56
Date/Publication: 2012-03-28 18:13:11
30 changes: 30 additions & 0 deletions MD5
@@ -0,0 +1,30 @@
46aaf69a91703493b666f212a04f2d8d *COPYING
ec7a366d68a859e989142f1cd58e77b8 *COPYRIGHTS
d87872761fefe42e37e48234ff0d9f16 *DESCRIPTION
67ec5287bfca442488c725e68e24c155 *NAMESPACE
a32c8ee5562489c5ea6bc1b4e122562a *R/fn.R
8418e40994eb3a5d585d3ed7ebf62b58 *R/gsubfn.R
cb4b5ed44d603df198ba67c497e60a93 *R/strapplyc.R
4ee0d28766ebc2084c278e456996b633 *R/zzz.R
fd368f5b6ac08f97596a66a7041109b4 *demo/00Index
b02f0d688c7af2f2f012ebca2c3b2f66 *demo/gsubfn-chron.R
dad9498d256e19cf92af106f3bed306b *demo/gsubfn-cut.R
64e6b35d218602a5bcabf8df1861d205 *demo/gsubfn-gries.R
97109cd369ca5a2fc15b2be55177aa61 *demo/gsubfn-lower.R
93ceb63b260bf9f08398a1564a369cb2 *demo/gsubfn-si.R
10af9aa7492b9c6beb4b939a3bc4da5d *demo/gsubfn-unitTests.R
04f8a6fcfee3a5640e02cd9a324255ed *inst/ANNOUNCE
c90a8201535f86b66f213f1ffce0f202 *inst/NEWS
cd4675587428f7b6c4a52c1ff1303ee6 *inst/THANKS
5e676f40f47703f3849c9c54c13d7fc4 *inst/WISHLIST
cb61f6371fa5ba46bb6123a0c7994d59 *inst/doc/gsubfn-23.03.2012--8.55.13.11.bck.pdf
a872d9493c1b5f73eabf439f297531fd *inst/doc/gsubfn.Rnw
4e5c2749bdba3c88dad156c4d3e32fb5 *inst/doc/gsubfn.pdf
c9e1d04a2d77929861d6c4944abff5c3 *inst/sample.txt
e5c058b39406ad9f8245cce20411072f *inst/unitTests/runit.all.R
1863bf11ffc8bf378f37688240f46e18 *man/as.function.formula.Rd
7a9a388b5afa4a69bd43f326616eb7dd *man/fn.Rd
10c46a80236ea6aff5c5d3b2c1062cad *man/gsubfn-package.Rd
ce43e1646cb6ae897dd030ed30cf5a38 *man/gsubfn.Rd
b53a82b0503f4e35bf5c3858fc4c76a1 *man/match.funfn.Rd
20334eaf9d3b243dd7afd30a22e4f092 *man/strapply.Rd
10 changes: 10 additions & 0 deletions NAMESPACE
@@ -0,0 +1,10 @@
# Default NAMESPACE created by R
# Remove the previous line if you edit this file

# Export all names
exportPattern(".")

# Import all packages listed as Imports or Depends
import(
proto
)
20 changes: 5 additions & 15 deletions R/fn.R
@@ -1,12 +1,3 @@
# eval.with.vis is no longer used - withVisible is used instead
eval.with.vis <- function (expr) {
expr <- substitute(expr)
pf <- parent.frame()
tmp <- .Internal(eval.with.vis(expr, pf,
baseenv()))
tmp
}

as.function.formula <- function(x, ...) {
vars <- setdiff(all.vars(x[[2]]), c("letters", "LETTERS", "pi"))
dotdot <- grepl("^[.][.][1-9.]$", vars)
Expand Down Expand Up @@ -107,7 +98,7 @@ fn <- structure(NA, class = "fn")
if (any.chara)
for(i in seq(along = mcListE))
if (is.chara[i])
mcListE[[i]] <- gsubfn(x = substring(mcListE[[i]], 2), env = p)
mcListE[[i]] <- gsubfn(x = substring(mcListE[[i]], 2), envir = p)

# if no ~~ formulas and no \1 strings use default strategy
# of converting all formulas to functions and if no formulas
Expand All @@ -121,19 +112,18 @@ fn <- structure(NA, class = "fn")
if (any.char)
for(i in seq(along = mcListE))
if (is.char[i])
mcListE[[i]] <- gsubfn(x = mcListE[[i]], env = p)
mcListE[[i]] <- gsubfn(x = mcListE[[i]], envir = p)
}
}

# out <- do.call(FUN, args)
# thanks Duncan for eval.with.vis !!!
# out <- eval.with.vis(do.call(FUN, mcListE, env=p))
out <- withVisible(do.call(FUN, mcListE, env=p))
# out <- withVisible(FUN, mcListE, env=p))
out <- withVisible(do.call(FUN, mcListE, envir=p))
vis <- out$visible
out <- out $value
if (!is.null(simplify)) {
if(!is.list(out)) out <- list(out)
out <- eval.with.vis(do.call(simplify, out))
out <- withVisible(do.call(simplify, out))
vis <- out$visible
out <- out$value
}
Expand Down
39 changes: 19 additions & 20 deletions R/gsubfn.R
Expand Up @@ -24,19 +24,19 @@ gsubfn <- function(pattern, replacement, x, backref, USE.NAMES = FALSE,
ignore.case = FALSE, engine = getOption("gsubfn.engine"),
env = parent.frame(), ...)
{
if (isTRUE(list(...)$perl)) engine <- "R"
R.engine <- identical(engine, "R")

if (is.null(engine))
engine <- if (isTRUE(capabilities()[["tcltk"]])) "tcl" else "R"
engine <- match.arg(engine, c("tcl", "R"))
if (engine == "tcl") stopifnot(require(tcltk))

here <- environment()
here <- environment()
.Tcl <- tcltk::.Tcl
tcl <- tcltk::tcl
tclvalue <- tcltk::tclvalue

if (missing(replacement)) here$replacement <- function(...)
eval(parse(text = paste(..., sep = "")), env)

if (is.character(replacement)) {
if (engine == "R")
if (R.engine)
return(base::gsub(pattern, replacement, x, ...))
else {
f <- function(x) {
Expand Down Expand Up @@ -105,11 +105,11 @@ gsubfn <- function(pattern, replacement, x, backref, USE.NAMES = FALSE,

# cat("backref:", backref, "\n")
# Note. an extra set of parens are inserted if engine is R and backref <= 0
# no paren is the number of parentheses excluding escaped parentheses
# no of parens is the number of parentheses excluding escaped parentheses
# if engine=="R" then i=1 and j=no of backrefs + 1 for match if backref>=0
# if engine=="tcl" then i=0 if backref<0 and i=1 otherwise. j=abs(backref)
j <- (engine == "R" && !is.null(backref) && backref >= 0) + abs(backref)
i <- if (engine == "tcl" && backref >= 0) 0 else 1
# if engine!="R" then i=0 if backref<0 and i=1 otherwise. j=abs(backref)
j <- (identical(engine, "R") && !is.null(backref) && backref >= 0) + abs(backref)
i <- if (!R.engine && backref >= 0) 0 else 1
# check if this next line is actually needed
j <- max(i, j)

Expand All @@ -121,7 +121,7 @@ gsubfn <- function(pattern, replacement, x, backref, USE.NAMES = FALSE,
# x <- base::gsub('"', '\\\\"', x)
# x <- chartr('"', '\b', x)
# pattern <- chartr('"', '\b', pattern)
if (engine == "R" && !is.null(backref) && backref >=0) {
if (R.engine && !is.null(backref) && backref >=0) {
pattern <- paste("(", pattern, ")", sep = "")
}
if (!is.null(e)) {
Expand All @@ -141,7 +141,7 @@ gsubfn <- function(pattern, replacement, x, backref, USE.NAMES = FALSE,
rs <- paste('\\', seq(i,j), collapse = "\2", sep = "")
rs <- paste('\1\2', rs, '\1', sep = "")
# if backref= is too large, reduce by 1 and try again
if (engine == "R")
if (R.engine)
tryCatch(base::gsub(pattern, rs, x, ignore.case = ignore.case, ...),
error = function(x) if (j > i) repl(i,j-1) else stop(x))
else {
Expand Down Expand Up @@ -237,7 +237,7 @@ function (X, pattern, FUN = function(x, ...) x, ignore.case = FALSE, ..., empty
}
ff <- function(x, ...) { gsubfn(pattern, p, x, engine = "R", ignore.case = ignore.case, ...); p$v }
result <- sapply(X, ff, ...,
simplify = is.logical(simplify) && simplify, USE.NAMES = USE.NAMES)
simplify = isTRUE(simplify), USE.NAMES = USE.NAMES)
if (is.logical(simplify)) result else {
do.call(match.funfn(simplify), result)
}
Expand All @@ -252,17 +252,13 @@ function (X, pattern, FUN = function(x, ...) x, backref = NULL, ...,
stopifnot(!missing(pattern))
pattern <- as.character(pattern)

if (missing(engine) || is.null(engine))
engine <- if (isTRUE(capabilities()[["tcltk"]])) "tcl" else "R"
engine <- match.arg(engine, c("tcl", "R"))
if (engine == "tcl") stopifnot(require(tcltk))
if (is.proto(FUN) || perl) engine <- "R"

if (engine == "R" || is.proto(FUN) || perl)
if (identical(engine, "R"))
return(ostrapply(X = X, ignore.case = ignore.case,
pattern = pattern, FUN = FUN, backref = backref,
..., empty = empty, perl = perl, simplify = simplify, USE.NAMES = USE.NAMES,
combine = combine))
stopifnot(engine == "tcl", require(tcltk))
if (is.proto(FUN)) {
# TODO
} else if (is.character(FUN)) {
Expand Down Expand Up @@ -297,6 +293,9 @@ function (X, pattern, FUN = function(x, ...) x, backref = NULL, ...,
}

strapply1 <- function(x, e, backref, ignore.case = FALSE) {
.Tcl <- tcltk::.Tcl
tcl <- tcltk::tcl
tclvalue <- tcltk::tclvalue
tcl("set", "e", e)
tcl("set", "x", x)
.Tcl('set about [regexp -about -- $e]')
Expand Down
64 changes: 64 additions & 0 deletions R/strapplyc.R
@@ -0,0 +1,64 @@
library(tcltk)

# x is name of a tcl variable holding list of character vectors
tclList2R <- function(x, convert = as.character) {
.Tcl <- tcltk::.Tcl
len <- as.integer(.Tcl(sprintf("llength $%s", x)))
f <- function(i) convert(.Tcl(sprintf("lindex $%s %d", x, i)))
lapply(seq(0, len-1), f)
}

# high performance strapply with hard coded FUN=c. Guts in tcl.
strapplyc <- function(X, pattern, backref, ignore.case = FALSE, simplify = FALSE, USE.NAMES = FALSE, engine = getOption("gsubfn.engine")) {
if (identical(engine, "R")) return(
strapply(X = X, pattern = pattern, FUN = "c", backref = backref,
ignore.case = ignore.case, simplify = simplify,
USE.NAMES = USE.NAMES, engine = engine)
)
.Tcl <- tcltk::.Tcl
tcl <- tcltk::tcl
tcl("set", "X", as.tclObj(X))
tcl("set", "pattern", pattern)
tcl("set", "nocase", if (ignore.case) "-nocase" else "")
if (missing(backref) || is.null(backref) || is.na(backref)) backref <- 999
tcl("set", "backref", backref)
.Tcl("set about [regexp -about -- $pattern]")
.Tcl("set about [lindex $about 0]")
.Tcl("if { min($about, $backref) <= 0 } { set mn 0 } else { set mn 1 }")
.Tcl("set mx [expr min($about, abs($backref))]")
s <- paste('set result {}
set k [expr $about + 1]
if { $about == 0 || $about <= -$backref} {
# this leg of the "if" returns everything from regexp so we
# can avoid the extraction subloop of the "else" leg for speed
foreach item $X {
# {*} is new feature in tcl 8.5 to add level of substitution
set cmd [list regexp -all -inline {*}$nocase -- $pattern $item]
set res [{*}$cmd]
lappend result $res
}
} else {
foreach item $X {
# {*} is new feature in tcl 8.5 that adds level of substitution
set cmd [list regexp -all -inline {*}$nocase -- $pattern $item]
set cmdout [{*}$cmd]
set imin $mn
set imax $mx
set res {}
while {$imax < [llength $cmdout]} {
lappend res [lrange $cmdout $imin $imax]
incr imin $k
incr imax $k
}
lappend result [concat {*}$res]
}
}')
.Tcl(s)
out <- tclList2R("result")

result <- sapply(out, identity, simplify = isTRUE(simplify),
USE.NAMES = USE.NAMES)
if (is.logical(simplify)) result else {
do.call(match.funfn(simplify), result)
}
}
16 changes: 16 additions & 0 deletions R/zzz.R
@@ -0,0 +1,16 @@

.onLoad <- function(libname, pkgname) {
gsubfn.engine <- getOption("gsubfn.engine")
# if gsubfn.engine was not set to "R" then check if tcltk can be used
if ( ! identical(gsubfn.engine, "R") ) {
tcltk.ok <- isTRUE(capabilities()[["tcltk"]]) &&
requireNamespace("tcltk", quietly = TRUE)
if ( ! tcltk.ok ) {
options(gsubfn.engine = "R")
warning('Could not load tcltk. Will use slower R code instead.')
}
}

}

# .onUnload <- function(libpath) {}
2 changes: 2 additions & 0 deletions demo/00Index
@@ -1,4 +1,6 @@
gsubfn-chron read in zoo data with chron datetimes.
gsubfn-cut Use strapply to extract endpoints from cut labels.
gsubfn-gries Stefan Gries' Linguistics code.
gsubfn-lower Makes all letters in words lower case except first.
gsubfn-si Replace SI scale letter with number.
gsubfn-unitTests Run gsubfn unit test suite.
4 changes: 4 additions & 0 deletions demo/gsubfn-lower.R
@@ -0,0 +1,4 @@

# makes all letters except first in word lower case
gsubfn("\\B.", tolower, "I LIKE A BANANA SPLIT", perl = TRUE)

8 changes: 8 additions & 0 deletions demo/gsubfn-unitTests.R
@@ -0,0 +1,8 @@
library(gsubfn)
library(svUnit)
gsubfn.tests <- system.file("unitTests", "runit.all.R", package = "gsubfn")
cat("Running:", gsubfn.tests, "\n")
source(gsubfn.tests)
clearLog()
test.all()
Log()
26 changes: 24 additions & 2 deletions inst/NEWS
@@ -1,6 +1,28 @@
Changes in 0.5-7
Changes in 0.6-2

o minor changes to address R CMD check. No change in functionality.
o package is now byte compiled hence depends on 2.14.0 or higher

o if tcltk can be loaded it is loaded at startup. If not, the gsubfn.engine
option is set to "R", a message is given and subsequent calls will use
slower R code instead of tcltk. Previously this check was done at run
time in the individual functions.

o strapplyc now calls strapply if gsubfn.engine = "R"

o now uses withVisible instead of internal function eval.with.vis. This
internal change was made to satisfy R 2.15.0 and should not affect users.

o added more unit tests

Changes in 0.6

o strapplyc added, a fast tcl-only version of strapply specialized to FUN=c.

o bug fix. perl was ignored when engine = "tcl".

o unit test suite added. demo("gsubfn-unitTests") will run it.

o added NAMESPACE

Changes in 0.5-6

Expand Down

0 comments on commit e4f7c88

Please sign in to comment.