Permalink
Cannot retrieve contributors at this time
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
433 lines (417 sloc)
14.3 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # checking for valid unreserved names | |
| # this one is not vectorized | |
| # also don't allow dot to be used here as remapping that is problem in magrittr pipelines | |
| # this is essentially treating "." as reserved (which is more compatible with magrittr) | |
| # from: https://stackoverflow.com/questions/8396577/check-if-character-value-is-a-valid-r-object-name | |
| isValidAndUnreservedName <- function(string) { | |
| if(is.null(string)) { | |
| return(FALSE) | |
| } | |
| (is.character(string)) && | |
| (length(string)==1) && | |
| (string!='.') && | |
| (make.names(string,unique = FALSE, allow_ = TRUE) == string) | |
| } | |
| #' Restrict an alias mapping list to things that look like name assignments | |
| #' | |
| #' @param alias mapping list | |
| #' @param restrictToAllCaps logical, if true only use all-capitalized keys | |
| #' @return string to string mapping | |
| #' | |
| #' @examples | |
| #' | |
| #' alias <- list(region= 'east', str= "'seven'") | |
| #' aliasR <- restrictToNameAssignments(alias) | |
| #' print(aliasR) | |
| #' | |
| #' | |
| #' @export | |
| #' | |
| restrictToNameAssignments <- function(alias, restrictToAllCaps= FALSE) { | |
| # make sure alias is a list (not a named vector) | |
| alias <- as.list(alias) | |
| usableEntries <- vapply(names(alias), | |
| function(ai) { | |
| vi <- alias[[ai]] | |
| if(is.name(vi)) { | |
| vi <- as.character(vi) | |
| } | |
| isValidAndUnreservedName(ai) && isValidAndUnreservedName(vi) && | |
| ( (!restrictToAllCaps) || (toupper(ai)==ai)) | |
| }, | |
| logical(1)) | |
| # return sublist | |
| alias[usableEntries] | |
| } | |
| prepareAlias <- function(alias, strict) { | |
| # make sure alias is a list (not a named vector) | |
| alias <- as.list(alias) | |
| # skip any NULL slots | |
| nulls <- vapply(names(alias), is.null, logical(1)) | | |
| vapply(alias, is.null, logical(1)) | |
| alias <- alias[!nulls] | |
| if (length(unique(names(alias))) != length(names(alias))) { | |
| stop('wrapr::prepareAlias alias keys must be unique') | |
| } | |
| if(strict) { | |
| if ('.' %in% c(names(alias),as.character(alias))) { | |
| stop("wrapr::prepareAlias can not map to/from '.'") | |
| } | |
| } | |
| for (ni in names(alias)) { | |
| if (is.null(ni)) { | |
| stop('wrapr:let alias keys must not be null') | |
| } | |
| if (is.na(ni)) { | |
| stop('wrapr:let alias keys must not be NA') | |
| } | |
| if (!is.character(ni)) { | |
| stop('wrapr:let alias keys must all be strings') | |
| } | |
| if (length(ni) != 1) { | |
| stop('wrapr:let alias keys must all be scalars') | |
| } | |
| if (nchar(ni) <= 0) { | |
| stop('wrapr:let alias keys must be non-empty string') | |
| } | |
| if (strict && (!isValidAndUnreservedName(ni))) { | |
| stop(paste('wrapr:let alias key not a valid name: "', ni, '"')) | |
| } | |
| vi <- alias[[ni]] | |
| if (is.null(vi)) { | |
| stop('wrapr:let alias values must not be null') | |
| } | |
| if (is.name(vi)) { | |
| vi <- as.character(vi) | |
| } | |
| if (is.na(vi)) { | |
| stop('wrapr:let alias values must not be NA') | |
| } | |
| if (!is.character(vi)) { | |
| stop(paste('wrapr:let alias values must all be strings or names (', | |
| ni,'is class:', paste(class(vi), collapse=', '), ')')) | |
| } | |
| if (length(vi) != 1) { | |
| stop('wrapr:let alias values must all be single strings (not arrays or null)') | |
| } | |
| if (nchar(vi) <= 0) { | |
| stop('wrapr:let alias values must not be empty string') | |
| } | |
| if (strict && (!isValidAndUnreservedName(vi))) { | |
| stop(paste('wrapr:let alias value not a valid name: "', vi, '"')) | |
| } | |
| } | |
| alias <- lapply(alias, as.character) | |
| alias | |
| } | |
| #' Substitute text (note text can be a vector). | |
| #' | |
| #' @param alias mapping named list/vector to strings/names or general | |
| #' @param strexpr character vector source text to be re-written | |
| #' @return parsed R expression with substitutions | |
| #' | |
| #' @noRd | |
| #' | |
| letprep_str <- function(alias, strexpr) { | |
| if(!is.character(strexpr)) { | |
| stop("wrapr::letprep_str strexpr must be a character array") | |
| } | |
| body <- strexpr | |
| if(length(alias)>0) { | |
| # find a token not in alias or block | |
| testText <- paste(paste(c(names(alias), as.character(alias)), collapse = ' '), | |
| strexpr) | |
| tok <- "WRAPR_TOK" | |
| while(length(grep(tok,testText))>0) { | |
| tok <- paste0("WRAPR_TOK_",paste(sample(LETTERS, 15, replace = TRUE), | |
| collapse = '')) | |
| } | |
| # re-write the parse tree and prepare for execution in 2 stages to allows swaps | |
| alias1 <- paste(tok, seq_len(length(alias)), sep= '_') | |
| names(alias1) <- names(alias) | |
| alias2 <- as.character(alias) | |
| names(alias2) <- as.character(alias1) | |
| for(aliasi in list(alias1, alias2)) { | |
| for (ni in names(aliasi)) { | |
| value <- aliasi[[ni]] | |
| if(!is.null(value)) { | |
| value <- as.character(value) | |
| if(ni!=value) { | |
| pattern <- paste0("\\b", ni, "\\b") | |
| body <- gsub(pattern, value, body) | |
| } | |
| } | |
| } | |
| } | |
| } | |
| parse(text = body) | |
| } | |
| #' Substitute language elements. | |
| #' | |
| #' @param alias mapping named list/vector to strings/names or general | |
| #' @param lexpr language item | |
| #' @return R language element with substitutions | |
| #' | |
| #' @noRd | |
| #' | |
| letprep_lang <- function(alias, lexpr) { | |
| nexpr <- lexpr | |
| n <- length(nexpr) | |
| # just in case (establishes an invarient of n>=1) | |
| if(n<=0) { | |
| return(nexpr) | |
| } | |
| # left-hand sides of lists/calls are represented as keys | |
| nms <- names(nexpr) | |
| if(length(nms)>0) { | |
| for(i in seq_len(length(nms))) { | |
| ki <- as.character(nms[[i]]) | |
| if((length(ki)>0)&&(nchar(ki)>0)) { | |
| ri <- alias[[ki]] | |
| if((length(ri)>0)&&(ri!=ki)) { | |
| nms[[i]] <- ri | |
| } | |
| } | |
| } | |
| names(nexpr) <- nms | |
| } | |
| # special cases | |
| if(is.call(nexpr)) { | |
| callName <- as.character(nexpr[[1]]) | |
| if(length(callName)==1) { | |
| # get into special cases, | |
| # detect them very strictly and return out of them | |
| if((callName=='$') && (n==3)) { | |
| # special case a$"b" | |
| # let(c(x='y'), d$"x", eval=FALSE) | |
| # know length should be 3 from: | |
| # do.call('$',list(data.frame(x=1:3),'x','z')) | |
| # # Error in list(x = 1:3)$x : 3 arguments passed to '$' which requires 2 | |
| # know the 3rd argument can be treated as a name from: | |
| # data.frame(x=1:3)$1 | |
| # # Error: unexpected numeric constant in "data.frame(x=1:3)$1" | |
| nexpr[[2]] <- letprep_lang(alias, nexpr[[2]]) | |
| nexpr[[3]] <- letprep_lang(alias, as.name(nexpr[[3]])) | |
| return(nexpr) | |
| } | |
| } | |
| } | |
| # basic recurse, establish invariant n==1 | |
| if(n>1) { | |
| for(i in seq_len(n)) { | |
| # empty symbol (used to denote blank arguments like d[,1]) imitates missing on self call | |
| # also assigning null shortens expressions | |
| blankSym <- is.null(nexpr[[i]]) || | |
| (is.symbol(nexpr[[i]]) && (nchar(as.character(nexpr[[i]]))<=0)) | |
| if(!blankSym) { | |
| subi <- letprep_lang(alias, nexpr[[i]]) | |
| if(!is.null(subi)) { | |
| nexpr[[i]] <- subi | |
| } | |
| } | |
| } | |
| return(nexpr) | |
| } | |
| # don't re-map quoted strings (except above) | |
| if(is.character(nexpr)) { | |
| return(nexpr) | |
| } | |
| # this is the main re-mapper | |
| if(is.symbol(nexpr)) { # same as is.name() | |
| # symbol is not subsettable, so length==1 | |
| # as.name('x')[[1]] | |
| # # Error in as.name("x")[[1]] : object of type 'symbol' is not subsettable | |
| # and can't have names | |
| # names(as.name("x")) <- 'a' | |
| # ## Error in names(as.name("x")) <- "a" : | |
| # ## target of assignment expands to non-language object | |
| ki <- as.character(nexpr) | |
| ri <- alias[[ki]] | |
| if((length(ri)>0)&&(ri!=ki)) { | |
| return(as.name(ri)) | |
| } | |
| return(nexpr) | |
| } | |
| # fall-back | |
| return(nexpr) | |
| } | |
| #' Execute expr with name substitutions specified in alias. | |
| #' | |
| #' \code{let} implements a mapping from desired names (names used directly in the expr code) to names used in the data. | |
| #' Mnemonic: "expr code symbols are on the left, external data and function argument names are on the right." | |
| #' | |
| #' Please see the \code{wrapr} \code{vignette} for some discussion of let and crossing function call boundaries: \code{vignette('wrapr','wrapr')}. | |
| #' For formal documentation please see \url{https://github.com/WinVector/wrapr/blob/master/extras/wrapr_let.pdf}. | |
| #' Transformation is performed by substitution, so please be wary of unintended name collisions or aliasing. | |
| #' | |
| #' Something like \code{let} is only useful to get control of a function that is parameterized | |
| #' (in the sense it take column names) but non-standard (in that it takes column names from | |
| #' non-standard evaluation argument name capture, and not as simple variables or parameters). So \code{wrapr:let} is not | |
| #' useful for non-parameterized functions (functions that work only over values such as \code{base::sum}), | |
| #' and not useful for functions take parameters in straightforward way (such as \code{base::merge}'s "\code{by}" argument). | |
| #' \code{dplyr::mutate} is an example where | |
| #' we can use a \code{let} helper. \code{dplyr::mutate} is | |
| #' parameterized (in the sense it can work over user supplied columns and expressions), but column names are captured through non-standard evaluation | |
| #' (and it rapidly becomes unwieldy to use complex formulas with the standard evaluation equivalent \code{dplyr::mutate_}). | |
| #' \code{alias} can not include the symbol "\code{.}". | |
| #' | |
| #' | |
| #' The intent from is from the user perspective to have (if | |
| #' \code{a <- 1; b <- 2}): | |
| #' \code{let(c(z = 'a'), z+b)} to behave a lot like | |
| #' \code{eval(substitute(z+b, c(z=quote(a))))}. | |
| #' | |
| #' \code{let} deliberately checks that it is mapping only to legal \code{R} names; | |
| #' this is to discourage the use of \code{let} to make names to arbitrary values, as | |
| #' that is the more properly left to \code{R}'s environment systems. | |
| #' \code{let} is intended to transform | |
| #' "tame" variable and column names to "tame" variable and column names. Substitution | |
| #' outcomes that are not valid simple \code{R} variable names (produced with out use of | |
| #' back-ticks) are forbidden. It is suggested that substitution targets be written | |
| #' \code{ALL_CAPS} style to make them stand out. | |
| #' | |
| #' \code{let} was inspired by \code{gtools:strmacro()}. | |
| #' Please see \url{https://github.com/WinVector/wrapr/blob/master/extras/MacrosInR.md} for a discussion of macro tools in \code{R}. | |
| #' | |
| #' | |
| #' @param alias mapping from free names in expr to target names to use (mapping have both unique names and unique values). | |
| #' @param expr block to prepare for execution. | |
| #' @param ... force later arguments to be bound by name. | |
| #' @param envir environment to work in. | |
| #' @param subsMethod character substitution method, one of 'langsubs' (preferred), 'subsubs', or 'stringsubs'. | |
| #' @param strict logical if TRUE names and values must be valid un-quoted names, and not dot. | |
| #' @param eval logical if TRUE execute the re-mapped expression (else return it). | |
| #' @param debugPrint logical if TRUE print debugging information when in stringsubs mode. | |
| #' @return result of expr executed in calling environment (or expression if eval==FALSE). | |
| #' | |
| #' @seealso \code{\link[base]{bquote}}, \code{\link[base]{do.call}} | |
| #' | |
| #' @examples | |
| #' | |
| #' d <- data.frame( | |
| #' Sepal_Length=c(5.8,5.7), | |
| #' Sepal_Width=c(4.0,4.4), | |
| #' Species='setosa') | |
| #' | |
| #' mapping <- qc( | |
| #' AREA_COL = Sepal_area, | |
| #' LENGTH_COL = Sepal_Length, | |
| #' WIDTH_COL = Sepal_Width | |
| #' ) | |
| #' | |
| #' # let-block notation | |
| #' let( | |
| #' mapping, | |
| #' d %.>% | |
| #' transform(., AREA_COL = LENGTH_COL * WIDTH_COL) | |
| #' ) | |
| #' | |
| #' | |
| #' # Note: in packages can make assignment such as: | |
| #' # AREA_COL <- LENGTH_COL <- WIDTH_COL <- NULL | |
| #' # prior to code so targets don't look like unbound names. | |
| #' | |
| #' | |
| #' @export | |
| let <- function(alias, expr, | |
| ..., | |
| envir= parent.frame(), | |
| subsMethod= 'langsubs', | |
| strict= TRUE, | |
| eval= TRUE, | |
| debugPrint= FALSE) { | |
| force(envir) | |
| exprQ <- substitute(expr) # do this early before things enter local environment | |
| stop_if_dot_args(substitute(list(...)), "wrapr::let") | |
| allowedMethods <- c('langsubs', 'stringsubs', 'subsubs') | |
| if((!is.character(subsMethod)) || | |
| (length(subsMethod)!=1) || | |
| (!(subsMethod %in% allowedMethods))) { | |
| stop(paste("wrapr::let subsMethod must be one of:", | |
| paste(allowedMethods, collapse = ', '))) | |
| } | |
| alias <- prepareAlias(alias, strict=strict) | |
| exprS <- exprQ | |
| if(length(alias)>0) { | |
| if(subsMethod=='langsubs') { | |
| # recursive language implementation. | |
| # only replace matching symbols. | |
| exprS <- letprep_lang(alias, exprQ) | |
| } else if(subsMethod=='subsubs') { | |
| # substitute based solution, does not bind left-hand sides | |
| aliasN <- lapply(alias, as.name) | |
| exprS <- do.call(substitute, list(exprQ, aliasN)) | |
| } else if(subsMethod=='stringsubs') { | |
| # string substitution based implementation. | |
| # Similar to \code{gtools::strmacro} by Gregory R. Warnes. | |
| exprS <- letprep_str(alias, wrapr_deparse(exprQ)) | |
| } else { | |
| stop(paste("wrapr::let unexpected subsMethod '", subsMethod, "'")) | |
| } | |
| } | |
| if(debugPrint) { | |
| print(alias) | |
| print(exprS) | |
| } | |
| if(!eval) { | |
| return(exprS) | |
| } | |
| # try to execute expression in parent environment | |
| rm(list=setdiff(ls(), c('exprS', 'envir'))) | |
| eval(exprS, | |
| envir=envir, | |
| enclos=envir) | |
| } | |
| #' Inline let-block notation. | |
| #' | |
| #' Inline version of \code{let}-block. | |
| #' | |
| #' @param a (left argument) named character vector with target names as names, and replacement names as values. | |
| #' @param b (right argument) expression or block to evaluate under let substitution rules. | |
| #' @return evaluated block. | |
| #' | |
| #' @examples | |
| #' | |
| #' d <- data.frame( | |
| #' Sepal_Length=c(5.8,5.7), | |
| #' Sepal_Width=c(4.0,4.4), | |
| #' Species='setosa') | |
| #' | |
| #' # let-block notation | |
| #' let( | |
| #' qc( | |
| #' AREA_COL = Sepal_area, | |
| #' LENGTH_COL = Sepal_Length, | |
| #' WIDTH_COL = Sepal_Width | |
| #' ), | |
| #' d %.>% | |
| #' transform(., AREA_COL = LENGTH_COL * WIDTH_COL) | |
| #' ) | |
| #' | |
| #' # %in_block% notation | |
| #' qc( | |
| #' AREA_COL = Sepal_area, | |
| #' LENGTH_COL = Sepal_Length, | |
| #' WIDTH_COL = Sepal_Width | |
| #' ) %in_block% { | |
| #' d %.>% | |
| #' transform(., AREA_COL = LENGTH_COL * WIDTH_COL) | |
| #' } | |
| #' | |
| #' # Note: in packages can make assignment such as: | |
| #' # AREA_COL <- LENGTH_COL <- WIDTH_COL <- NULL | |
| #' # prior to code so targets don't look like unbound names. | |
| #' | |
| #' @export | |
| #' | |
| #' @seealso \code{\link{let}} | |
| #' | |
| `%in_block%` <- function(a, b) { | |
| env <- parent.frame() | |
| force(env) # probably do not need this step | |
| do.call(let, | |
| list( | |
| expr = substitute(b), | |
| alias = a, | |
| envir = env)) | |
| } | |