diff --git a/NAMESPACE b/NAMESPACE index 2374b28..47e8dbc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export("%eq%") export("%p%") export("labels<-") export(Sys.path) +export(changed) export(copy.attributes) export(dir.find) export(file.find) diff --git a/NEWS.md b/NEWS.md index 68dd73e..7817036 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ * Fix for CRAN. * Added infinity symbol "infin" keyword to `symbol()` function. +* Added `changed()` function to identify grouping boundaries. +* Fixed bug on sort that was causing factors to be ignored in some circumstances. # common 1.0.8 diff --git a/R/other_functions.R b/R/other_functions.R index 8937ab8..a6c092a 100644 --- a/R/other_functions.R +++ b/R/other_functions.R @@ -347,3 +347,212 @@ copy.attributes <- function(source, target) { return(ret) } + +# Changed Functions ------------------------------------------------------- + + + +#' @title Identify changed values +#' @description The \code{changed} function identifies changes in a vector or +#' data frame. The function is used to locate grouping boundaries. It will +#' return a TRUE each time the current value is different from the previous +#' value. The \code{changed} function is similar to the Base R \code{duplicated} +#' function, except that the \code{changed} function will return TRUE even if +#' the changed value is not unique. +#' @details +#' For a data frame, +#' by default, the function will return another data frame with an equal +#' number of change indicator columns. The column names +#' will be the original column names, with a ".changed" suffix. +#' +#' To collapse +#' the multiple change indicators into one vector, use the "simplify" option. +#' In this case, the returned vector will essentially be an "or" operation +#' across all columns. +#' @param x A vector of values in which to identify changed values. +#' Also accepts a data frame. In the case of a data frame, the function +#' will use all columns. Input data can be any data type. 
+#' @param reverse Reverse the direction of the scan to identify the last
+#' value in a group instead of the first.
+#' @param simplify If the input data to the function is a data frame,
+#' the simplify option will return a single vector of indicator values
+#' instead of a data frame of indicator values.
+#' @returns A vector of TRUE or FALSE values indicating the grouping boundaries
+#' of the vector or data frame. For a data frame input with
+#' \code{simplify = FALSE}, a data frame of indicator columns is returned.
+#' @examples
+#' # Create sample vector
+#' v1 <- c(1, 1, 1, 2, 2, 3, 3, 3, 1, 1)
+#'
+#' # Identify changed values
+#' res1 <- changed(v1)
+#'
+#' # View results
+#' res1
+#' # [1]  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE
+#'
+#' # Create sample data frame
+#' v2 <- c("A", "A", "A", "A", "A", "A", "B", "B", "B", "B")
+#' dat <- data.frame(v1, v2)
+#'
+#' # View original data frame
+#' dat
+#' #    v1 v2
+#' # 1   1  A
+#' # 2   1  A
+#' # 3   1  A
+#' # 4   2  A
+#' # 5   2  A
+#' # 6   3  A
+#' # 7   3  B
+#' # 8   3  B
+#' # 9   1  B
+#' # 10  1  B
+#'
+#' # Get changed values for each column
+#' res2 <- changed(dat)
+#'
+#' # View results
+#' res2
+#' #    v1.changed v2.changed
+#' # 1        TRUE       TRUE
+#' # 2       FALSE      FALSE
+#' # 3       FALSE      FALSE
+#' # 4        TRUE      FALSE
+#' # 5       FALSE      FALSE
+#' # 6        TRUE      FALSE
+#' # 7       FALSE       TRUE
+#' # 8       FALSE      FALSE
+#' # 9        TRUE      FALSE
+#' # 10      FALSE      FALSE
+#'
+#' # Get changed values for all columns
+#' res3 <- changed(dat, simplify = TRUE)
+#'
+#' # View results
+#' res3
+#' # [1]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE
+#'
+#' # Get last items in each group instead of first
+#' res4 <- changed(dat, reverse = TRUE)
+#'
+#' # View results
+#' res4
+#' #    v1.changed v2.changed
+#' # 1       FALSE      FALSE
+#' # 2       FALSE      FALSE
+#' # 3        TRUE      FALSE
+#' # 4       FALSE      FALSE
+#' # 5        TRUE      FALSE
+#' # 6       FALSE       TRUE
+#' # 7       FALSE      FALSE
+#' # 8        TRUE      FALSE
+#' # 9       FALSE      FALSE
+#' # 10       TRUE       TRUE
+#' @export
+changed <- function(x, reverse = FALSE, simplify = FALSE) {
+
+  ret <- NULL
+
+  if (!is.null(x)) {
+    if (is.data.frame(x)) {
+
+      # One change-indicator vector per column; names are assigned below
+      retv <- unname(lapply(x, changedv, reverse = reverse))
+
+      ret <- as.data.frame(retv)
+
+      # A zero-column data frame has no names to assign
+      if (length(x) > 0) {
+        names(ret) <- paste0(names(x), ".changed")
+      }
+
+      if (simplify) {
+        ret <- collapsedf(ret)
+      }
+
+    } else {
+
+      ret <- changedv(x, reverse)
+    }
+
+  }
+
+  return(ret)
+}
+
+# Vector version.
+# Returns a logical vector the same length as x. An element is TRUE when it
+# differs from the element before it (or after it, when reverse is TRUE).
+# The first element scanned is always TRUE.
+changedv <- function(x, reverse = FALSE) {
+
+  n <- length(x)
+
+  # Empty input has no boundaries
+  if (n == 0) {
+    return(logical(0))
+  }
+
+  vect <- x
+  if (reverse == TRUE) {
+
+    vect <- rev(x)
+  }
+
+  # The first scanned element always starts a group
+  ret <- rep(TRUE, n)
+
+  if (n > 1) {
+
+    # Compare each element with its predecessor. Subsetting by negative
+    # index keeps both sides the same class (e.g. factor), and yields
+    # exactly n - 1 pairs, so a length-one input needs no comparison.
+    ret[-1] <- !compint(vect[-1], vect[-n])
+  }
+
+  if (reverse == TRUE) {
+
+    ret <- rev(ret)
+  }
+
+  return(ret)
+}
+
+
+# Element-wise "same value" test that treats two NAs as equal and
+# an NA paired with a non-NA as different. Vectorized over x and y.
+compint <- Vectorize(function(x, y) {
+
+  ret <- FALSE
+
+  if (all(is.na(x) & is.na(y))) {
+    ret <- TRUE
+  } else if (all(is.na(x) | is.na(y))) {
+
+    ret <- FALSE
+
+  } else if (all(x == y)) {
+
+    ret <- TRUE
+  }
+
+  return(ret)
+
+}, USE.NAMES = FALSE, SIMPLIFY = TRUE)
+
+
+
+# Collapse a data frame of logical indicator columns into a single
+# logical vector by OR-ing the columns together. A single-column
+# data frame collapses to that column. NULL and zero-column input
+# are returned unchanged.
+collapsedf <- function(df) {
+
+  ret <- df
+
+  if (!is.null(df)) {
+
+    if (length(df) > 0) {
+
+      ret <- Reduce(collapse, df)
+    }
+
+  }
+
+  return(ret)
+}
+
+# Element-wise OR of two logical vectors
+collapse <- function(x, y) {
+
+  ret <- x | y
+
+  return(ret)
+
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml index 2f70df4..339a8e9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,6 +33,7 @@ reference: - roundup - find.names - copy.attributes + - changed navbar: type: inverse diff --git a/docs/404.html b/docs/404.html index 17a41ff..4382435 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 2306975..55d1d45 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/articles/common.html b/docs/articles/common.html index 6175700..e269fe4 100644 --- a/docs/articles/common.html +++ b/docs/articles/common.html @@ -40,7 +40,7 @@ common - 1.0.8 + 1.0.9 @@ -135,6 +135,9 @@

common

  • spaces(): A function to create a string of blank spaces.
  • +
  • +changed(): A function to +identify changed values in a vector or data frame.
  • A generalized NSE quoting function @@ -605,10 +608,10 @@

    Special Symbolssupsc() and subsc() functions in that it looks up a UTF-8 character. Instead of providing a direct 1 to 1 translation, however, it looks up the UTF-8 character based on a keyword.

    -

    For example, the ‘reg’ keyword looks up the registered trademark -symbol. The ‘ne’ keyword looks up the symbol for not equals. These -keyword names follow HTML conventions. The function supports keywords -for trademarks, currencies, mathematical symbols, logical symbols, Greek +

    For example, the ‘reg’ keyword looks up the registered trademark +symbol. The ‘ne’ keyword looks up the symbol for not equals. These keyword +names follow HTML conventions. The function supports keywords for +trademarks, currencies, mathematical symbols, logical symbols, Greek letters, and more. See the symbol() documentation for a complete list of supported keywords.

    top

    @@ -632,6 +635,100 @@

    Repeating spacesstr <- "Left" %p% spaces(25) %p% "Right" str # [1] "Left Right"

    +

    top

    + +
    +

    Identify changed values +

    +

    Base R has a duplicated() function that is sometimes +used to identify grouping boundaries in a vector. But this function also +performs a unique() operation on the vector, such that not +all boundaries return a TRUE value. Observe the following:

    +
    # Create sample vector
    +v1 <- c(1, 1, 1, 2, 2, 3, 3, 3, 1, 1)
    +
    +# Identify duplicated values
    +res1 <- !duplicated(v1)
    +
    +# View duplicated results
    +res1
    +# [1] TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
    +

    Notice that the “1” at position nine does not return TRUE.

    +

    Now let's run the same vector through the changed() +function:

    +
    # Identify changed values
    +res2 <- changed(v1)
    +
    +# View changed results
    +res2
    +# [1] TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE
    +

    This time, the changed() function identified each time +the vector changed value, whether or not the value had appeared +previously.

    +

    This function can also be used on data frames:

    +
    # Create sample data frame
    +v2 <- c("A", "A", "A", "A", "A", "A", "B", "B", "B", "B")
    +dat <- data.frame(v1, v2)
    +
    +# View original data frame
    +dat
    +#    v1 v2
    +# 1   1  A
    +# 2   1  A
    +# 3   1  A
    +# 4   2  A
    +# 5   2  A
    +# 6   3  A
    +# 7   3  B
    +# 8   3  B
    +# 9   1  B
    +# 10  1  B
    +
    +# Get changed values for each column
    +res3 <- changed(dat)
    +
    +# View results
    +res3
    +#    v1.changed v2.changed
    +# 1        TRUE       TRUE
    +# 2       FALSE      FALSE
    +# 3       FALSE      FALSE
    +# 4        TRUE      FALSE
    +# 5       FALSE      FALSE
    +# 6        TRUE      FALSE
    +# 7       FALSE       TRUE
    +# 8       FALSE      FALSE
    +# 9        TRUE      FALSE
    +# 10      FALSE      FALSE
    +

    If you wish to return a single indicator vector for the combination +of all columns, use the “simplify” option.

    +
    # Get changed values for all columns
    +res4 <- changed(dat, simplify = TRUE)
    +
    +# View results
    +res4
    +# [1]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE
    +

    The above vector returns a TRUE when either the “v1” or “v2” value +changes.

    +

    The “reverse” option identifies the last items in a group instead of +the first:

    +
    # Find last items in each group
    +res3 <- changed(dat, reverse = TRUE)
    +
    +# View results
    +res3
    +#    v1.changed v2.changed
    +# 1       FALSE      FALSE
    +# 2       FALSE      FALSE
    +# 3        TRUE      FALSE
    +# 4       FALSE      FALSE
    +# 5        TRUE      FALSE
    +# 6       FALSE       TRUE
    +# 7       FALSE      FALSE
    +# 8        TRUE      FALSE
    +# 9       FALSE      FALSE
    +# 10       TRUE       TRUE
    +

    top

    diff --git a/docs/articles/index.html b/docs/articles/index.html index dc9001a..681a42d 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/authors.html b/docs/authors.html index f3a4565..0fe4f8a 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ common - 1.0.8 + 1.0.9 @@ -70,13 +70,13 @@

    Citation

    Bosak D (2023). common: Solutions for Common Problems in Base R. -R package version 1.0.8, https://common.r-sassy.org. +R package version 1.0.9, https://common.r-sassy.org.

    @Manual{,
       title = {common: Solutions for Common Problems in Base R},
       author = {David Bosak},
       year = {2023},
    -  note = {R package version 1.0.8},
    +  note = {R package version 1.0.9},
       url = {https://common.r-sassy.org},
     }
    diff --git a/docs/index.html b/docs/index.html index 4281d2b..7d7975e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/news/index.html b/docs/news/index.html index a1b0a91..91b8b4d 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ common - 1.0.8 + 1.0.9 @@ -51,7 +51,14 @@

    Changelog

    - + +
    +
    +
    diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 557ac1c..1f1494f 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -3,7 +3,7 @@ pkgdown: 2.0.7 pkgdown_sha: ~ articles: common: common.html -last_built: 2023-08-07T11:39Z +last_built: 2023-09-18T02:02Z urls: reference: https://common.r-sassy.org/reference article: https://common.r-sassy.org/articles diff --git a/docs/reference/Sys.path.html b/docs/reference/Sys.path.html index f393b91..1924301 100644 --- a/docs/reference/Sys.path.html +++ b/docs/reference/Sys.path.html @@ -19,7 +19,7 @@ common - 1.0.8 + 1.0.9
    diff --git a/docs/reference/changed.html b/docs/reference/changed.html new file mode 100644 index 0000000..bb26044 --- /dev/null +++ b/docs/reference/changed.html @@ -0,0 +1,204 @@ + +Identify changed values — changed • common + + +
    +
    + + + +
    +
    + + +
    +

    The changed function identifies changes in a vector or +data frame. The function is used to locate grouping boundaries. It will +return a TRUE each time the current value is different from the previous +value. The changed function is similar to the Base R duplicated +function, except that the changed function will return TRUE even if +the changed value is not unique.

    +
    + +
    +
    changed(x, reverse = FALSE, simplify = FALSE)
    +
    + +
    +

    Arguments

    +
    x
    +

    A vector of values in which to identify changed values. +Also accepts a data frame. In the case of a data frame, the function +will use all columns. Input data can be any data type.

    + + +
    reverse
    +

    Reverse the direction of the scan to identify the last +value in a group instead of the first.

    + + +
    simplify
    +

    If the input data to the function is a data frame, +the simplify option will return a single vector of indicator values +instead of a data frame of indicator values.

    + +
    +
    +

    Value

    + + +

    A vector of TRUE or FALSE values indicating the grouping boundaries +of the vector or data frame.

    +
    +
    +

    Details

    +

    For a data frame, +by default, the function will return another data frame with an equal +number of change indicator columns. The column names +will be the original column names, with a ".changed" suffix.

    +

    To collapse +the multiple change indicators into one vector, use the "simplify" option. +In this case, the returned vector will essentially be an "or" operation +across all columns.

    +
    + +
    +

    Examples

    +
    # Create sample vector
    +v1 <- c(1, 1, 1, 2, 2, 3, 3, 3, 1, 1)
    +
    +# Identify changed values
    +res1 <- changed(v1)
    +
    +# View results
    +res1
    +# [1] TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE
    +
    +# Create sample data frame
    +v2 <- c("A", "A", "A", "A", "A", "A", "B", "B", "B", "B")
    +dat <- data.frame(v1, v2)
    +
    +# View original data frame
    +dat
    +#    v1 v2
    +# 1   1  A
    +# 2   1  A
    +# 3   1  A
    +# 4   2  A
    +# 5   2  A
    +# 6   3  A
    +# 7   3  B
    +# 8   3  B
    +# 9   1  B
    +# 10  1  B
    +
    +# Get changed values for each column
    +res2 <- changed(dat)
    +
    +# View results
    +res2
    +#    v1.changed v2.changed
    +# 1        TRUE       TRUE
    +# 2       FALSE      FALSE
    +# 3       FALSE      FALSE
    +# 4        TRUE      FALSE
    +# 5       FALSE      FALSE
    +# 6        TRUE      FALSE
    +# 7       FALSE       TRUE
    +# 8       FALSE      FALSE
    +# 9        TRUE      FALSE
    +# 10      FALSE      FALSE
    +
    +# Get changed values for all columns
    +res3 <- changed(dat, simplify = TRUE)
    +
    +# View results
    +res3
    +# [1] TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE
    +
    +# Get last items in each group instead of first
    +res4 <- changed(dat, reverse = TRUE)
    +
    +# View results
    +res4
    +#    v1.changed v2.changed
    +# 1       FALSE      FALSE
    +# 2       FALSE      FALSE
    +# 3        TRUE      FALSE
    +# 4       FALSE      FALSE
    +# 5        TRUE      FALSE
    +# 6       FALSE       TRUE
    +# 7       FALSE      FALSE
    +# 8        TRUE      FALSE
    +# 9       FALSE      FALSE
    +# 10       TRUE       TRUE
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/common.html b/docs/reference/common.html index bf5d99b..da30949 100644 --- a/docs/reference/common.html +++ b/docs/reference/common.html @@ -22,7 +22,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/copy.attributes.html b/docs/reference/copy.attributes.html index eea14cf..1f46354 100644 --- a/docs/reference/copy.attributes.html +++ b/docs/reference/copy.attributes.html @@ -22,7 +22,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/dir.find.html b/docs/reference/dir.find.html index 3637f64..1893b70 100644 --- a/docs/reference/dir.find.html +++ b/docs/reference/dir.find.html @@ -24,7 +24,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/file.find.html b/docs/reference/file.find.html index 5d542ef..f9b9e8c 100644 --- a/docs/reference/file.find.html +++ b/docs/reference/file.find.html @@ -24,7 +24,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/find.names.html b/docs/reference/find.names.html index e564c49..58ec2f6 100644 --- a/docs/reference/find.names.html +++ b/docs/reference/find.names.html @@ -19,7 +19,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/grapes-eq-grapes.html b/docs/reference/grapes-eq-grapes.html index 3feb50c..f5bc644 100644 --- a/docs/reference/grapes-eq-grapes.html +++ b/docs/reference/grapes-eq-grapes.html @@ -23,7 +23,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/grapes-p-grapes.html b/docs/reference/grapes-p-grapes.html index 1bb15bc..e2809ae 100644 --- a/docs/reference/grapes-p-grapes.html +++ b/docs/reference/grapes-p-grapes.html @@ -21,7 +21,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/index.html b/docs/reference/index.html index 6eff565..7ab0a1a 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ common - 1.0.8 + 1.0.9 @@ -129,6 +129,10 @@

    Other Functions copy.attributes()

    Copy attributes between two data frames

    + +

    changed()

    + +

    Identify changed values

    diff --git a/docs/reference/roundup.html b/docs/reference/roundup.html index ab5b0c9..d5b3bb9 100644 --- a/docs/reference/roundup.html +++ b/docs/reference/roundup.html @@ -19,7 +19,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/sort.data.frame.html b/docs/reference/sort.data.frame.html index 39b46c7..cd75aa4 100644 --- a/docs/reference/sort.data.frame.html +++ b/docs/reference/sort.data.frame.html @@ -19,7 +19,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/spaces.html b/docs/reference/spaces.html index 1ad1a6d..f5882e7 100644 --- a/docs/reference/spaces.html +++ b/docs/reference/spaces.html @@ -18,7 +18,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/subsc.html b/docs/reference/subsc.html index 304ebab..d2d18f4 100644 --- a/docs/reference/subsc.html +++ b/docs/reference/subsc.html @@ -26,7 +26,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/supsc.html b/docs/reference/supsc.html index 3c409b2..1a1ce97 100644 --- a/docs/reference/supsc.html +++ b/docs/reference/supsc.html @@ -24,7 +24,7 @@ common - 1.0.8 + 1.0.9 diff --git a/docs/reference/symbol.html b/docs/reference/symbol.html index 7e89b29..4ac4a24 100644 --- a/docs/reference/symbol.html +++ b/docs/reference/symbol.html @@ -24,7 +24,7 @@ common - 1.0.8 + 1.0.9 @@ -93,7 +93,7 @@

    Keywords

    The following symbol keywords are available: