diff --git a/NAMESPACE b/NAMESPACE index 2374b28..47e8dbc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export("%eq%") export("%p%") export("labels<-") export(Sys.path) +export(changed) export(copy.attributes) export(dir.find) export(file.find) diff --git a/NEWS.md b/NEWS.md index 68dd73e..7817036 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ * Fix for CRAN. * Added infinity symbol "infin" keyword to `symbol()` function. +* Added `changed()` function to identify grouping boundaries. +* Fixed bug on sort that was causing factors to be ignored in some circumstances. # common 1.0.8 diff --git a/R/other_functions.R b/R/other_functions.R index 8937ab8..a6c092a 100644 --- a/R/other_functions.R +++ b/R/other_functions.R @@ -347,3 +347,212 @@ copy.attributes <- function(source, target) { return(ret) } + +# Changed Functions ------------------------------------------------------- + + + +#' @title Identify changed values +#' @description The \code{changed} function identifies changes in a vector or +#' data frame. The function is used to locate grouping boundaries. It will +#' return a TRUE each time the current value is different from the previous +#' value. The \code{changed} function is similar to the Base R \code{duplicated} +#' function, except the \code{changed} function will return TRUE even if +#' the changed value is not unique. +#' @details +#' For a data frame, +#' by default, the function will return another data frame with an equal +#' number of change indicator columns. The column names +#' will be the original column names, with a ".changed" suffix. +#' +#' To collapse +#' the multiple change indicators into one vector, use the "simplify" option. +#' In this case, the returned vector will essentially be an "or" operation +#' across all columns. +#' @param x A vector of values in which to identify changed values. +#' Also accepts a data frame. In the case of a data frame, the function +#' will use all columns. Input data can be any data type. 
+#' @param reverse Reverse the direction of the scan to identify the last
+#' value in a group instead of the first.
+#' @param simplify If the input data to the function is a data frame,
+#' the simplify option will return a single vector of indicator values
+#' instead of a data frame of indicator values.
+#' @returns A vector of TRUE or FALSE values indicating the grouping boundaries
+#' of the vector or data frame. For a data frame input with
+#' \code{simplify = FALSE}, a data frame of indicator columns is returned
+#' instead. A NULL input returns NULL.
+#' @examples
+#' # Create sample vector
+#' v1 <- c(1, 1, 1, 2, 2, 3, 3, 3, 1, 1)
+#'
+#' # Identify changed values
+#' res1 <- changed(v1)
+#'
+#' # View results
+#' res1
+#' # [1]  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE
+#'
+#' # Create sample data frame
+#' v2 <- c("A", "A", "A", "A", "A", "A", "B", "B", "B", "B")
+#' dat <- data.frame(v1, v2)
+#'
+#' # View original data frame
+#' dat
+#' #    v1 v2
+#' # 1   1  A
+#' # 2   1  A
+#' # 3   1  A
+#' # 4   2  A
+#' # 5   2  A
+#' # 6   3  A
+#' # 7   3  B
+#' # 8   3  B
+#' # 9   1  B
+#' # 10  1  B
+#'
+#' # Get changed values for each column
+#' res2 <- changed(dat)
+#'
+#' # View results
+#' res2
+#' #    v1.changed v2.changed
+#' # 1        TRUE       TRUE
+#' # 2       FALSE      FALSE
+#' # 3       FALSE      FALSE
+#' # 4        TRUE      FALSE
+#' # 5       FALSE      FALSE
+#' # 6        TRUE      FALSE
+#' # 7       FALSE       TRUE
+#' # 8       FALSE      FALSE
+#' # 9        TRUE      FALSE
+#' # 10      FALSE      FALSE
+#'
+#' # Get changed values for all columns
+#' res3 <- changed(dat, simplify = TRUE)
+#'
+#' # View results
+#' res3
+#' # [1]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE
+#'
+#' # Get last items in each group instead of first
+#' res4 <- changed(dat, reverse = TRUE)
+#'
+#' # View results
+#' res4
+#' #    v1.changed v2.changed
+#' # 1       FALSE      FALSE
+#' # 2       FALSE      FALSE
+#' # 3        TRUE      FALSE
+#' # 4       FALSE      FALSE
+#' # 5        TRUE      FALSE
+#' # 6       FALSE       TRUE
+#' # 7       FALSE      FALSE
+#' # 8        TRUE      FALSE
+#' # 9       FALSE      FALSE
+#' # 10       TRUE       TRUE
+#' @export
+changed <- function(x, reverse = FALSE, simplify = FALSE) {
+
+  # Nothing to scan
+  if (is.null(x)) {
+    return(NULL)
+  }
+
+  # Plain vectors go straight to the vector implementation
+  if (!is.data.frame(x)) {
+    return(changedv(x, reverse))
+  }
+
+  # One indicator vector per column. Names are dropped here and assigned
+  # after the conversion so they are not altered by as.data.frame().
+  retv <- unname(lapply(x, changedv, reverse = reverse))
+
+  ret <- as.data.frame(retv)
+  names(ret) <- paste0(names(x), ".changed")
+
+  if (simplify) {
+    ret <- collapsedf(ret)
+  }
+
+  ret
+}
+
+# Vector version.
+# Returns a logical vector the same length as x. The first element of the
+# scan is always TRUE; every other element is TRUE when it differs from
+# its predecessor. With reverse = TRUE the scan runs from the end, so the
+# last element of each run is flagged instead of the first.
+changedv <- function(x, reverse = FALSE) {
+
+  backward <- isTRUE(as.logical(reverse))
+
+  vect <- x
+  if (backward) {
+    vect <- rev(x)
+  }
+
+  n <- length(vect)
+
+  # The first scanned element always opens a group. Sizing the result from
+  # n also makes empty and single-element input return the right length.
+  ret <- rep(TRUE, n)
+
+  if (n > 1L) {
+
+    # Compare each element with its predecessor. Subsetting by index,
+    # rather than building c(NA, ...), keeps the class of the input so
+    # factors and dates are compared as themselves.
+    ret[-1L] <- !compint(vect[-1L], vect[-n])
+  }
+
+  if (backward) {
+    ret <- rev(ret)
+  }
+
+  ret
+}
+
+
+# Element-wise equality that treats two NA values as equal and an NA
+# paired with a non-NA value as different. Vectorized over x and y with
+# Vectorize(), so it returns one TRUE/FALSE per pair.
+compint <- Vectorize(function(x, y) {
+
+  ret <- FALSE
+
+  if (all(is.na(x) & is.na(y))) {
+
+    # Both missing: same value
+    ret <- TRUE
+
+  } else if (all(is.na(x) | is.na(y))) {
+
+    # Exactly one missing: different
+    ret <- FALSE
+
+  } else if (all(x == y)) {
+
+    ret <- TRUE
+  }
+
+  ret
+
+}, USE.NAMES = FALSE, SIMPLIFY = TRUE)
+
+
+
+# Collapse a data frame of indicator columns into a single vector by
+# applying collapse() across the columns. A NULL or zero-column input is
+# returned unchanged; a one-column input returns that column as a vector.
+collapsedf <- function(df) {
+
+  if (is.null(df) || length(df) == 0L) {
+    return(df)
+  }
+
+  Reduce(collapse, df)
+}
+
+# Combine two indicator vectors: TRUE where either one is TRUE.
+collapse <- function(x, y) {
+
+  x | y
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 2f70df4..339a8e9 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -33,6 +33,7 @@ reference:
   - roundup
   - find.names
   - copy.attributes
+  - changed
 
 navbar:
   type: inverse
diff --git a/docs/404.html b/docs/404.html
index 17a41ff..4382435 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -39,7 +39,7 @@
diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 2306975..55d1d45 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ diff --git a/docs/articles/common.html b/docs/articles/common.html index 6175700..e269fe4 100644 --- a/docs/articles/common.html +++ b/docs/articles/common.html @@ -40,7 +40,7 @@ @@ -135,6 +135,9 @@subsc()
functions in that it looks
up a UTF-8 character. Instead of providing a direct 1 to 1 translation,
however, it looks up the UTF-8 character based on a keyword.
-For example, the ‘reg’ keyword looks up the registered trademark -symbol. The ‘ne’ keyword looks up the symbol for not equals. These -keyword names follow HTML conventions. The function supports keywords -for trademarks, currencies, mathematical symbols, logical symbols, Greek +
For example, the ‘reg’ keyword looks up the registered trademark
+symbol. The ‘ne’ keyword looks up the symbol for not equals. These keyword
+names follow HTML conventions. The function supports keywords for
+trademarks, currencies, mathematical symbols, logical symbols, Greek
letters, and more. See the symbol()
documentation for a
complete list of supported keywords.
Base R has a duplicated()
function that is sometimes
+used to identify grouping boundaries in a vector. But this function also
+performs a unique()
operation on the vector, such that not
+all boundaries return a TRUE value. Observe the following:
# Create sample vector
+v1 <- c(1, 1, 1, 2, 2, 3, 3, 3, 1, 1)
+
+# Identify duplicated values
+res1 <- !duplicated(v1)
+
+# View duplicated results
+res1
+# [1] TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE
+Notice that the “1” at position nine does not return TRUE.
+Now let's run the same vector through the changed()
+function:
# Identify changed values
+res2 <- changed(v1)
+
+# View changed results
+res2
+# [1] TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE
+This time, the changed()
function identified each time
+the vector changed value, whether or not the value had appeared
+previously.
This function can also be used on data frames:
+# Create sample data frame
+v2 <- c("A", "A", "A", "A", "A", "A", "B", "B", "B", "B")
+dat <- data.frame(v1, v2)
+
+# View original data frame
+dat
+# v1 v2
+# 1 1 A
+# 2 1 A
+# 3 1 A
+# 4 2 A
+# 5 2 A
+# 6 3 A
+# 7 3 B
+# 8 3 B
+# 9 1 B
+# 10 1 B
+
+# Get changed values for each column
+res3 <- changed(dat)
+
+# View results
+res3
+# v1.changed v2.changed
+# 1 TRUE TRUE
+# 2 FALSE FALSE
+# 3 FALSE FALSE
+# 4 TRUE FALSE
+# 5 FALSE FALSE
+# 6 TRUE FALSE
+# 7 FALSE TRUE
+# 8 FALSE FALSE
+# 9 TRUE FALSE
+# 10 FALSE FALSE
+If you wish to return a single indicator vector for the combination +of all columns, use the “simplify” option.
+# Get changed values for all columns
+res4 <- changed(dat, simplify = TRUE)
+
+# View results
+res4
+# [1] TRUE FALSE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE
+The above vector returns a TRUE when either the “v1” or “v2” value +changes.
+The “reverse” option identifies the last items in a group instead of +the first:
+# Find last items in each group
+res3 <- changed(dat, reverse = TRUE)
+
+# View results
+res3
+# v1.changed v2.changed
+# 1 FALSE FALSE
+# 2 FALSE FALSE
+# 3 TRUE FALSE
+# 4 FALSE FALSE
+# 5 TRUE FALSE
+# 6 FALSE TRUE
+# 7 FALSE FALSE
+# 8 TRUE FALSE
+# 9 FALSE FALSE
+# 10 TRUE TRUE
+
Bosak D (2023). common: Solutions for Common Problems in Base R. -R package version 1.0.8, https://common.r-sassy.org. +R package version 1.0.9, https://common.r-sassy.org.
@Manual{, title = {common: Solutions for Common Problems in Base R}, author = {David Bosak}, year = {2023}, - note = {R package version 1.0.8}, + note = {R package version 1.0.9}, url = {https://common.r-sassy.org}, }diff --git a/docs/index.html b/docs/index.html index 4281d2b..7d7975e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ diff --git a/docs/news/index.html b/docs/news/index.html index a1b0a91..91b8b4d 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ @@ -51,7 +51,14 @@
The changed
function identifies changes in a vector or
+data frame. The function is used to locate grouping boundaries. It will
+return a TRUE each time the current value is different from the previous
+value. The changed
function is similar to the Base R duplicated
+function, except the changed
function will return TRUE even if
+the changed value is not unique.
changed(x, reverse = FALSE, simplify = FALSE)
A vector of values in which to identify changed values. +Also accepts a data frame. In the case of a data frame, the function +will use all columns. Input data can be any data type.
Reverse the direction of the scan to identify the last +value in a group instead of the first.
If the input data to the function is a data frame, +the simplify option will return a single vector of indicator values +instead of a data frame of indicator values.
A vector of TRUE or FALSE values indicating the grouping boundaries +of the vector or data frame.
+For a data frame, +by default, the function will return another data frame with an equal +number of change indicator columns. The column names +will be the original column names, with a ".changed" suffix.
+To collapse +the multiple change indicators into one vector, use the "simplify" option. +In this case, the returned vector will essentially be an "or" operation +across all columns.
+# Create sample vector
+v1 <- c(1, 1, 1, 2, 2, 3, 3, 3, 1, 1)
+
+# Identify changed values
+res1 <- changed(v1)
+
+# View results
+res1
+# [1] TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE
+
+# Create sample data frame
+v2 <- c("A", "A", "A", "A", "A", "A", "B", "B", "B", "B")
+dat <- data.frame(v1, v2)
+
+# View original data frame
+dat
+# v1 v2
+# 1 1 A
+# 2 1 A
+# 3 1 A
+# 4 2 A
+# 5 2 A
+# 6 3 A
+# 7 3 B
+# 8 3 B
+# 9 1 B
+# 10 1 B
+
+# Get changed values for each column
+res2 <- changed(dat)
+
+# View results
+res2
+# v1.changed v2.changed
+# 1 TRUE TRUE
+# 2 FALSE FALSE
+# 3 FALSE FALSE
+# 4 TRUE FALSE
+# 5 FALSE FALSE
+# 6 TRUE FALSE
+# 7 FALSE TRUE
+# 8 FALSE FALSE
+# 9 TRUE FALSE
+# 10 FALSE FALSE
+
+# Get changed values for all columns
+res3 <- changed(dat, simplify = TRUE)
+
+# View results
+res3
+# [1] TRUE FALSE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE
+
+# Get last items in each group instead of first
+res4 <- changed(dat, reverse = TRUE)
+
+# View results
+res4
+# v1.changed v2.changed
+# 1 FALSE FALSE
+# 2 FALSE FALSE
+# 3 TRUE FALSE
+# 4 FALSE FALSE
+# 5 TRUE FALSE
+# 6 FALSE TRUE
+# 7 FALSE FALSE
+# 8 TRUE FALSE
+# 9 FALSE FALSE
+# 10 TRUE TRUE
Copy attributes between two data frames
Identify changed values
The following symbol keywords are available:
Trademark and Copyright: copy, reg, trade
Financial: cent, euro, pound, rupee, ruble, yen, yuan
Mathematical: asymp, bcong, cong, coprod, empty, fnof, - ge, int, Int, le, ncong, ne, not, part, plusmn, + ge, int, Int, infin, le, ncong, ne, not, part, plusmn, prod, radic, sime, sum
Logical: and, cap, cup, comp, cuvee, cuwed, exist,
forall, fork, isin, nexist, ni, notin,
diff --git a/docs/reference/v.html b/docs/reference/v.html
index d55b59d..84cf75c 100644
--- a/docs/reference/v.html
+++ b/docs/reference/v.html
@@ -20,7 +20,7 @@