Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 1.2.0.6
Version: 1.2.0.7
Authors@R: c(
person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531")),
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ CHANGES
* `display()` methods now support the `{tinytable}` package. Use `format = "tt"`
to export tables as `tinytable` objects (#646).

* Improved performance for several functions that process grouped data frames
when the input is a grouped `tibble` (#651).

BUG FIXES

* Fixed an issue when `demean()`ing nested structures with more than 2 grouping
Expand Down
30 changes: 30 additions & 0 deletions R/data_arrange.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,19 @@ data_arrange.default <- function(data, select = NULL, safe = TRUE) {
return(data)
}

original_x <- data

# Input validation check
data <- .coerce_to_dataframe(data)

# Remove tidyverse attributes, will add them back at the end
if (inherits(original_x, "tbl_df")) {
tbl_input <- TRUE
data <- as.data.frame(data, stringsAsFactors = FALSE)
} else {
tbl_input <- FALSE
}

# find which vars should be decreasing
desc <- select[startsWith(select, "-")]
desc <- gsub("^-", "", desc)
Expand Down Expand Up @@ -95,15 +105,27 @@ data_arrange.default <- function(data, select = NULL, safe = TRUE) {
rownames(out) <- NULL
}

# add back custom attributes
out <- .replace_attrs(out, attributes(original_x))

out
}


#' @export
data_arrange.grouped_df <- function(data, select = NULL, safe = TRUE) {
original_x <- data
grps <- attr(data, "groups", exact = TRUE)
grps <- grps[[".rows"]]

# Remove tidyverse attributes, will add them back at the end
if (inherits(data, "tbl_df")) {
tbl_input <- TRUE
data <- as.data.frame(data, stringsAsFactors = FALSE)
} else {
tbl_input <- FALSE
}

out <- lapply(grps, function(x) {
data_arrange.default(data[x, ], select = select, safe = safe)
})
Expand All @@ -114,5 +136,13 @@ data_arrange.grouped_df <- function(data, select = NULL, safe = TRUE) {
rownames(out) <- NULL
}

# add back tidyverse attributes
if (isTRUE(tbl_input)) {
class(out) <- c("tbl_df", "tbl", "data.frame")
}

# add back custom attributes
out <- .replace_attrs(out, attributes(original_x))

out
}
2 changes: 2 additions & 0 deletions R/data_duplicated.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ data_duplicated.grouped_df <- function(data,
grps <- attr(data, "groups", exact = TRUE)
grps <- grps[[".rows"]]

data <- as.data.frame(data)

out <- lapply(grps, function(x) {
data_duplicated.data.frame(data[x, ], select = select)
})
Expand Down
32 changes: 32 additions & 0 deletions R/data_match.R
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,15 @@ data_filter <- function(x, ...) {
#' @export
data_filter.data.frame <- function(x, ...) {
out <- x

# convert tibble to data.frame
if (inherits(x, "tbl_df")) {
out <- as.data.frame(out, stringsAsFactors = FALSE)
tbl_input <- TRUE
} else {
tbl_input <- FALSE
}

dots <- match.call(expand.dots = FALSE)[["..."]]

if (any(nzchar(names(dots), keepNA = TRUE))) {
Expand Down Expand Up @@ -275,15 +284,30 @@ data_filter.data.frame <- function(x, ...) {

# add back custom attributes
out <- .replace_attrs(out, attributes(x))

# add back tidyverse attributes
if (isTRUE(tbl_input)) {
class(out) <- c("tbl_df", "tbl", "data.frame")
}

out
}


#' @export
data_filter.grouped_df <- function(x, ...) {
original_x <- x
grps <- attr(x, "groups", exact = TRUE)
grps <- grps[[".rows"]]

# Remove tidyverse attributes, will add them back at the end
if (inherits(x, "tbl_df")) {
tbl_input <- TRUE
x <- as.data.frame(x, stringsAsFactors = FALSE)
} else {
tbl_input <- FALSE
}

dots <- match.call(expand.dots = FALSE)[["..."]]
out <- lapply(grps, function(grp) {
arguments <- list(x[grp, ])
Expand All @@ -297,6 +321,14 @@ data_filter.grouped_df <- function(x, ...) {
rownames(out) <- NULL
}

# add back tidyverse attributes
if (isTRUE(tbl_input)) {
class(out) <- c("tbl_df", "tbl", "data.frame")
}

# add back custom attributes
out <- .replace_attrs(out, attributes(original_x))

out
}

Expand Down
2 changes: 1 addition & 1 deletion R/data_unique.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ data_unique.grouped_df <- function(data,
grps <- attr(data, "groups", exact = TRUE)
grps <- grps[[".rows"]]

data2 <- data_ungroup(data)
data2 <- as.data.frame(data_ungroup(data))

out <- lapply(grps, function(x) {
data_unique.data.frame(data2[x, ], select = select, keep = keep, verbose = verbose)
Expand Down
19 changes: 19 additions & 0 deletions tests/testthat/test-data_match.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
match = "or",
return_indices = TRUE
))
x2 <- nrow(poorman::filter(efc, c172code == 1 | e16sex == 2))

Check warning on line 26 in tests/testthat/test-data_match.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=tests/testthat/test-data_match.R,line=26,col=9,[nrow_subset_linter] Use arithmetic to count the number of rows satisfying a condition, rather than fully subsetting the data.frame and counting the resulting rows. For example, replace nrow(subset(x, is_treatment)) with sum(x$is_treatment). NB: use na.rm = TRUE if `is_treatment` has missing values.
expect_identical(x1, x2)

# "AND" works
Expand All @@ -33,7 +33,7 @@
match = "and",
return_indices = TRUE
))
x2 <- nrow(poorman::filter(efc, c172code == 1, e16sex == 2))

Check warning on line 36 in tests/testthat/test-data_match.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=tests/testthat/test-data_match.R,line=36,col=9,[nrow_subset_linter] Use arithmetic to count the number of rows satisfying a condition, rather than fully subsetting the data.frame and counting the resulting rows. For example, replace nrow(subset(x, is_treatment)) with sum(x$is_treatment). NB: use na.rm = TRUE if `is_treatment` has missing values.
expect_identical(x1, x2)

# "NOT" works
Expand All @@ -43,7 +43,7 @@
match = "not",
return_indices = TRUE
))
x2 <- nrow(poorman::filter(efc, c172code != 1, e16sex != 2))

Check warning on line 46 in tests/testthat/test-data_match.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=tests/testthat/test-data_match.R,line=46,col=9,[nrow_subset_linter] Use arithmetic to count the number of rows satisfying a condition, rather than fully subsetting the data.frame and counting the resulting rows. For example, replace nrow(subset(x, is_treatment)) with sum(x$is_treatment). NB: use na.rm = TRUE if `is_treatment` has missing values.
expect_identical(x1, x2)

# remove NA
Expand Down Expand Up @@ -231,7 +231,7 @@
)

foo3 <- function(data) {
var <- "mpg >= 30"

Check warning on line 234 in tests/testthat/test-data_match.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=tests/testthat/test-data_match.R,line=234,col=5,[object_overwrite_linter] 'var' is an exported object from package 'stats'. Avoid re-using such symbols.
data_filter(data, var)
}
expect_identical(
Expand Down Expand Up @@ -345,3 +345,22 @@
)
# styler: on
})


test_that("data_filter works with tibbles", {
skip_if_not_installed("tibble")
skip_if_not_installed("dplyr")
data(mtcars)

# preserve class
d <- tibble::as_tibble(mtcars)
out <- data_filter(d, mpg > 15)
expect_s3_class(out, "tbl_df")

# preserve attributes
d <- tibble::as_tibble(mtcars)
d <- dplyr::group_by(d, cyl)
out <- data_filter(d, mpg > 15)
expect_s3_class(out, "tbl_df")
expect_named(attr(out, "groups"), c("cyl", ".rows"))
})
Loading