Skip to content

Commit

Permalink
ARROW-17738: [R] dplyr::compute should convert from grouped arrow_dplyr_query to arrow Table (#14160)
Browse files Browse the repository at this point in the history

Authored-by: SHIMA Tatsuya <ts1s1andn@gmail.com>
Signed-off-by: Dewey Dunnington <dewey@voltrondata.com>
  • Loading branch information
eitsupi committed Oct 7, 2022
1 parent 45a008d commit 12667cd
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 13 deletions.
9 changes: 5 additions & 4 deletions r/R/dplyr-collect.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ restore_dplyr_features <- function(df, query) {
)
} else {
# This is a Table, via compute() or collect(as_data_frame = FALSE)
df <- as_adq(df)
df$group_by_vars <- query$group_by_vars
df$drop_empty_groups <- query$drop_empty_groups
df$metadata$r$attributes$.group_vars <- query$group_by_vars
}
}
df
Expand All @@ -80,7 +78,10 @@ collapse.arrow_dplyr_query <- function(x, ...) {
# Figure out what schema will result from the query
x$schema <- implicit_schema(x)
# Nest inside a new arrow_dplyr_query (and keep groups)
restore_dplyr_features(arrow_dplyr_query(x), x)
out <- arrow_dplyr_query(x)
out$group_by_vars <- x$group_by_vars
out$drop_empty_groups <- x$drop_empty_groups
out
}
collapse.Dataset <- collapse.ArrowTabular <- collapse.RecordBatchReader <- function(x, ...) {
arrow_dplyr_query(x)
Expand Down
10 changes: 3 additions & 7 deletions r/tests/testthat/test-dataset-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -284,13 +284,9 @@ test_that("compute()/collect(as_data_frame=FALSE)", {
group_by(fct) %>%
compute()

# the group_by() prevents compute() from returning a Table...
expect_s3_class(tab5, "arrow_dplyr_query")

# ... but $.data is a Table...
expect_r6_class(tab5$.data, "Table")
# ... and the mutate() was evaluated
expect_true("negint" %in% names(tab5$.data))
expect_r6_class(tab5, "Table")
# mutate() was evaluated
expect_true("negint" %in% names(tab5))
})

test_that("head/tail on query on dataset", {
Expand Down
44 changes: 42 additions & 2 deletions r/tests/testthat/test-dplyr-query.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ test_that("collect(as_data_frame=FALSE)", {
filter(int > 5) %>%
group_by(int) %>%
collect(as_data_frame = FALSE)
expect_s3_class(b4, "arrow_dplyr_query")
expect_r6_class(b4, "Table")
expect_equal(
as.data.frame(b4),
expected %>%
Expand Down Expand Up @@ -156,7 +156,7 @@ test_that("compute()", {
filter(int > 5) %>%
group_by(int) %>%
compute()
expect_s3_class(b4, "arrow_dplyr_query")
expect_r6_class(b4, "Table")
expect_equal(
as.data.frame(b4),
expected %>%
Expand Down Expand Up @@ -579,3 +579,43 @@ test_that("needs_projection unit tests", {
tab %>% relocate(lgl)
))
})

test_that("compute() on a grouped query returns a Table with groups in metadata", {
  # Reference result: the same data grouped by `int` on the dplyr side.
  grouped_tbl <- tbl %>% group_by(int)

  # Build a grouped query on top of an Arrow Table and evaluate it.
  computed <- tbl %>%
    arrow_table() %>%
    group_by(int) %>%
    compute()

  # compute() is expected to hand back a Table here.
  expect_r6_class(computed, "Table")

  # Both conversions back to R are compared against the grouped reference.
  expect_equal(as.data.frame(computed), grouped_tbl)
  expect_equal(collect(computed), grouped_tbl)
})

test_that("collect() is identical to compute() %>% collect()", {
  # One ungrouped Table and one query grouped by `int`, built from `tbl`.
  ungrouped <- arrow_table(tbl)
  grouped <- group_by(ungrouped, int)

  # In both cases, going through compute() first is expected to give the
  # same result as calling collect() directly.
  expect_equal(collect(compute(ungrouped)), collect(ungrouped))
  expect_equal(collect(compute(grouped)), collect(grouped))
})

0 comments on commit 12667cd

Please sign in to comment.