Skip to content

Commit

Permalink
ARROW-12283: [R] Bindings for basic type convert functions in dplyr v…
Browse files Browse the repository at this point in the history
…erbs

This adds support for `as.character()`, `as.double()`, `as.integer()`, `bit64::as.integer64()`, `as.logical()`, and `as.numeric()` in dplyr verbs.

Closes #9942 from ianmcook/ARROW-12283

Authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
  • Loading branch information
ianmcook authored and nealrichardson committed Apr 8, 2021
1 parent c92815d commit f2db785
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 0 deletions.
34 changes: 34 additions & 0 deletions r/R/dplyr.R
Expand Up @@ -396,6 +396,40 @@ build_function_list <- function(FUN) {
# Include mappings from R function name spellings
lapply(set_names(names(.array_function_map)), wrapper),
# Plus some special handling where it's not 1:1
# as.character(): issue a "cast" through FUN with options whose target type
# is string().
as.character = function(x) {
  string_opts <- cast_options(to_type = string())
  FUN("cast", x, options = string_opts)
},
# as.double(): issue a "cast" through FUN with options whose target type
# is float64().
as.double = function(x) {
  float64_opts <- cast_options(to_type = float64())
  FUN("cast", x, options = float64_opts)
},
# as.integer(): issue a "cast" through FUN with options whose target type is
# int32(). Both truncation flags are switched on.
# NOTE(review): presumably this lets fractional inputs be cut down to whole
# numbers the way base R's as.integer() does -- confirm against the
# cast_options() documentation.
as.integer = function(x) {
  int32_opts <- cast_options(
    to_type = int32(),
    allow_float_truncate = TRUE,
    allow_decimal_truncate = TRUE
  )
  FUN("cast", x, options = int32_opts)
},
# as.integer64(): issue a "cast" through FUN with options whose target type
# is int64(). Both truncation flags are switched on, mirroring the as.integer
# binding.
as.integer64 = function(x) {
  int64_opts <- cast_options(
    to_type = int64(),
    allow_float_truncate = TRUE,
    allow_decimal_truncate = TRUE
  )
  FUN("cast", x, options = int64_opts)
},
# as.logical(): issue a "cast" through FUN with options whose target type
# is boolean().
as.logical = function(x) {
  boolean_opts <- cast_options(to_type = boolean())
  FUN("cast", x, options = boolean_opts)
},
# as.numeric(): issue a "cast" through FUN with options whose target type is
# float64() -- the same target type the as.double binding uses.
as.numeric = function(x) {
  float64_opts <- cast_options(to_type = float64())
  FUN("cast", x, options = float64_opts)
},
nchar = function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
if (allowNA) {
stop("allowNA = TRUE not supported for Arrow", call. = FALSE)
Expand Down
102 changes: 102 additions & 0 deletions r/tests/testthat/test-dplyr.R
Expand Up @@ -357,3 +357,105 @@ test_that("relocate with selection helpers", {
"Unsupported selection helper"
)
})

# Exercises the explicit type-conversion functions (as.character(),
# as.double(), as.integer(), as.numeric(), as.integer64(), as.logical())
# inside transmute(), one fixture table per group of source types.
# NOTE(review): expect_dplyr_equal() is defined outside this view; presumably
# it evaluates the pipeline with `input` bound to the supplied table and
# compares the plain-dplyr result with the Arrow result -- confirm.
test_that("explicit type conversions", {
# Attaches bit64 so that as.integer64() is available to the calls below.
library(bit64)
# Integer and double source columns taken from `tbl`, a fixture defined
# outside this test.
expect_dplyr_equal(
input %>%
transmute(
int2chr = as.character(int),
int2dbl = as.double(int),
int2int = as.integer(int),
int2num = as.numeric(int),
dbl2chr = as.character(dbl),
dbl2dbl = as.double(dbl),
dbl2int = as.integer(dbl),
dbl2num = as.numeric(dbl),
) %>%
collect(),
tbl
)
# Character source column whose strings spell whole numbers.
expect_dplyr_equal(
input %>%
transmute(
chr2chr = as.character(chr),
chr2dbl = as.double(chr),
chr2int = as.integer(chr),
chr2num = as.numeric(chr)
) %>%
collect(),
tibble(chr = c("1", "2", "3"))
)
# 64-bit integer conversions; the value 1e10 is larger than a 32-bit integer
# can hold.
expect_dplyr_equal(
input %>%
transmute(
chr2i64 = as.integer64(chr),
dbl2i64 = as.integer64(dbl),
i642i64 = as.integer64(i64),
) %>%
collect(),
tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10))
)
# Conversions to logical from strings in both upper and lower case and from
# numbers that include a non-zero value other than 1 (-99).
expect_dplyr_equal(
input %>%
transmute(
chr2lgl = as.logical(chr),
dbl2lgl = as.logical(dbl),
int2lgl = as.logical(int)
) %>%
collect(),
tibble(
chr = c("TRUE", "FALSE", "true", "false"),
dbl = c(1, 0, -99, 0),
int = c(1L, 0L, -99L, 0L)
)
)
# Conversions among double, integer and logical columns that each contain a
# missing value. lgl2chr is upper-cased before comparison because of the
# casing difference noted on that line.
expect_dplyr_equal(
input %>%
transmute(
dbl2chr = as.character(dbl),
dbl2dbl = as.double(dbl),
dbl2int = as.integer(dbl),
dbl2lgl = as.logical(dbl),
int2chr = as.character(int),
int2dbl = as.double(int),
int2int = as.integer(int),
int2lgl = as.logical(int),
lgl2chr = toupper(as.character(lgl)), # Arrow returns "true", "false"
lgl2dbl = as.double(lgl),
lgl2int = as.integer(lgl),
lgl2lgl = as.logical(lgl),
) %>%
collect(),
tibble(
dbl = c(1, 0, NA_real_),
int = c(1L, 0L, NA_integer_),
lgl = c(TRUE, FALSE, NA)
)
)
})

# Records conversions that are known not to match base R. Each
# expect_dplyr_equal() call is wrapped in expect_error(), so the test passes
# only while the wrapped call raises an error.
# NOTE(review): expect_error() is given no message or class to match, so any
# error from the wrapped call satisfies it -- consider pinning the expected
# message.
test_that("bad explicit type conversions", {

# Logical to character: Arrow returns lowercase "true", "false", so the
# comparison inside expect_dplyr_equal() is expected to fail.
expect_error(
expect_dplyr_equal(
input %>%
transmute(lgl2chr = as.character(lgl)) %>%
collect(),
tibble(lgl = c(TRUE, FALSE, NA)
)
)
)

# Character to logical: Arrow fails to parse these strings as Booleans
# ("TRU", "FAX" and the empty string).
expect_error(
expect_dplyr_equal(
input %>%
transmute(chr2lgl = as.logical(chr)) %>%
collect(),
tibble(chr = c("TRU", "FAX", ""))
)
)

})

0 comments on commit f2db785

Please sign in to comment.