Skip to content

Commit

Permalink
Refactor and resolve some TODOs
Browse files Browse the repository at this point in the history
  • Loading branch information
nealrichardson committed Aug 31, 2022
1 parent 1429a23 commit ed6fce6
Showing 1 changed file with 61 additions and 47 deletions.
108 changes: 61 additions & 47 deletions r/R/expression.R
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,15 @@ build_expr <- function(FUN,
}
if (FUN == "%in%") {
# Special-case %in%, which is different from the Array function name
value_set <- Array$create(args[[2]])
try(
value_set <- cast_or_parse(value_set, args[[1]]$type()),
silent = TRUE
)

expr <- Expression$create("is_in", args[[1]],
options = list(
# If args[[2]] is already an Arrow object (like a scalar),
# this wouldn't work
value_set = Array$create(args[[2]]),
value_set = value_set,
skip_nulls = TRUE
)
)
Expand Down Expand Up @@ -276,56 +280,66 @@ wrap_scalars <- function(args, FUN) {
# * %/%: we switch behavior based on int vs. dbl in R (see build_expr) so skip
# * binary_repeat, list_element: 2nd arg must be integer, Acero will handle it
if (any(is_expr) && !(arrow_fun %in% c("binary_repeat", "list_element")) && !(FUN %in% "%/%")) {
if (sum(is_expr) == 1) {
# Simple case: just one expr so take its type
try(
{
# If the Expression has no Schema embedded, we cannot resolve its
# type here, so this will error, hence the try() wrapping it
to_type <- args[[which(is_expr)]]$type()
# Try casting to this type, but if the cast fails,
# we'll just keep the original
args[!is_expr] <- lapply(args[!is_expr], function(x) {
if (x$type == string()) {
if (to_type == date32()) {
x <- call_function(
"strptime",
x,
options = list(format = "%Y-%m-%d", unit = 0L)
)
} else if (to_type$id == Type[["TIMESTAMP"]]) {
x <- call_function(
"strptime",
x,
options = list(format = "%Y-%m-%d %H:%M:%S", unit = 1L)
)
# R assumes timestamps without timezone specified are
# local timezone while Arrow assumes UTC. For consistency
# with R behavior, specify local timezone here.
x <- call_function(
"assume_timezone",
x,
options = list(timezone = Sys.timezone())
)
}
}
x$cast(to_type)
})
},
silent = TRUE
)
} else {
# TODO: check if all expr types are the same, and if so, cast to that
# Functions that exercise code that go through here (in our tests):
# * case_when
# * pmin/pmax
}
try(
{
# If the Expression has no Schema embedded, we cannot resolve its
# type here, so this will error, hence the try() wrapping it
# This will also error if length(args[is_expr]) == 0, or
# if there are multiple exprs that do not share a common type.
to_type <- common_type(args[is_expr])
# Try casting to this type, but if the cast fails,
# we'll just keep the original
args[!is_expr] <- lapply(args[!is_expr], cast_or_parse, type = to_type)
},
silent = TRUE
)
}

args[!is_expr] <- lapply(args[!is_expr], Expression$scalar)
args
}

common_type <- function(exprs) {
types <- map(exprs, ~ .$type())
first_type <- types[[1]]
if (length(types) == 1 || all(map_lgl(types, ~ .$Equals(first_type)))) {
# Functions (in our tests) that have multiple exprs to check:
# * case_when
# * pmin/pmax
return(first_type)
}
stop("There is no common type in these expressions")
}

cast_or_parse <- function(x, type) {
# For most types, just cast.
# But for string -> date/time, we need to call a parsing function
if (x$type == string()) {
if (type == date32()) {
x <- call_function(
"strptime",
x,
options = list(format = "%Y-%m-%d", unit = 0L)
)
} else if (type$id == Type[["TIMESTAMP"]]) {
x <- call_function(
"strptime",
x,
options = list(format = "%Y-%m-%d %H:%M:%S", unit = 1L)
)
# R assumes timestamps without timezone specified are
# local timezone while Arrow assumes UTC. For consistency
# with R behavior, specify local timezone here.
x <- call_function(
"assume_timezone",
x,
options = list(timezone = Sys.timezone())
)
}
}
x$cast(type)
}

#' @export
Ops.Expression <- function(e1, e2) {
if (.Generic == "!") {
Expand Down

0 comments on commit ed6fce6

Please sign in to comment.