Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-13916: [C++] Implement strftime on date32/64 types #11219

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/scalar_temporal.cc
Expand Up @@ -1199,7 +1199,7 @@ void RegisterScalarTemporal(FunctionRegistry* registry) {

static const auto default_strftime_options = StrftimeOptions();
auto strftime = MakeSimpleUnaryTemporal<Strftime>(
"strftime", {WithTimes, WithTimestamps}, utf8(), &strftime_doc,
"strftime", {WithTimes, WithDates, WithTimestamps}, utf8(), &strftime_doc,
&default_strftime_options, StrftimeState::Init);
DCHECK_OK(registry->AddFunction(std::move(strftime)));

Expand Down
19 changes: 19 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
Expand Up @@ -693,6 +693,25 @@ TEST_F(ScalarTemporalTest, Strftime) {
Invalid,
testing::HasSubstr("Invalid: Timezone not present, cannot convert to string"),
Strftime(arr_ns, StrftimeOptions("%Y-%m-%dT%H:%M:%S%Z")));

auto options_ymd = StrftimeOptions("%Y-%m-%d");

const char* date32s = R"([0, 10957, 10958, null])";
const char* date64s = R"([0, 946684800000, 946771200000, null])";
const char* dates32_ymd = R"(["1970-01-01", "2000-01-01", "2000-01-02", null])";
const char* dates64_ymd = R"(["1970-01-01", "2000-01-01", "2000-01-02", null])";
const char* dates32_ymdhms =
R"(["1970-01-01T00:00:00", "2000-01-01T00:00:00", "2000-01-02T00:00:00", null])";
const char* dates64_ymdhms =
R"(["1970-01-01T00:00:00.000", "2000-01-01T00:00:00.000",
"2000-01-02T00:00:00.000", null])";

CheckScalarUnary("strftime", date32(), date32s, utf8(), dates32_ymd, &options_ymd);
CheckScalarUnary("strftime", date64(), date64s, utf8(), dates64_ymd, &options_ymd);
CheckScalarUnary("strftime", date32(), date32s, utf8(), dates32_ymdhms,
&options_ymdhms);
CheckScalarUnary("strftime", date64(), date64s, utf8(), dates64_ymdhms,
&options_ymdhms);
}

TEST_F(ScalarTemporalTest, StrftimeNoTimezone) {
Expand Down
2 changes: 1 addition & 1 deletion docs/source/cpp/compute.rst
Expand Up @@ -1215,7 +1215,7 @@ provided by a concrete function :func:`~arrow::compute::Cast`.
+=================+============+====================+==================+==============================+=======+
| cast | Unary | Many | Variable | :struct:`CastOptions` | |
+-----------------+------------+--------------------+------------------+------------------------------+-------+
| strftime | Unary | Timestamp, Time | String | :struct:`StrftimeOptions` | \(1) |
| strftime | Unary | Temporal | String | :struct:`StrftimeOptions` | \(1) |
+-----------------+------------+--------------------+------------------+------------------------------+-------+
| strptime | Unary | String-like | Timestamp | :struct:`StrptimeOptions` | |
+-----------------+------------+--------------------+------------------+------------------------------+-------+
Expand Down
14 changes: 8 additions & 6 deletions r/R/dplyr-functions.R
Expand Up @@ -731,13 +731,15 @@ nse_funcs$round <- function(x, digits = 0) {
)
}

nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {

# The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
# When the ticket below is resolved, we should be able to support the label argument
# https://issues.apache.org/jira/browse/ARROW-13133
nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7),
locale = Sys.getlocale("LC_TIME")) {
if (label) {
arrow_not_supported("Label argument")
if (abbr) (
format <- "%a"
) else {
format <- "%A"
}
return(Expression$create("strftime", x, options = list(format = format, locale = locale)))
}

Expression$create("day_of_week", x, options = list(count_from_zero = FALSE, week_start = week_start))
Expand Down
44 changes: 32 additions & 12 deletions r/tests/testthat/test-dplyr-lubridate.R
Expand Up @@ -125,12 +125,22 @@ test_that("extract wday from timestamp", {
test_df
)

# We should be able to support the label argument after this ticket is resolved:
# https://issues.apache.org/jira/browse/ARROW-13133
x <- Expression$field_ref("x")
expect_error(
nse_funcs$wday(x, label = TRUE),
"Label argument not supported by Arrow"
skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168

expect_dplyr_equal(
input %>%
mutate(x = wday(date, label = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)

expect_dplyr_equal(
input %>%
mutate(x = wday(datetime, label = TRUE, abbr = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)
})

Expand Down Expand Up @@ -259,12 +269,22 @@ test_that("extract wday from date", {
test_df
)

# We should be able to support the label argument after this ticket is resolved:
# https://issues.apache.org/jira/browse/ARROW-13133
x <- Expression$field_ref("x")
expect_error(
nse_funcs$wday(x, label = TRUE),
"Label argument not supported by Arrow"
skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168

expect_dplyr_equal(
input %>%
mutate(x = wday(date, label = TRUE, abbr = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)

expect_dplyr_equal(
input %>%
mutate(x = wday(date, label = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)
})

Expand Down
25 changes: 17 additions & 8 deletions r/tests/testthat/test-dplyr-string-functions.R
Expand Up @@ -737,35 +737,44 @@ test_that("errors in strptime", {
test_that("strftime", {
skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168

times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA))
seconds <- tibble(x = c("05.000000", NA))
times <- tibble(
datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA),
date = c(as.Date("2021-09-09"), NA)
)
formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u"

expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = formats)) %>%
mutate(x = strftime(datetime, format = formats)) %>%
collect(),
times
)

expect_dplyr_equal(
input %>%
mutate(x = strftime(date, format = formats)) %>%
collect(),
times
)

expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = formats, tz = "Pacific/Marquesas")) %>%
mutate(x = strftime(datetime, format = formats, tz = "Pacific/Marquesas")) %>%
collect(),
times
)

expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = formats, tz = "EST", usetz = TRUE)) %>%
mutate(x = strftime(datetime, format = formats, tz = "EST", usetz = TRUE)) %>%
collect(),
times
)

withr::with_timezone("Pacific/Marquesas",
expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = formats, tz = "EST")) %>%
mutate(x = strftime(datetime, format = formats, tz = "EST")) %>%
collect(),
times
)
Expand All @@ -776,7 +785,7 @@ test_that("strftime", {
expect_error(
times %>%
Table$create() %>%
mutate(x = strftime(x, format = "%c")) %>%
mutate(x = strftime(datetime, format = "%c")) %>%
collect(),
"%c flag is not supported in non-C locales."
)
Expand All @@ -787,7 +796,7 @@ test_that("strftime", {
# point numbers with 3, 6 and 9 decimal places respectively.
expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = "%S")) %>%
mutate(x = strftime(datetime, format = "%S")) %>%
transmute(as.double(substr(x, 1, 2))) %>%
collect(),
times,
Expand Down