Skip to content

Commit

Permalink
Add date32 date64 to strftime.
Browse files Browse the repository at this point in the history
  • Loading branch information
rok committed Sep 23, 2021
1 parent dd6c325 commit e50eb84
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 32 deletions.
17 changes: 11 additions & 6 deletions cpp/src/arrow/compute/kernels/scalar_temporal.cc
Expand Up @@ -528,13 +528,18 @@ struct Strftime {
static Result<Strftime> Make(KernelContext* ctx, const DataType& type) {
const StrftimeOptions& options = StrftimeState::Get(ctx);

auto timezone = GetInputTimezone(type);
// This check is due to surprising %c behavior.
// See https://github.com/HowardHinnant/date/issues/704
if ((options.format.find("%c") != std::string::npos) && (options.locale != "C")) {
return Status::Invalid("%c flag is not supported in non-C locales.");
}
if (timezone.empty()) {
std::string timezone;
bool not_timestamp = &type == date32().get() || &type == date64().get();
if (!not_timestamp) {
timezone = GetInputTimezone(type);
}

if (timezone.empty() || not_timestamp) {
if ((options.format.find("%z") != std::string::npos) ||
(options.format.find("%Z") != std::string::npos)) {
return Status::Invalid(
Expand All @@ -556,7 +561,7 @@ struct Strftime {
TimestampFormatter formatter{self.options.format, self.tz, self.locale};

if (in.is_valid) {
const int64_t in_val = internal::UnboxScalar<const TimestampType>::Unbox(in);
const int64_t in_val = internal::UnboxScalar<const InType>::Unbox(in);
ARROW_ASSIGN_OR_RAISE(auto formatted, formatter(in_val));
checked_cast<StringScalar*>(out)->value = Buffer::FromString(std::move(formatted));
} else {
Expand Down Expand Up @@ -584,7 +589,7 @@ struct Strftime {
ARROW_ASSIGN_OR_RAISE(auto formatted, formatter(arg));
return string_builder.Append(std::move(formatted));
};
RETURN_NOT_OK(VisitArrayDataInline<Int64Type>(in, visit_value, visit_null));
RETURN_NOT_OK(VisitArrayDataInline<InType>(in, visit_value, visit_null));

std::shared_ptr<Array> out_array;
RETURN_NOT_OK(string_builder.Finish(&out_array));
Expand Down Expand Up @@ -1186,8 +1191,8 @@ void RegisterScalarTemporal(FunctionRegistry* registry) {

static const auto default_strftime_options = StrftimeOptions();
auto strftime = MakeSimpleUnaryTemporal<Strftime>(
"strftime", {WithTimestamps}, utf8(), &strftime_doc, &default_strftime_options,
StrftimeState::Init);
"strftime", {WithDates, WithTimestamps}, utf8(), &strftime_doc,
&default_strftime_options, StrftimeState::Init);
DCHECK_OK(registry->AddFunction(std::move(strftime)));

auto assume_timezone =
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
Expand Up @@ -566,6 +566,14 @@ TEST_F(ScalarTemporalTest, Strftime) {
utf8(), string_microseconds, &options);
CheckScalarUnary("strftime", timestamp(TimeUnit::NANO, "US/Hawaii"), nanoseconds,
utf8(), string_nanoseconds, &options);

auto options2 = StrftimeOptions("%Y-%m-%d");
const char* date32s = R"([0, 10957, 10958, null])";
const char* date64s = R"([0, 946684800000, 946771200000, null])";
const char* string_dates32 = R"(["1970-01-01", "2000-01-01", "2000-01-02", null])";
const char* string_dates64 = R"(["1970-01-01", "2000-01-01", "2000-01-02", null])";
CheckScalarUnary("strftime", date32(), date32s, utf8(), string_dates32, &options2);
CheckScalarUnary("strftime", date64(), date64s, utf8(), string_dates64, &options2);
}

TEST_F(ScalarTemporalTest, StrftimeNoTimezone) {
Expand Down
13 changes: 7 additions & 6 deletions r/R/dplyr-functions.R
Expand Up @@ -731,13 +731,14 @@ nse_funcs$round <- function(x, digits = 0) {
)
}

nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {

# The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
# When the ticket below is resolved, we should be able to support the label argument
# https://issues.apache.org/jira/browse/ARROW-13133
nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7), locale = Sys.getlocale("LC_TIME")) {
if (label) {
arrow_not_supported("Label argument")
if (abbr) (
format <- "%a"
) else {
format <- "%A"
}
return(Expression$create("strftime", x, options = list(format = format, locale = locale)))
}

Expression$create("day_of_week", x, options = list(one_based_numbering = TRUE, week_start = week_start))
Expand Down
40 changes: 28 additions & 12 deletions r/tests/testthat/test-dplyr-lubridate.R
Expand Up @@ -116,12 +116,20 @@ test_that("extract wday from timestamp", {
test_df
)

# We should be able to support the label argument after this ticket is resolved:
# https://issues.apache.org/jira/browse/ARROW-13133
x <- Expression$field_ref("x")
expect_error(
nse_funcs$wday(x, label = TRUE),
"Label argument not supported by Arrow"
expect_dplyr_equal(
input %>%
mutate(x = wday(date, label = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)

expect_dplyr_equal(
input %>%
mutate(x = wday(datetime, label = TRUE, abbr = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)
})

Expand Down Expand Up @@ -241,12 +249,20 @@ test_that("extract wday from date", {
test_df
)

# We should be able to support the label argument after this ticket is resolved:
# https://issues.apache.org/jira/browse/ARROW-13133
x <- Expression$field_ref("x")
expect_error(
nse_funcs$wday(x, label = TRUE),
"Label argument not supported by Arrow"
expect_dplyr_equal(
input %>%
mutate(x = wday(date, label = TRUE, abbr = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)

expect_dplyr_equal(
input %>%
mutate(x = wday(date, label = TRUE)) %>%
mutate(x = as.character(x)) %>%
collect(),
test_df
)
})

Expand Down
25 changes: 17 additions & 8 deletions r/tests/testthat/test-dplyr-string-functions.R
Expand Up @@ -737,35 +737,44 @@ test_that("errors in strptime", {
test_that("strftime", {
skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168

times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA))
seconds <- tibble(x = c("05.000000", NA))
times <- tibble(
datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA),
date = c(as.Date("2021-09-09"), NA)
)
formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u"

expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = formats)) %>%
mutate(x = strftime(datetime, format = formats)) %>%
collect(),
times
)

expect_dplyr_equal(
input %>%
mutate(x = strftime(date, format = formats)) %>%
collect(),
times
)

expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = formats, tz = "Pacific/Marquesas")) %>%
mutate(x = strftime(datetime, format = formats, tz = "Pacific/Marquesas")) %>%
collect(),
times
)

expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = formats, tz = "EST", usetz = TRUE)) %>%
mutate(x = strftime(datetime, format = formats, tz = "EST", usetz = TRUE)) %>%
collect(),
times
)

withr::with_timezone("Pacific/Marquesas",
expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = formats, tz = "EST")) %>%
mutate(x = strftime(datetime, format = formats, tz = "EST")) %>%
collect(),
times
)
Expand All @@ -776,7 +785,7 @@ test_that("strftime", {
expect_error(
times %>%
Table$create() %>%
mutate(x = strftime(x, format = "%c")) %>%
mutate(x = strftime(datetime, format = "%c")) %>%
collect(),
"%c flag is not supported in non-C locales."
)
Expand All @@ -787,7 +796,7 @@ test_that("strftime", {
# point numbers with 3, 6 and 9 decimal places respectively.
expect_dplyr_equal(
input %>%
mutate(x = strftime(x, format = "%S")) %>%
mutate(x = strftime(datetime, format = "%S")) %>%
transmute(as.double(substr(x, 1, 2))) %>%
collect(),
times,
Expand Down

0 comments on commit e50eb84

Please sign in to comment.