Skip to content

Commit

Permalink
Merge pull request #9615 from hawkfish/strptime-infinity
Browse files Browse the repository at this point in the history
Internal #576: strptime strftime infinities
  • Loading branch information
Mytherin committed Nov 10, 2023
2 parents 7c96448 + 074b43c commit 57f7a78
Show file tree
Hide file tree
Showing 9 changed files with 148 additions and 44 deletions.
22 changes: 14 additions & 8 deletions extension/icu/icu-strptime.cpp
Expand Up @@ -107,7 +107,11 @@ struct ICUStrptime : public ICUDateFunc {
ParseResult parsed;
for (auto &format : info.formats) {
if (format.Parse(input, parsed)) {
return GetTime(calendar, ToMicros(calendar, parsed, format));
if (parsed.is_special) {
return parsed.ToTimestamp();
} else {
return GetTime(calendar, ToMicros(calendar, parsed, format));
}
}
}

Expand Down Expand Up @@ -137,9 +141,13 @@ struct ICUStrptime : public ICUDateFunc {
ParseResult parsed;
for (auto &format : info.formats) {
if (format.Parse(input, parsed)) {
timestamp_t result;
if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
return result;
if (parsed.is_special) {
return parsed.ToTimestamp();
} else {
timestamp_t result;
if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
return result;
}
}
}
}
Expand Down Expand Up @@ -360,8 +368,7 @@ struct ICUStrftime : public ICUDateFunc {
if (Timestamp::IsFinite(input)) {
return Operation(calendar.get(), input, tz_name, format, result);
} else {
mask.SetInvalid(idx);
return string_t();
return StringVector::AddString(result, Timestamp::ToString(input));
}
});
}
Expand All @@ -375,8 +382,7 @@ struct ICUStrftime : public ICUDateFunc {

return Operation(calendar.get(), input, tz_name, format, result);
} else {
mask.SetInvalid(idx);
return string_t();
return StringVector::AddString(result, Timestamp::ToString(input));
}
});
}
Expand Down
2 changes: 1 addition & 1 deletion src/common/types/date.cpp
Expand Up @@ -190,7 +190,7 @@ bool Date::ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &res
return false;
}

static bool TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const char *special) {
bool Date::TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const char *special) {
auto p = pos;
for (; p < len && *special; ++p) {
const auto s = *special++;
Expand Down
73 changes: 57 additions & 16 deletions src/function/scalar/strftime_format.cpp
Expand Up @@ -603,20 +603,19 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) {
D_ASSERT(input.GetType().id() == LogicalTypeId::DATE);
D_ASSERT(result.GetType().id() == LogicalTypeId::VARCHAR);
UnaryExecutor::ExecuteWithNulls<date_t, string_t>(input, result, count,
[&](date_t input, ValidityMask &mask, idx_t idx) {
if (Date::IsFinite(input)) {
dtime_t time(0);
idx_t len = GetLength(input, time, 0, nullptr);
string_t target = StringVector::EmptyString(result, len);
FormatString(input, time, target.GetDataWriteable());
target.Finalize();
return target;
} else {
mask.SetInvalid(idx);
return string_t();
}
});
UnaryExecutor::ExecuteWithNulls<date_t, string_t>(
input, result, count, [&](date_t input, ValidityMask &mask, idx_t idx) {
if (Date::IsFinite(input)) {
dtime_t time(0);
idx_t len = GetLength(input, time, 0, nullptr);
string_t target = StringVector::EmptyString(result, len);
FormatString(input, time, target.GetDataWriteable());
target.Finalize();
return target;
} else {
return StringVector::AddString(result, Date::ToString(input));
}
});
}

void StrfTimeFormat::ConvertTimestampVector(Vector &input, Vector &result, idx_t count) {
Expand All @@ -634,8 +633,7 @@ void StrfTimeFormat::ConvertTimestampVector(Vector &input, Vector &result, idx_t
target.Finalize();
return target;
} else {
mask.SetInvalid(idx);
return string_t();
return StringVector::AddString(result, Timestamp::ToString(input));
}
});
}
Expand Down Expand Up @@ -733,7 +731,38 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
data++;
size--;
}

// Check for specials
// Precheck for alphas for performance.
idx_t pos = 0;
result.is_special = false;
if (size > 4) {
if (StringUtil::CharacterIsAlpha(*data)) {
if (Date::TryConvertDateSpecial(data, size, pos, Date::PINF)) {
result.is_special = true;
result.special = date_t::infinity();
} else if (Date::TryConvertDateSpecial(data, size, pos, Date::EPOCH)) {
result.is_special = true;
result.special = date_t::epoch();
}
} else if (*data == '-' && Date::TryConvertDateSpecial(data, size, pos, Date::NINF)) {
result.is_special = true;
result.special = date_t::ninfinity();
}
}
if (result.is_special) {
// skip trailing spaces
while (pos < size && StringUtil::CharacterIsSpace(data[pos])) {
pos++;
}
if (pos != size) {
error_message = "Special timestamp did not match: trailing characters";
error_position = pos;
return false;
}
return true;
}

TimeSpecifierAMOrPM ampm = TimeSpecifierAMOrPM::TIME_SPECIFIER_NONE;

// Year offset state (Year+W/j)
Expand Down Expand Up @@ -1135,6 +1164,9 @@ string StrpTimeFormat::FormatStrpTimeError(const string &input, idx_t position)
}

//! Convert this parse result into a date.
//! When the result is not flagged as special, the date is assembled from
//! data[0], data[1] and data[2] (presumably year, month, day - confirm against
//! the parser). Otherwise the stored special date is handed back untouched.
date_t StrpTimeFormat::ParseResult::ToDate() {
	if (!is_special) {
		// Regular path: build the date from the parsed components.
		return Date::FromDate(data[0], data[1], data[2]);
	}
	// Special path: no component conversion is performed.
	return special;
}

Expand All @@ -1143,6 +1175,15 @@ bool StrpTimeFormat::ParseResult::TryToDate(date_t &result) {
}

timestamp_t StrpTimeFormat::ParseResult::ToTimestamp() {
if (is_special) {
if (special == date_t::infinity()) {
return timestamp_t::infinity();
} else if (special == date_t::ninfinity()) {
return timestamp_t::ninfinity();
}
return Timestamp::FromDatetime(special, dtime_t(0));
}

date_t date = Date::FromDate(data[0], data[1], data[2]);
const auto hour_offset = data[7] / Interval::MINS_PER_HOUR;
const auto mins_offset = data[7] % Interval::MINS_PER_HOUR;
Expand Down
3 changes: 3 additions & 0 deletions src/include/duckdb/common/types/date.hpp
Expand Up @@ -122,6 +122,9 @@ class Date {
DUCKDB_API static date_t FromCString(const char *str, idx_t len, bool strict = false);
//! Convert a date object to a string in the format "YYYY-MM-DD"
DUCKDB_API static string ToString(date_t date);
//! Try to convert the string as a given "special" date (e.g., PINF, ...)
//! Returns true if it was successful and updates the scan pos.
DUCKDB_API static bool TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const char *special);
//! Try to convert text in a buffer to a date; returns true if parsing was successful
//! If the date was a "special" value, the special flag will be set.
DUCKDB_API static bool TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool &special,
Expand Down
3 changes: 3 additions & 0 deletions src/include/duckdb/function/scalar/strftime_format.hpp
Expand Up @@ -131,6 +131,9 @@ struct StrpTimeFormat : public StrTimeFormat {
string error_message;
idx_t error_position = DConstants::INVALID_INDEX;

bool is_special;
date_t special;

date_t ToDate();
timestamp_t ToTimestamp();

Expand Down
12 changes: 7 additions & 5 deletions test/sql/function/date/test_strftime.test
Expand Up @@ -114,15 +114,17 @@ SELECT strftime('2019-01-23'::DATE, 42);

foreach datatype DATE TIMESTAMP

foreach special infinity -infinity

# PG to_char returns NULL here
# but we can do better.
query I
SELECT strftime('${special}'::${datatype}, '%Y-%m-%d');
SELECT strftime('infinity'::${datatype}, '%Y-%m-%d');
----
NULL
infinity

endloop
query I
SELECT strftime('-infinity'::${datatype}, '%Y-%m-%d');
----
-infinity

endloop

Expand Down
38 changes: 24 additions & 14 deletions test/sql/function/timestamp/test_icu_strftime.test
Expand Up @@ -16,56 +16,68 @@ PRAGMA enable_verification

statement ok
CREATE TABLE timestamps AS SELECT ts::TIMESTAMPTZ AS ts FROM (VALUES
('-infinity'),
('0044-03-13 (BC) 10:33:41.987654+01'),
('1962-07-31 12:20:48.123456+00'),
('epoch'),
('2021-01-01 00:00:00+00'),
('2021-02-02 00:00:00+00'),
('2021-11-26 10:15:13.123456+00'),
('2021-11-15 02:30:00-08'),
('2021-11-15 02:30:00-07'),
('2021-12-25 00:00:00+02'),
('infinity'),
(NULL),
) tbl(ts);

# String casts
query I
SELECT ts::VARCHAR FROM timestamps;
----
-infinity
0044-03-13 (BC) 01:40:43.987654-07:52
1962-07-31 05:20:48.123456-07
1969-12-31 16:00:00-08
2020-12-31 16:00:00-08
2021-02-01 16:00:00-08
2021-11-26 02:15:13.123456-08
2021-11-15 02:30:00-08
2021-11-15 01:30:00-08
2021-12-24 14:00:00-08
infinity
NULL

query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %Z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 01:40:43.987654-07:52 -43-03-13 01:40:43.987654 America/Los_Angeles
1962-07-31 05:20:48.123456-07 1962-07-31 05:20:48.123456 America/Los_Angeles
1969-12-31 16:00:00-08 1969-12-31 16:00:00.000000 America/Los_Angeles
2020-12-31 16:00:00-08 2020-12-31 16:00:00.000000 America/Los_Angeles
2021-02-01 16:00:00-08 2021-02-01 16:00:00.000000 America/Los_Angeles
2021-11-26 02:15:13.123456-08 2021-11-26 02:15:13.123456 America/Los_Angeles
2021-11-15 02:30:00-08 2021-11-15 02:30:00.000000 America/Los_Angeles
2021-11-15 01:30:00-08 2021-11-15 01:30:00.000000 America/Los_Angeles
2021-12-24 14:00:00-08 2021-12-24 14:00:00.000000 America/Los_Angeles
infinity infinity
NULL NULL

# TZ at the start
query II
SELECT ts, strftime(ts, '%Z %Y-%m-%d %H:%M:%S.%f') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 01:40:43.987654-07:52 America/Los_Angeles -43-03-13 01:40:43.987654
1962-07-31 05:20:48.123456-07 America/Los_Angeles 1962-07-31 05:20:48.123456
1969-12-31 16:00:00-08 America/Los_Angeles 1969-12-31 16:00:00.000000
2020-12-31 16:00:00-08 America/Los_Angeles 2020-12-31 16:00:00.000000
2021-02-01 16:00:00-08 America/Los_Angeles 2021-02-01 16:00:00.000000
2021-11-26 02:15:13.123456-08 America/Los_Angeles 2021-11-26 02:15:13.123456
2021-11-15 02:30:00-08 America/Los_Angeles 2021-11-15 02:30:00.000000
2021-11-15 01:30:00-08 America/Los_Angeles 2021-11-15 01:30:00.000000
2021-12-24 14:00:00-08 America/Los_Angeles 2021-12-24 14:00:00.000000
infinity infinity
NULL NULL

#
Expand Down Expand Up @@ -195,27 +207,33 @@ SET TimeZone='Asia/Kathmandu';
query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %Z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 15:14:57.987654+05:41 -43-03-13 15:14:57.987654 Asia/Kathmandu
1962-07-31 17:50:48.123456+05:30 1962-07-31 17:50:48.123456 Asia/Kathmandu
1970-01-01 05:30:00+05:30 1970-01-01 05:30:00.000000 Asia/Kathmandu
2021-01-01 05:45:00+05:45 2021-01-01 05:45:00.000000 Asia/Kathmandu
2021-02-02 05:45:00+05:45 2021-02-02 05:45:00.000000 Asia/Kathmandu
2021-11-26 16:00:13.123456+05:45 2021-11-26 16:00:13.123456 Asia/Kathmandu
2021-11-15 16:15:00+05:45 2021-11-15 16:15:00.000000 Asia/Kathmandu
2021-11-15 15:15:00+05:45 2021-11-15 15:15:00.000000 Asia/Kathmandu
2021-12-25 03:45:00+05:45 2021-12-25 03:45:00.000000 Asia/Kathmandu
infinity infinity
NULL NULL

query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 15:14:57.987654+05:41 -43-03-13 15:14:57.987654 +05:41
1962-07-31 17:50:48.123456+05:30 1962-07-31 17:50:48.123456 +05:30
1970-01-01 05:30:00+05:30 1970-01-01 05:30:00.000000 +05:30
2021-01-01 05:45:00+05:45 2021-01-01 05:45:00.000000 +05:45
2021-02-02 05:45:00+05:45 2021-02-02 05:45:00.000000 +05:45
2021-11-26 16:00:13.123456+05:45 2021-11-26 16:00:13.123456 +05:45
2021-11-15 16:15:00+05:45 2021-11-15 16:15:00.000000 +05:45
2021-11-15 15:15:00+05:45 2021-11-15 15:15:00.000000 +05:45
2021-12-25 03:45:00+05:45 2021-12-25 03:45:00.000000 +05:45
infinity infinity
NULL NULL

statement ok
Expand All @@ -224,43 +242,35 @@ SET TimeZone='Canada/Newfoundland';
query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %Z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 06:02:49.987654-03:30 -43-03-13 06:02:49.987654 Canada/Newfoundland
1962-07-31 09:50:48.123456-02:30 1962-07-31 09:50:48.123456 Canada/Newfoundland
1969-12-31 20:30:00-03:30 1969-12-31 20:30:00.000000 Canada/Newfoundland
2020-12-31 20:30:00-03:30 2020-12-31 20:30:00.000000 Canada/Newfoundland
2021-02-01 20:30:00-03:30 2021-02-01 20:30:00.000000 Canada/Newfoundland
2021-11-26 06:45:13.123456-03:30 2021-11-26 06:45:13.123456 Canada/Newfoundland
2021-11-15 07:00:00-03:30 2021-11-15 07:00:00.000000 Canada/Newfoundland
2021-11-15 06:00:00-03:30 2021-11-15 06:00:00.000000 Canada/Newfoundland
2021-12-24 18:30:00-03:30 2021-12-24 18:30:00.000000 Canada/Newfoundland
infinity infinity
NULL NULL

query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 06:02:49.987654-03:30 -43-03-13 06:02:49.987654 -03:30
1962-07-31 09:50:48.123456-02:30 1962-07-31 09:50:48.123456 -02:30
1969-12-31 20:30:00-03:30 1969-12-31 20:30:00.000000 -03:30
2020-12-31 20:30:00-03:30 2020-12-31 20:30:00.000000 -03:30
2021-02-01 20:30:00-03:30 2021-02-01 20:30:00.000000 -03:30
2021-11-26 06:45:13.123456-03:30 2021-11-26 06:45:13.123456 -03:30
2021-11-15 07:00:00-03:30 2021-11-15 07:00:00.000000 -03:30
2021-11-15 06:00:00-03:30 2021-11-15 06:00:00.000000 -03:30
2021-12-24 18:30:00-03:30 2021-12-24 18:30:00.000000 -03:30
infinity infinity
NULL NULL

#
# Infinities
#

foreach special infinity -infinity

# PG to_char returns NULL here
query I
SELECT strftime('${special}'::TIMESTAMPTZ, '%Y-%m-%d');
----
NULL

endloop

#
# Errors/Coverage
#
Expand Down
21 changes: 21 additions & 0 deletions test/sql/function/timestamp/test_icu_strptime.test
Expand Up @@ -43,6 +43,27 @@ select strptime('2022-03-05 17:59:17.123456789 CST', '%Y-%m-%d %H:%M:%S.%n %Z');
----
2022-03-05 15:59:17.123457-08

# Parse specials

foreach func strptime try_strptime

query I
select ${func}('infinity', '%Y-%m-%d %H:%M:%S.%g %Z');
----
infinity

query I
select ${func}('-infinity', '%Y-%m-%d %H:%M:%S.%g %Z');
----
-infinity

query I
select ${func}('epoch', '%Y-%m-%d %H:%M:%S.%g %Z');
----
1969-12-31 16:00:00-08

endloop

#
# Parsing pre-Gregorian timestamps
#
Expand Down

0 comments on commit 57f7a78

Please sign in to comment.