Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Internal #576: strptime strftime infinities #9615

Merged
merged 3 commits into from Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 14 additions & 8 deletions extension/icu/icu-strptime.cpp
Expand Up @@ -107,7 +107,11 @@ struct ICUStrptime : public ICUDateFunc {
ParseResult parsed;
for (auto &format : info.formats) {
if (format.Parse(input, parsed)) {
return GetTime(calendar, ToMicros(calendar, parsed, format));
if (parsed.is_special) {
return parsed.ToTimestamp();
} else {
return GetTime(calendar, ToMicros(calendar, parsed, format));
}
}
}

Expand Down Expand Up @@ -137,9 +141,13 @@ struct ICUStrptime : public ICUDateFunc {
ParseResult parsed;
for (auto &format : info.formats) {
if (format.Parse(input, parsed)) {
timestamp_t result;
if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
return result;
if (parsed.is_special) {
return parsed.ToTimestamp();
} else {
timestamp_t result;
if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
return result;
}
}
}
}
Expand Down Expand Up @@ -360,8 +368,7 @@ struct ICUStrftime : public ICUDateFunc {
if (Timestamp::IsFinite(input)) {
return Operation(calendar.get(), input, tz_name, format, result);
} else {
mask.SetInvalid(idx);
return string_t();
return StringVector::AddString(result, Timestamp::ToString(input));
}
});
}
Expand All @@ -375,8 +382,7 @@ struct ICUStrftime : public ICUDateFunc {

return Operation(calendar.get(), input, tz_name, format, result);
} else {
mask.SetInvalid(idx);
return string_t();
return StringVector::AddString(result, Timestamp::ToString(input));
}
});
}
Expand Down
2 changes: 1 addition & 1 deletion src/common/types/date.cpp
Expand Up @@ -190,7 +190,7 @@ bool Date::ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &res
return false;
}

static bool TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const char *special) {
bool Date::TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const char *special) {
auto p = pos;
for (; p < len && *special; ++p) {
const auto s = *special++;
Expand Down
73 changes: 57 additions & 16 deletions src/function/scalar/strftime_format.cpp
Expand Up @@ -603,20 +603,19 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) {
D_ASSERT(input.GetType().id() == LogicalTypeId::DATE);
D_ASSERT(result.GetType().id() == LogicalTypeId::VARCHAR);
UnaryExecutor::ExecuteWithNulls<date_t, string_t>(input, result, count,
[&](date_t input, ValidityMask &mask, idx_t idx) {
if (Date::IsFinite(input)) {
dtime_t time(0);
idx_t len = GetLength(input, time, 0, nullptr);
string_t target = StringVector::EmptyString(result, len);
FormatString(input, time, target.GetDataWriteable());
target.Finalize();
return target;
} else {
mask.SetInvalid(idx);
return string_t();
}
});
UnaryExecutor::ExecuteWithNulls<date_t, string_t>(
input, result, count, [&](date_t input, ValidityMask &mask, idx_t idx) {
if (Date::IsFinite(input)) {
dtime_t time(0);
idx_t len = GetLength(input, time, 0, nullptr);
string_t target = StringVector::EmptyString(result, len);
FormatString(input, time, target.GetDataWriteable());
target.Finalize();
return target;
} else {
return StringVector::AddString(result, Date::ToString(input));
}
});
}

void StrfTimeFormat::ConvertTimestampVector(Vector &input, Vector &result, idx_t count) {
Expand All @@ -634,8 +633,7 @@ void StrfTimeFormat::ConvertTimestampVector(Vector &input, Vector &result, idx_t
target.Finalize();
return target;
} else {
mask.SetInvalid(idx);
return string_t();
return StringVector::AddString(result, Timestamp::ToString(input));
}
});
}
Expand Down Expand Up @@ -733,7 +731,38 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
data++;
size--;
}

// Check for specials
// Precheck for alphas for performance.
idx_t pos = 0;
result.is_special = false;
if (size > 4) {
if (StringUtil::CharacterIsAlpha(*data)) {
if (Date::TryConvertDateSpecial(data, size, pos, Date::PINF)) {
result.is_special = true;
result.special = date_t::infinity();
} else if (Date::TryConvertDateSpecial(data, size, pos, Date::EPOCH)) {
result.is_special = true;
result.special = date_t::epoch();
}
} else if (*data == '-' && Date::TryConvertDateSpecial(data, size, pos, Date::NINF)) {
result.is_special = true;
result.special = date_t::ninfinity();
}
}
if (result.is_special) {
// skip trailing spaces
while (pos < size && StringUtil::CharacterIsSpace(data[pos])) {
pos++;
}
if (pos != size) {
error_message = "Special timestamp did not match: trailing characters";
error_position = pos;
return false;
}
return true;
}

TimeSpecifierAMOrPM ampm = TimeSpecifierAMOrPM::TIME_SPECIFIER_NONE;

// Year offset state (Year+W/j)
Expand Down Expand Up @@ -1135,6 +1164,9 @@ string StrpTimeFormat::FormatStrpTimeError(const string &input, idx_t position)
}

//! Converts the parse result into a date.
//! When the result is flagged as special (is_special), the stored special date is
//! returned unchanged; otherwise the date is assembled by Date::FromDate from the
//! first three entries of data (presumably year, month, day - confirm against Parse).
date_t StrpTimeFormat::ParseResult::ToDate() {
	return is_special ? special : Date::FromDate(data[0], data[1], data[2]);
}

Expand All @@ -1143,6 +1175,15 @@ bool StrpTimeFormat::ParseResult::TryToDate(date_t &result) {
}

timestamp_t StrpTimeFormat::ParseResult::ToTimestamp() {
if (is_special) {
if (special == date_t::infinity()) {
return timestamp_t::infinity();
} else if (special == date_t::ninfinity()) {
return timestamp_t::ninfinity();
}
return Timestamp::FromDatetime(special, dtime_t(0));
}

date_t date = Date::FromDate(data[0], data[1], data[2]);
const auto hour_offset = data[7] / Interval::MINS_PER_HOUR;
const auto mins_offset = data[7] % Interval::MINS_PER_HOUR;
Expand Down
3 changes: 3 additions & 0 deletions src/include/duckdb/common/types/date.hpp
Expand Up @@ -122,6 +122,9 @@ class Date {
DUCKDB_API static date_t FromCString(const char *str, idx_t len, bool strict = false);
//! Convert a date object to a string in the format "YYYY-MM-DD"
DUCKDB_API static string ToString(date_t date);
//! Try to convert the string as a given "special" date (e.g., PINF, ...)
//! Returns true if it was successful and updates the scan pos.
DUCKDB_API static bool TryConvertDateSpecial(const char *buf, idx_t len, idx_t &pos, const char *special);
//! Try to convert text in a buffer to a date; returns true if parsing was successful
//! If the date was a "special" value, the special flag will be set.
DUCKDB_API static bool TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool &special,
Expand Down
3 changes: 3 additions & 0 deletions src/include/duckdb/function/scalar/strftime_format.hpp
Expand Up @@ -131,6 +131,9 @@ struct StrpTimeFormat : public StrTimeFormat {
string error_message;
idx_t error_position = DConstants::INVALID_INDEX;

bool is_special;
date_t special;

date_t ToDate();
timestamp_t ToTimestamp();

Expand Down
12 changes: 7 additions & 5 deletions test/sql/function/date/test_strftime.test
Expand Up @@ -114,15 +114,17 @@ SELECT strftime('2019-01-23'::DATE, 42);

foreach datatype DATE TIMESTAMP

foreach special infinity -infinity

# PG to_char returns NULL here
# but we can do better.
query I
SELECT strftime('${special}'::${datatype}, '%Y-%m-%d');
SELECT strftime('infinity'::${datatype}, '%Y-%m-%d');
----
NULL
infinity

endloop
query I
SELECT strftime('-infinity'::${datatype}, '%Y-%m-%d');
----
-infinity

endloop

Expand Down
38 changes: 24 additions & 14 deletions test/sql/function/timestamp/test_icu_strftime.test
Expand Up @@ -16,56 +16,68 @@ PRAGMA enable_verification

statement ok
CREATE TABLE timestamps AS SELECT ts::TIMESTAMPTZ AS ts FROM (VALUES
('-infinity'),
('0044-03-13 (BC) 10:33:41.987654+01'),
('1962-07-31 12:20:48.123456+00'),
('epoch'),
('2021-01-01 00:00:00+00'),
('2021-02-02 00:00:00+00'),
('2021-11-26 10:15:13.123456+00'),
('2021-11-15 02:30:00-08'),
('2021-11-15 02:30:00-07'),
('2021-12-25 00:00:00+02'),
('infinity'),
(NULL),
) tbl(ts);

# String casts
query I
SELECT ts::VARCHAR FROM timestamps;
----
-infinity
0044-03-13 (BC) 01:40:43.987654-07:52
1962-07-31 05:20:48.123456-07
1969-12-31 16:00:00-08
2020-12-31 16:00:00-08
2021-02-01 16:00:00-08
2021-11-26 02:15:13.123456-08
2021-11-15 02:30:00-08
2021-11-15 01:30:00-08
2021-12-24 14:00:00-08
infinity
NULL

query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %Z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 01:40:43.987654-07:52 -43-03-13 01:40:43.987654 America/Los_Angeles
1962-07-31 05:20:48.123456-07 1962-07-31 05:20:48.123456 America/Los_Angeles
1969-12-31 16:00:00-08 1969-12-31 16:00:00.000000 America/Los_Angeles
2020-12-31 16:00:00-08 2020-12-31 16:00:00.000000 America/Los_Angeles
2021-02-01 16:00:00-08 2021-02-01 16:00:00.000000 America/Los_Angeles
2021-11-26 02:15:13.123456-08 2021-11-26 02:15:13.123456 America/Los_Angeles
2021-11-15 02:30:00-08 2021-11-15 02:30:00.000000 America/Los_Angeles
2021-11-15 01:30:00-08 2021-11-15 01:30:00.000000 America/Los_Angeles
2021-12-24 14:00:00-08 2021-12-24 14:00:00.000000 America/Los_Angeles
infinity infinity
NULL NULL

# TZ at the start
query II
SELECT ts, strftime(ts, '%Z %Y-%m-%d %H:%M:%S.%f') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 01:40:43.987654-07:52 America/Los_Angeles -43-03-13 01:40:43.987654
1962-07-31 05:20:48.123456-07 America/Los_Angeles 1962-07-31 05:20:48.123456
1969-12-31 16:00:00-08 America/Los_Angeles 1969-12-31 16:00:00.000000
2020-12-31 16:00:00-08 America/Los_Angeles 2020-12-31 16:00:00.000000
2021-02-01 16:00:00-08 America/Los_Angeles 2021-02-01 16:00:00.000000
2021-11-26 02:15:13.123456-08 America/Los_Angeles 2021-11-26 02:15:13.123456
2021-11-15 02:30:00-08 America/Los_Angeles 2021-11-15 02:30:00.000000
2021-11-15 01:30:00-08 America/Los_Angeles 2021-11-15 01:30:00.000000
2021-12-24 14:00:00-08 America/Los_Angeles 2021-12-24 14:00:00.000000
infinity infinity
NULL NULL

#
Expand Down Expand Up @@ -195,27 +207,33 @@ SET TimeZone='Asia/Kathmandu';
query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %Z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 15:14:57.987654+05:41 -43-03-13 15:14:57.987654 Asia/Kathmandu
1962-07-31 17:50:48.123456+05:30 1962-07-31 17:50:48.123456 Asia/Kathmandu
1970-01-01 05:30:00+05:30 1970-01-01 05:30:00.000000 Asia/Kathmandu
2021-01-01 05:45:00+05:45 2021-01-01 05:45:00.000000 Asia/Kathmandu
2021-02-02 05:45:00+05:45 2021-02-02 05:45:00.000000 Asia/Kathmandu
2021-11-26 16:00:13.123456+05:45 2021-11-26 16:00:13.123456 Asia/Kathmandu
2021-11-15 16:15:00+05:45 2021-11-15 16:15:00.000000 Asia/Kathmandu
2021-11-15 15:15:00+05:45 2021-11-15 15:15:00.000000 Asia/Kathmandu
2021-12-25 03:45:00+05:45 2021-12-25 03:45:00.000000 Asia/Kathmandu
infinity infinity
NULL NULL

query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 15:14:57.987654+05:41 -43-03-13 15:14:57.987654 +05:41
1962-07-31 17:50:48.123456+05:30 1962-07-31 17:50:48.123456 +05:30
1970-01-01 05:30:00+05:30 1970-01-01 05:30:00.000000 +05:30
2021-01-01 05:45:00+05:45 2021-01-01 05:45:00.000000 +05:45
2021-02-02 05:45:00+05:45 2021-02-02 05:45:00.000000 +05:45
2021-11-26 16:00:13.123456+05:45 2021-11-26 16:00:13.123456 +05:45
2021-11-15 16:15:00+05:45 2021-11-15 16:15:00.000000 +05:45
2021-11-15 15:15:00+05:45 2021-11-15 15:15:00.000000 +05:45
2021-12-25 03:45:00+05:45 2021-12-25 03:45:00.000000 +05:45
infinity infinity
NULL NULL

statement ok
Expand All @@ -224,43 +242,35 @@ SET TimeZone='Canada/Newfoundland';
query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %Z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 06:02:49.987654-03:30 -43-03-13 06:02:49.987654 Canada/Newfoundland
1962-07-31 09:50:48.123456-02:30 1962-07-31 09:50:48.123456 Canada/Newfoundland
1969-12-31 20:30:00-03:30 1969-12-31 20:30:00.000000 Canada/Newfoundland
2020-12-31 20:30:00-03:30 2020-12-31 20:30:00.000000 Canada/Newfoundland
2021-02-01 20:30:00-03:30 2021-02-01 20:30:00.000000 Canada/Newfoundland
2021-11-26 06:45:13.123456-03:30 2021-11-26 06:45:13.123456 Canada/Newfoundland
2021-11-15 07:00:00-03:30 2021-11-15 07:00:00.000000 Canada/Newfoundland
2021-11-15 06:00:00-03:30 2021-11-15 06:00:00.000000 Canada/Newfoundland
2021-12-24 18:30:00-03:30 2021-12-24 18:30:00.000000 Canada/Newfoundland
infinity infinity
NULL NULL

query II
SELECT ts, strftime(ts, '%Y-%m-%d %H:%M:%S.%f %z') FROM timestamps;
----
-infinity -infinity
0044-03-13 (BC) 06:02:49.987654-03:30 -43-03-13 06:02:49.987654 -03:30
1962-07-31 09:50:48.123456-02:30 1962-07-31 09:50:48.123456 -02:30
1969-12-31 20:30:00-03:30 1969-12-31 20:30:00.000000 -03:30
2020-12-31 20:30:00-03:30 2020-12-31 20:30:00.000000 -03:30
2021-02-01 20:30:00-03:30 2021-02-01 20:30:00.000000 -03:30
2021-11-26 06:45:13.123456-03:30 2021-11-26 06:45:13.123456 -03:30
2021-11-15 07:00:00-03:30 2021-11-15 07:00:00.000000 -03:30
2021-11-15 06:00:00-03:30 2021-11-15 06:00:00.000000 -03:30
2021-12-24 18:30:00-03:30 2021-12-24 18:30:00.000000 -03:30
infinity infinity
NULL NULL

#
# Infinities
#

foreach special infinity -infinity

# PG to_char returns NULL here
query I
SELECT strftime('${special}'::TIMESTAMPTZ, '%Y-%m-%d');
----
NULL

endloop

#
# Errors/Coverage
#
Expand Down
21 changes: 21 additions & 0 deletions test/sql/function/timestamp/test_icu_strptime.test
Expand Up @@ -43,6 +43,27 @@ select strptime('2022-03-05 17:59:17.123456789 CST', '%Y-%m-%d %H:%M:%S.%n %Z');
----
2022-03-05 15:59:17.123457-08

# Parse specials

foreach func strptime try_strptime

query I
select ${func}('infinity', '%Y-%m-%d %H:%M:%S.%g %Z');
----
infinity

query I
select ${func}('-infinity', '%Y-%m-%d %H:%M:%S.%g %Z');
----
-infinity

query I
select ${func}('epoch', '%Y-%m-%d %H:%M:%S.%g %Z');
----
1969-12-31 16:00:00-08

endloop

#
# Parsing pre-Gregorian timestamps
#
Expand Down