Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #9887: ISO Format Directives #9910

Merged
merged 3 commits into from Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/common/enum_util.cpp
Expand Up @@ -5608,6 +5608,12 @@ const char* EnumUtil::ToChars<StrTimeSpecifier>(StrTimeSpecifier value) {
return "LOCALE_APPROPRIATE_TIME";
case StrTimeSpecifier::NANOSECOND_PADDED:
return "NANOSECOND_PADDED";
case StrTimeSpecifier::YEAR_ISO:
return "YEAR_ISO";
case StrTimeSpecifier::WEEKDAY_ISO:
return "WEEKDAY_ISO";
case StrTimeSpecifier::WEEK_NUMBER_ISO:
return "WEEK_NUMBER_ISO";
default:
throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
}
Expand Down Expand Up @@ -5714,6 +5720,15 @@ StrTimeSpecifier EnumUtil::FromString<StrTimeSpecifier>(const char *value) {
if (StringUtil::Equals(value, "NANOSECOND_PADDED")) {
return StrTimeSpecifier::NANOSECOND_PADDED;
}
if (StringUtil::Equals(value, "YEAR_ISO")) {
return StrTimeSpecifier::YEAR_ISO;
}
if (StringUtil::Equals(value, "WEEKDAY_ISO")) {
return StrTimeSpecifier::WEEKDAY_ISO;
}
if (StringUtil::Equals(value, "WEEK_NUMBER_ISO")) {
return StrTimeSpecifier::WEEK_NUMBER_ISO;
}
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}

Expand Down
178 changes: 176 additions & 2 deletions src/function/scalar/strftime_format.cpp
Expand Up @@ -14,6 +14,7 @@ idx_t StrfTimepecifierSize(StrTimeSpecifier specifier) {
case StrTimeSpecifier::ABBREVIATED_MONTH_NAME:
return 3;
case StrTimeSpecifier::WEEKDAY_DECIMAL:
case StrTimeSpecifier::WEEKDAY_ISO:
return 1;
case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
Expand All @@ -25,6 +26,7 @@ idx_t StrfTimepecifierSize(StrTimeSpecifier specifier) {
case StrTimeSpecifier::AM_PM:
case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
case StrTimeSpecifier::WEEK_NUMBER_ISO:
return 2;
case StrTimeSpecifier::NANOSECOND_PADDED:
return 9;
Expand All @@ -34,6 +36,8 @@ idx_t StrfTimepecifierSize(StrTimeSpecifier specifier) {
return 3;
case StrTimeSpecifier::DAY_OF_YEAR_PADDED:
return 3;
case StrTimeSpecifier::YEAR_ISO:
return 4;
default:
return 0;
}
Expand Down Expand Up @@ -210,7 +214,10 @@ bool StrfTimeFormat::IsDateSpecifier(StrTimeSpecifier specifier) {
case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL:
case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
case StrTimeSpecifier::WEEK_NUMBER_ISO:
case StrTimeSpecifier::WEEKDAY_DECIMAL:
case StrTimeSpecifier::WEEKDAY_ISO:
case StrTimeSpecifier::YEAR_ISO:
return true;
default:
return false;
Expand Down Expand Up @@ -246,12 +253,22 @@ char *StrfTimeFormat::WriteDateSpecifier(StrTimeSpecifier specifier, date_t date
case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
target = WritePadded2(target, Date::ExtractWeekNumberRegular(date, false));
break;
case StrTimeSpecifier::WEEK_NUMBER_ISO:
target = WritePadded2(target, Date::ExtractISOWeekNumber(date));
break;
case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL: {
uint32_t doy = Date::ExtractDayOfTheYear(date);
target += NumericHelper::UnsignedLength<uint32_t>(doy);
NumericHelper::FormatUnsigned(doy, target);
break;
}
case StrTimeSpecifier::YEAR_ISO:
target = WritePadded(target, Date::ExtractISOYearNumber(date), 4);
break;
case StrTimeSpecifier::WEEKDAY_ISO:
*target = char('0' + uint8_t(Date::ExtractISODayOfTheWeek(date)));
target++;
break;
default:
throw InternalException("Unimplemented date specifier for strftime");
}
Expand Down Expand Up @@ -493,6 +510,9 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
case 'w':
specifier = StrTimeSpecifier::WEEKDAY_DECIMAL;
break;
case 'u':
specifier = StrTimeSpecifier::WEEKDAY_ISO;
break;
case 'd':
specifier = StrTimeSpecifier::DAY_OF_MONTH_PADDED;
break;
Expand All @@ -512,6 +532,9 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
case 'Y':
specifier = StrTimeSpecifier::YEAR_DECIMAL;
break;
case 'G':
specifier = StrTimeSpecifier::YEAR_ISO;
break;
case 'H':
specifier = StrTimeSpecifier::HOUR_24_PADDED;
break;
Expand Down Expand Up @@ -551,6 +574,9 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
case 'W':
specifier = StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST;
break;
case 'V':
specifier = StrTimeSpecifier::WEEK_NUMBER_ISO;
break;
case 'c':
case 'x':
case 'X':
Expand Down Expand Up @@ -646,6 +672,7 @@ void StrpTimeFormat::AddFormatSpecifier(string preceding_literal, StrTimeSpecifi
int StrpTimeFormat::NumericSpecifierWidth(StrTimeSpecifier specifier) {
switch (specifier) {
case StrTimeSpecifier::WEEKDAY_DECIMAL:
case StrTimeSpecifier::WEEKDAY_ISO:
return 1;
case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
case StrTimeSpecifier::DAY_OF_MONTH:
Expand All @@ -663,12 +690,14 @@ int StrpTimeFormat::NumericSpecifierWidth(StrTimeSpecifier specifier) {
case StrTimeSpecifier::SECOND_DECIMAL:
case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
case StrTimeSpecifier::WEEK_NUMBER_ISO:
return 2;
case StrTimeSpecifier::MILLISECOND_PADDED:
case StrTimeSpecifier::DAY_OF_YEAR_PADDED:
case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL:
return 3;
case StrTimeSpecifier::YEAR_DECIMAL:
case StrTimeSpecifier::YEAR_ISO:
return 4;
case StrTimeSpecifier::MICROSECOND_PADDED:
return 6;
Expand Down Expand Up @@ -772,6 +801,12 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
uint64_t yearday = 0;
bool has_weekday = false;

// ISO state (%G/%V/%u)
// Out of range values to detect multiple specifications
uint64_t iso_year = 10000;
uint64_t iso_week = 54;
uint64_t iso_weekday = 8;

for (idx_t i = 0;; i++) {
D_ASSERT(i < literals.size());
// first compare the literal
Expand Down Expand Up @@ -843,6 +878,17 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
break;
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
switch (offset_specifier) {
case StrTimeSpecifier::YEAR_ISO:
case StrTimeSpecifier::WEEK_NUMBER_ISO:
// Override
case StrTimeSpecifier::WEEKDAY_DECIMAL:
// First offset specifier
offset_specifier = specifiers[i];
break;
default:
break;
}
// year without century..
// Python uses 69 as a crossover point (i.e. >= 69 is 19.., < 69 is 20..)
if (number >= 100) {
Expand All @@ -858,9 +904,59 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
}
break;
case StrTimeSpecifier::YEAR_DECIMAL:
switch (offset_specifier) {
case StrTimeSpecifier::YEAR_ISO:
case StrTimeSpecifier::WEEK_NUMBER_ISO:
// Override
case StrTimeSpecifier::WEEKDAY_DECIMAL:
// First offset specifier
offset_specifier = specifiers[i];
break;
default:
break;
}
// year as full number
result_data[0] = number;
break;
case StrTimeSpecifier::YEAR_ISO:
	// %G - ISO 8601 year. Decide how it interacts with whatever offset
	// specifier has been seen so far, then range-check and record it.
	switch (offset_specifier) {
	// y/m/d overrides G/V/u but does not conflict
	case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
	case StrTimeSpecifier::DAY_OF_MONTH:
	case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
	case StrTimeSpecifier::MONTH_DECIMAL:
	case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
	case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
	case StrTimeSpecifier::YEAR_DECIMAL:
		// Just validate, don't use
		break;
	case StrTimeSpecifier::WEEKDAY_DECIMAL:
		// First offset specifier
		offset_specifier = specifiers[i];
		break;
	case StrTimeSpecifier::YEAR_ISO:
	case StrTimeSpecifier::WEEK_NUMBER_ISO:
		// Already parsing ISO: iso_year only leaves its out-of-range
		// sentinel once a %G has been parsed, so an in-range value here
		// means %G was specified twice.
		if (iso_year <= 9999) {
			error_message = "Multiple ISO year offsets specified";
			error_position = start_pos;
			return false;
		}
		break;
	default:
		// Any other offset specifier cannot be combined with %G
		error_message = "Incompatible ISO year offset specified";
		error_position = start_pos;
		return false;
	}
	if (number > 9999) {
		// %G only supports numbers between [0..9999]
		error_message = "ISO Year out of range, expected a value between 0000 and 9999";
		error_position = start_pos;
		return false;
	}
	iso_year = number;
	break;
case StrTimeSpecifier::HOUR_24_PADDED:
case StrTimeSpecifier::HOUR_24_DECIMAL:
if (number >= 24) {
Expand Down Expand Up @@ -926,12 +1022,16 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
case StrTimeSpecifier::MONTH_DECIMAL:
// Just validate, don't use
break;
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
case StrTimeSpecifier::YEAR_DECIMAL:
// Switch to offset parsing
case StrTimeSpecifier::WEEKDAY_DECIMAL:
// First offset specifier
offset_specifier = specifiers[i];
break;
default:
error_message = "Multiple year offsets specified";
error_message = "Multiple week offsets specified";
error_position = start_pos;
return false;
}
Expand All @@ -951,6 +1051,57 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
has_weekday = true;
weekday = number;
break;
case StrTimeSpecifier::WEEK_NUMBER_ISO:
	// %V - ISO 8601 week number. Decide how it interacts with whatever
	// offset specifier has been seen so far, then range-check and record it.
	// y/m/d overrides G/V/u but does not conflict
	switch (offset_specifier) {
	case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
	case StrTimeSpecifier::DAY_OF_MONTH:
	case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
	case StrTimeSpecifier::MONTH_DECIMAL:
	case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
	case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
	case StrTimeSpecifier::YEAR_DECIMAL:
		// Just validate, don't use
		break;
	case StrTimeSpecifier::WEEKDAY_DECIMAL:
		// First offset specifier
		offset_specifier = specifiers[i];
		break;
	case StrTimeSpecifier::WEEK_NUMBER_ISO:
	case StrTimeSpecifier::YEAR_ISO:
		// Already parsing ISO: iso_week only leaves its out-of-range
		// sentinel once a %V has been parsed, so an in-range value here
		// means %V was specified twice.
		if (iso_week <= 53) {
			error_message = "Multiple ISO week offsets specified";
			error_position = start_pos;
			return false;
		}
		break;
	default:
		// Any other offset specifier cannot be combined with %V
		error_message = "Incompatible ISO week offset specified";
		error_position = start_pos;
		return false;
	}
	if (number < 1 || number > 53) {
		error_message = "ISO week offset out of range, expected a value between 1 and 53";
		error_position = start_pos;
		return false;
	}
	iso_week = number;
	break;
case StrTimeSpecifier::WEEKDAY_ISO:
// %u - ISO 8601 weekday, 1..7.
// NOTE(review): unlike the %G and %V cases, this clause never changes
// offset_specifier; in the visible code iso_weekday is only consumed when
// offset_specifier ends up as YEAR_ISO or WEEK_NUMBER_ISO - confirm that a
// lone %u being validated but otherwise unused is intended.
// An in-range iso_weekday means a previous %u already replaced the sentinel.
if (iso_weekday <= 7) {
error_message = "Multiple ISO weekday offsets specified";
error_position = start_pos;
return false;
}
if (number < 1 || number > 7) {
error_message = "ISO weekday offset out of range, expected a value between 1 and 7";
error_position = start_pos;
return false;
}
iso_weekday = number;
break;
case StrTimeSpecifier::DAY_OF_YEAR_PADDED:
case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL:
// m/d overrides j but does not conflict
Expand All @@ -961,6 +1112,11 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
case StrTimeSpecifier::MONTH_DECIMAL:
// Just validate, don't use
break;
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
case StrTimeSpecifier::YEAR_DECIMAL:
// Part of the offset
break;
case StrTimeSpecifier::WEEKDAY_DECIMAL:
// First offset specifier
offset_specifier = specifiers[i];
Expand Down Expand Up @@ -1109,6 +1265,21 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
}
}
switch (offset_specifier) {
case StrTimeSpecifier::YEAR_ISO:
case StrTimeSpecifier::WEEK_NUMBER_ISO: {
// Resolve the parsed ISO year / week / weekday into a calendar date and
// store its year, month and day in result_data[0..2].
// Any component still holding its out-of-range sentinel was not specified.
// Default to 1900-01-01
iso_year = (iso_year > 9999) ? 1900 : iso_year;
iso_week = (iso_week > 53) ? 1 : iso_week;
iso_weekday = (iso_weekday > 7) ? 1 : iso_weekday;
// Gregorian and ISO agree on the year of January 4
auto jan4 = Date::FromDate(iso_year, 1, 4);
// ISO Week 1 starts on the previous Monday
auto week1 = Date::GetMondayOfCurrentWeek(jan4);
// ISO Week N starts N-1 weeks later
// NOTE(review): nothing here checks that week 53 exists in iso_year; for a
// 52-week ISO year the offset presumably lands in week 1 of the following
// ISO year instead of raising an error - confirm this is intended.
auto iso_date = week1 + (iso_week - 1) * 7 + (iso_weekday - 1);
Date::Convert(iso_date, result_data[0], result_data[1], result_data[2]);
break;
}
case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST: {
// Adjust weekday to be 0-based for the week type
Expand Down Expand Up @@ -1136,7 +1307,10 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
case StrTimeSpecifier::DAY_OF_MONTH:
case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
case StrTimeSpecifier::MONTH_DECIMAL:
// m/d overrides UWw/j
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
case StrTimeSpecifier::YEAR_DECIMAL:
// m/d overrides UWVwu/j
break;
default:
D_ASSERT(offset_specifier == StrTimeSpecifier::WEEKDAY_DECIMAL);
Expand Down
8 changes: 7 additions & 1 deletion src/include/duckdb/function/scalar/strftime_format.hpp
Expand Up @@ -53,7 +53,13 @@ enum class StrTimeSpecifier : uint8_t {
29, // %c - Locale’s appropriate date and time representation. (Mon Sep 30 07:06:05 2013)
LOCALE_APPROPRIATE_DATE = 30, // %x - Locale’s appropriate date representation. (09/30/13)
LOCALE_APPROPRIATE_TIME = 31, // %X - Locale’s appropriate time representation. (07:06:05)
NANOSECOND_PADDED = 32 // %n - Nanosecond as a decimal number, zero-padded on the left. (000000000 - 999999999)
NANOSECOND_PADDED = 32, // %n - Nanosecond as a decimal number, zero-padded on the left. (000000000 - 999999999)
// Python 3.6 ISO directives https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
YEAR_ISO =
33, // %G - ISO 8601 year with century representing the year that contains the greater part of the ISO week
WEEKDAY_ISO = 34, // %u - ISO 8601 weekday as a decimal number where 1 is Monday (1..7)
WEEK_NUMBER_ISO = 35, // %V - ISO 8601 week as a decimal number with Monday as the first day of the week.
// Week 01 is the week containing Jan 4. (01..53)
};

struct StrTimeFormat {
Expand Down