From 1286193796521a8df5d0c47f10fa3741c1738f4a Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 21 Sep 2022 15:18:22 +0800 Subject: [PATCH 1/3] [Improvement](dateformat) Improve efficiency for function `date_format` --- be/src/vec/functions/date_time_transforms.h | 1 + be/src/vec/runtime/vdatetime_value.cpp | 176 ++++++++++++-------- 2 files changed, 107 insertions(+), 70 deletions(-) diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 6f733cc21866ec..82557132219915 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -338,6 +338,7 @@ struct TransformerToStringTwoArgument { PaddedPODArray& null_map) { auto len = ts.size(); res_offsets.resize(len); + res_data.reserve(len * format.size() + len); null_map.resize_fill(len, false); size_t offset = 0; diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 5d5c71ac324692..c9e740de9a38de 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -587,8 +587,32 @@ int VecDateTimeValue::compute_format_len(const char* format, int len) { return size; } +static const char digits100[201] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + +char* write_two_digits_to_string(int number, char* dst) { + memcpy(dst, &digits100[number * 2], 2); + return dst + 2; +} + +char* write_four_digits_to_string(int number, char* dst) { + memcpy(dst, &digits100[(number / 100) * 2], 2); + memcpy(dst + 2, &digits100[(number % 100) * 2], 2); + return dst + 4; +} + bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) const { char buf[64]; + char* cursor = buf; char* pos = NULL; const char* ptr = format; const char* end = format + len; @@ -602,6 +626,54 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c // Skip '%' ptr++; switch (ch = *ptr++) { + case 'y': + // Year, numeric (two digits) + to = write_two_digits_to_string(_year % 100, to); + cursor += 2; + pos = cursor; + break; + case 'Y': + // Year, numeric, four digits + to = write_four_digits_to_string(_year, to); + cursor += 4; + pos = cursor; + break; + case 'd': + // Day of month (00...31) + to = write_two_digits_to_string(_day, to); + cursor += 2; + pos = cursor; + break; + case 'H': + to = write_two_digits_to_string(_hour, to); + cursor += 2; + pos = cursor; + break; + case 'i': + // Minutes, numeric (00..59) + to = write_two_digits_to_string(_minute, to); + cursor += 2; + pos = cursor; + break; + case 'm': + to = write_two_digits_to_string(_month, to); + cursor += 2; + pos = cursor; + break; + case 'h': + case 'I': + // Hour (01..12) + to = write_two_digits_to_string((_hour % 24 + 11) % 12 + 1, to); + cursor += 2; + pos = cursor; + break; + case 's': + case 'S': + // Seconds (00..59) + to = write_two_digits_to_string(_second, to); + cursor += 2; + pos = cursor; + break; case 'a': // Abbreviated weekday name if (_type == TIME_TIME || (_year == 0 && _month == 0)) { @@ -618,18 +690,13 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c break; case 'c': // Month, numeric (0...12) - pos = int_to_str(_month, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); - break; - case 'd': - // Day of month (00...31) - pos = int_to_str(_day, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str(_month, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'D': // Day of the month with English suffix (0th, 1st, ...) - pos = int_to_str(_day, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(_day, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); if (_day >= 10 && _day <= 19) { to = append_string("th", to); } else { @@ -651,49 +718,28 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c break; case 'e': // Day of the month, numeric (0..31) - pos = int_to_str(_day, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(_day, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'f': // Microseconds (000000..999999) - pos = int_to_str(0, buf); - to = append_with_prefix(buf, pos - buf, '0', 6, to); - break; - case 'h': - case 'I': - // Hour (01..12) - pos = int_to_str((_hour % 24 + 11) % 12 + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'H': - // Hour (00..23) - pos = int_to_str(_hour, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'i': - // Minutes, numeric (00..59) - pos = int_to_str(_minute, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str(0, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 6, to); break; case 'j': // Day of year (001..366) - pos = int_to_str(daynr() - doris::calc_daynr(_year, 1, 1) + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 3, to); + pos = int_to_str(daynr() - doris::calc_daynr(_year, 1, 1) + 1, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 3, to); break; case 'k': // Hour (0..23) - pos = int_to_str(_hour, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(_hour, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'l': // Hour (1..12) - pos = int_to_str((_hour % 24 + 11) % 12 + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); - break; - case 'm': - // Month, numeric (00..12) - pos = int_to_str(_month, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str((_hour % 24 + 11) % 12 + 1, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'M': // Month name (January..December) @@ -728,12 +774,6 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c to = append_string(" AM", to); } break; - case 's': - case 'S': - // Seconds (00..59) - pos = int_to_str(_second, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; case 'T': // Time, 24-hour (hh:mm:ss) *to++ = (char)('0' + ((_hour % 24) / 10)); @@ -753,8 +793,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c if (_type == TIME_TIME) { return false; } - pos = int_to_str(week(mysql_week_mode(1)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(1)), to); + cursor += 2; + pos = cursor; break; case 'U': // Week (00..53), where Sunday is the first day of the week; @@ -762,8 +803,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c if (_type == TIME_TIME) { return false; } - pos = int_to_str(week(mysql_week_mode(0)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(0)), to); + cursor += 2; + pos = cursor; break; case 'v': // Week (01..53), where Monday is the first day of the week; @@ -771,8 +813,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c if (_type == TIME_TIME) { return false; } - pos = int_to_str(week(mysql_week_mode(3)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(3)), to); + cursor += 2; + pos = cursor; break; case 'V': // Week (01..53), where Sunday is the first day of the week; @@ -780,16 +823,17 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c if (_type == TIME_TIME) { return false; } - pos = int_to_str(week(mysql_week_mode(2)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(2)), to); + cursor += 2; + pos = cursor; break; case 'w': // Day of the week (0=Sunday..6=Saturday) if (_type == TIME_TIME || (_month == 0 && _year == 0)) { return false; } - pos = int_to_str(doris::calc_weekday(daynr(), true), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(doris::calc_weekday(daynr(), true), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'W': // Weekday name (Sunday..Saturday) @@ -803,8 +847,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c } uint32_t year = 0; calc_week(*this, mysql_week_mode(3), &year, true); - pos = int_to_str(year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); + to = write_four_digits_to_string(_year, to); + cursor += 4; + pos = cursor; break; } case 'X': { @@ -815,20 +860,11 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c } uint32_t year = 0; calc_week(*this, mysql_week_mode(2), &year); - pos = int_to_str(year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); + to = write_four_digits_to_string(_year, to); + cursor += 4; + pos = cursor; break; } - case 'y': - // Year, numeric (two digits) - pos = int_to_str(_year % 100, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'Y': - // Year, numeric, four digits - pos = int_to_str(_year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); - break; default: *to++ = ch; break; From 8c4288d52f83aa4f7ab1e109b0bd3b212d931b0c Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 21 Sep 2022 15:56:15 +0800 Subject: [PATCH 2/3] update --- be/src/vec/runtime/vdatetime_value.cpp | 167 ++++++++++++++----------- 1 file changed, 92 insertions(+), 75 deletions(-) diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index c9e740de9a38de..87f0eddacbe2e6 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -847,7 +847,7 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c } uint32_t year = 0; calc_week(*this, mysql_week_mode(3), &year, true); - to = write_four_digits_to_string(_year, to); + to = write_four_digits_to_string(year, to); cursor += 4; pos = cursor; break; @@ -860,7 +860,7 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c } uint32_t year = 0; calc_week(*this, mysql_week_mode(2), &year); - to = write_four_digits_to_string(_year, to); + to = write_four_digits_to_string(year, to); cursor += 4; pos = cursor; break; @@ -1050,7 +1050,7 @@ static bool str_to_int64(const char* ptr, const char** endptr, int64_t* ret) { return false; } // Skip '0' - while (ptr < end && *ptr == '0') { + while (ptr < end && * ptr == '0') { ptr++; } const char* n_end = ptr + 9; @@ -1091,7 +1091,7 @@ static int find_in_lib(const char* lib[], const char* str, const char* end) { for (; lib[pos] != NULL; ++pos) { const char* i = str; const char* j = lib[pos]; - while (i < end && *j) { + while (i < end && * j) { if (toupper(*i) != toupper(*j)) { break; } @@ -2724,6 +2724,7 @@ template bool DateV2Value::to_format_string(const char* format, int len, char* to) const { char buf[64]; char* pos = nullptr; + char* cursor = buf; const char* ptr = format; const char* end = format + len; char ch = '\0'; @@ -2736,6 +2737,54 @@ bool DateV2Value::to_format_string(const char* format, int len, char* to) con // Skip '%' ptr++; switch (ch = *ptr++) { + case 'y': + // Year, numeric (two digits) + to = write_two_digits_to_string(this->year() % 100, to); + cursor += 2; + pos = cursor; + break; + case 'Y': + // Year, numeric, four digits + to = write_four_digits_to_string(this->year(), to); + cursor += 4; + pos = cursor; + break; + case 'd': + // Day of month (00...31) + to = write_two_digits_to_string(this->day(), to); + cursor += 2; + pos = cursor; + break; + case 'H': + to = write_two_digits_to_string(this->hour(), to); + cursor += 2; + pos = cursor; + break; + case 'i': + // Minutes, numeric (00..59) + to = write_two_digits_to_string(this->minute(), to); + cursor += 2; + pos = cursor; + break; + case 'm': + to = write_two_digits_to_string(this->month(), to); + cursor += 2; + pos = cursor; + break; + case 'h': + case 'I': + // Hour (01..12) + to = write_two_digits_to_string((this->hour() % 24 + 11) % 12 + 1, to); + cursor += 2; + pos = cursor; + break; + case 's': + case 'S': + // Seconds (00..59) + to = write_two_digits_to_string(this->second(), to); + cursor += 2; + pos = cursor; + break; case 'a': // Abbreviated weekday name if (this->year() == 0 && this->month() == 0) { @@ -2752,18 +2801,13 @@ bool DateV2Value::to_format_string(const char* format, int len, char* to) con break; case 'c': // Month, numeric (0...12) - pos = int_to_str(this->month(), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); - break; - case 'd': - // Day of month (00...31) - pos = int_to_str(this->day(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str(this->month(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'D': // Day of the month with English suffix (0th, 1st, ...) - pos = int_to_str(this->day(), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(this->day(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); if (this->day() >= 10 && this->day() <= 19) { to = append_string("th", to); } else { @@ -2785,49 +2829,28 @@ bool DateV2Value::to_format_string(const char* format, int len, char* to) con break; case 'e': // Day of the month, numeric (0..31) - pos = int_to_str(this->day(), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(this->day(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'f': // Microseconds (000000..999999) - pos = int_to_str(this->microsecond(), buf); - to = append_with_prefix(buf, pos - buf, '0', 6, to); - break; - case 'h': - case 'I': - // Hour (01..12) - int_to_str((this->hour() % 24 + 11) % 12 + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'H': - // Hour (00..23) - pos = int_to_str(this->hour(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'i': - // Minutes, numeric (00..59) - pos = int_to_str(this->minute(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str(this->microsecond(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 6, to); break; case 'j': // Day of year (001..366) - pos = int_to_str(daynr() - doris::calc_daynr(this->year(), 1, 1) + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 3, to); + pos = int_to_str(daynr() - doris::calc_daynr(this->year(), 1, 1) + 1, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 3, to); break; case 'k': // Hour (0..23) - pos = int_to_str(this->hour(), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(this->hour(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'l': // Hour (1..12) - pos = int_to_str((this->hour() % 12) + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); - break; - case 'm': - // Month, numeric (00..12) - pos = int_to_str(this->month(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str((this->hour() % 24 + 11) % 12 + 1, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'M': // Month name (January..December) @@ -2838,7 +2861,11 @@ bool DateV2Value::to_format_string(const char* format, int len, char* to) con break; case 'p': // AM or PM - to = append_string("AM", to); + if ((this->hour() % 24) >= 12) { + to = append_string("PM", to); + } else { + to = append_string("AM", to); + } break; case 'r': { // Time, 12-hour (hh:mm:ss followed by AM or PM) @@ -2859,12 +2886,6 @@ bool DateV2Value::to_format_string(const char* format, int len, char* to) con } break; } - case 's': - case 'S': - // Seconds (00..59) - pos = int_to_str(this->second(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; case 'T': { // Time, 24-hour (hh:mm:ss) *to++ = (char)('0' + ((this->hour() % 24) / 10)); @@ -2882,34 +2903,38 @@ bool DateV2Value::to_format_string(const char* format, int len, char* to) con case 'u': // Week (00..53), where Monday is the first day of the week; // WEEK() mode 1 - pos = int_to_str(week(mysql_week_mode(1)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(1)), to); + cursor += 2; + pos = cursor; break; case 'U': // Week (00..53), where Sunday is the first day of the week; // WEEK() mode 0 - pos = int_to_str(week(mysql_week_mode(0)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(0)), to); + cursor += 2; + pos = cursor; break; case 'v': // Week (01..53), where Monday is the first day of the week; // WEEK() mode 3; used with %x - pos = int_to_str(week(mysql_week_mode(3)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(3)), to); + cursor += 2; + pos = cursor; break; case 'V': // Week (01..53), where Sunday is the first day of the week; // WEEK() mode 2; used with %X - pos = int_to_str(week(mysql_week_mode(2)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(2)), to); + cursor += 2; + pos = cursor; break; case 'w': // Day of the week (0=Sunday..6=Saturday) if (this->month() == 0 && this->year() == 0) { return false; } - pos = int_to_str(doris::calc_weekday(daynr(), true), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(doris::calc_weekday(daynr(), true), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'W': // Weekday name (Sunday..Saturday) @@ -2921,8 +2946,9 @@ bool DateV2Value::to_format_string(const char* format, int len, char* to) con uint16_t year = 0; calc_week(this->daynr(), this->year(), this->month(), this->day(), mysql_week_mode(3), &year, true); - pos = int_to_str(year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); + to = write_four_digits_to_string(year, to); + cursor += 4; + pos = cursor; break; } case 'X': { @@ -2931,20 +2957,11 @@ bool DateV2Value::to_format_string(const char* format, int len, char* to) con uint16_t year = 0; calc_week(this->daynr(), this->year(), this->month(), this->day(), mysql_week_mode(2), &year); - pos = int_to_str(year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); + to = write_four_digits_to_string(year, to); + cursor += 4; + pos = cursor; break; } - case 'y': - // Year, numeric (two digits) - pos = int_to_str(this->year() % 100, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'Y': - // Year, numeric, four digits - pos = int_to_str(this->year(), buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); - break; default: *to++ = ch; break; From d11e2ed964c17bb6d010e9a1132858cf85a76313 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 21 Sep 2022 19:20:33 +0800 Subject: [PATCH 3/3] update --- be/src/vec/runtime/vdatetime_value.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 87f0eddacbe2e6..1696e57d89de6d 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -611,6 +611,9 @@ char* write_four_digits_to_string(int number, char* dst) { } bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) const { + if (check_range(_year, _month, _day, _hour, _minute, _second, _type)) { + return false; + } char buf[64]; char* cursor = buf; char* pos = NULL; @@ -1050,7 +1053,7 @@ static bool str_to_int64(const char* ptr, const char** endptr, int64_t* ret) { return false; } // Skip '0' - while (ptr < end && * ptr == '0') { + while (ptr < end && *ptr == '0') { ptr++; } const char* n_end = ptr + 9; @@ -1091,7 +1094,7 @@ static int find_in_lib(const char* lib[], const char* str, const char* end) { for (; lib[pos] != NULL; ++pos) { const char* i = str; const char* j = lib[pos]; - while (i < end && * j) { + while (i < end && *j) { if (toupper(*i) != toupper(*j)) { break; } @@ -2722,6 +2725,9 @@ void DateV2Value::set_microsecond(uint32_t microsecond) { template bool DateV2Value::to_format_string(const char* format, int len, char* to) const { + if (is_invalid(year(), month(), day(), hour(), minute(), second(), microsecond())) { + return false; + } char buf[64]; char* pos = nullptr; char* cursor = buf;