Skip to content

Commit

Permalink
Fix for DX-86729 & DX-90059 (partial testing)
Browse files Browse the repository at this point in the history
DX-86729 (Windows-only) fix applied to all platforms for behaviour consistency

Merged in initial failing milliseconds test cases; should pass on the fix branch.

+Handle inconsistent implementation-defined negative mod

+Fixed negative epoch time division; should break broken tests now

+Probably actually fix negative timestamp division rounding issue

+Fix rest(?) of timestamp test suite

+Fix structure member names in test code

+Missing comma, more cases, caveat comment

+Remove deprecated/now-incorrect constant

+Eliminate case of overcorrection for ceiling division; add comments

+Whoops also don't adjust month anymore

+Add missing expected value & fix divisor unit typos

+Add DATE pre-epoch tests & hardcode expected values

Change-Id: I16e4fe9befbff2327608bca92216a670492c8e44
  • Loading branch information
Paul Nienaber committed Apr 24, 2024
1 parent 156ac3d commit 3f1b8e9
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 98 deletions.
50 changes: 34 additions & 16 deletions flight_sql/accessors/date_array_accessor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,23 @@ using namespace arrow;
using namespace odbcabstraction;

TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {
std::vector<int32_t> values = {7589, 12320, 18980, 19095};
std::vector<int32_t> values = {7589, 12320, 18980, 19095, -1, 0};
std::vector<DATE_STRUCT> expected = {
{1990, 10, 12},
{2003, 9, 25},
{2021, 12, 19},
{2022, 4, 13},
{1969, 12, 31},
{1970, 1, 1},
};

std::shared_ptr<Array> array;
ArrayFromVector<Date32Type, int32_t>(values, &array);

DateArrayFlightSqlAccessor<CDataType_DATE, Date32Array> accessor(
dynamic_cast<NumericArray<Date32Type> *>(array.get()));

std::vector<tagDATE_STRUCT> buffer(values.size());
std::vector<DATE_STRUCT> buffer(values.size());
std::vector<ssize_t> strlen_buffer(values.size());

ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data());
Expand All @@ -37,27 +45,39 @@ TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
tm date{};

int64_t converted_time = values[i] * 86400;
GetTimeForSecondsSinceEpoch(date, converted_time);
ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
ASSERT_EQ(date.tm_mday, buffer[i].day);
ASSERT_EQ(expected[i].year, buffer[i].year);
ASSERT_EQ(expected[i].month, buffer[i].month);
ASSERT_EQ(expected[i].day, buffer[i].day);
}
}

TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110,
345600000, 432000000, 518400000};
std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110, 0,
345600000, 432000000, 518400000, -86400000, -17987443200000};
std::vector<DATE_STRUCT> expected = {
/* year(16), month(u16), day(u16) */
{1970, 1, 2},
{1970, 1, 3},
{1970, 1, 4},
{2022, 4, 12},
{1970, 1, 1},
{1970, 1, 5},
{1970, 1, 6},
{1970, 1, 7},
{1969, 12, 31},
// This is the documented lower limit of supported Gregorian dates for some parts of Boost,
// however boost::posix_time may go lower?
{1400, 1, 1},
};

std::shared_ptr<Array> array;
ArrayFromVector<Date64Type, int64_t>(values, &array);

DateArrayFlightSqlAccessor<CDataType_DATE, Date64Array> accessor(
dynamic_cast<NumericArray<Date64Type> *>(array.get()));

std::vector<tagDATE_STRUCT> buffer(values.size());
std::vector<DATE_STRUCT> buffer(values.size());
std::vector<ssize_t> strlen_buffer(values.size());

ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data());
Expand All @@ -71,11 +91,9 @@ TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
tm date{};

int64_t converted_time = values[i] / 1000;
GetTimeForSecondsSinceEpoch(date, converted_time);
ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
ASSERT_EQ(date.tm_mday, buffer[i].day);
ASSERT_EQ(expected[i].year, buffer[i].year);
ASSERT_EQ(expected[i].month, buffer[i].month);
ASSERT_EQ(expected[i].day, buffer[i].day);
}
}

Expand Down
48 changes: 28 additions & 20 deletions flight_sql/accessors/timestamp_array_accessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
#include "timestamp_array_accessor.h"
#include "odbcabstraction/calendar_utils.h"

#include <cmath>

using namespace arrow;

namespace {
int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
inline int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
int64_t divisor = 1;
switch (unit) {
case TimeUnit::SECOND:
Expand All @@ -32,27 +34,21 @@ int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
return divisor;
}

uint32_t CalculateFraction(TimeUnit::type unit, uint64_t units_since_epoch) {
uint32_t CalculateFraction(TimeUnit::type unit, int64_t units_since_epoch) {
// Convert the given remainder and time unit to nanoseconds
// since the fraction field on TIMESTAMP_STRUCT is in nanoseconds.
switch (unit) {
case TimeUnit::SECOND:
if (unit == TimeUnit::SECOND)
return 0;
case TimeUnit::MILLI:
// 1000000 nanoseconds = 1 millisecond.
return (units_since_epoch %
driver::odbcabstraction::MILLI_TO_SECONDS_DIVISOR) *
1000000;
case TimeUnit::MICRO:
// 1000 nanoseconds = 1 microsecond.
return (units_since_epoch %
driver::odbcabstraction::MICRO_TO_SECONDS_DIVISOR) * 1000;
case TimeUnit::NANO:
// 1000 nanoseconds = 1 microsecond.
return (units_since_epoch %
driver::odbcabstraction::NANO_TO_SECONDS_DIVISOR);
}
return 0;

const int64_t divisor = GetConversionToSecondsDivisor(unit);
const int64_t nano_divisor = GetConversionToSecondsDivisor(TimeUnit::NANO);

if (units_since_epoch < 0)
// See below regarding floor division; here we want ceiling division.
// FIXME this goes poorly (trying to use a value > INT64_MAX when units_since_epoch is
// less than the smallest multiple of divisor greater than INT64_MIN.
units_since_epoch += divisor * std::abs((units_since_epoch - (divisor - 1)) / divisor);
return (units_since_epoch % divisor) * (nano_divisor / divisor);
}
} // namespace

Expand All @@ -72,11 +68,23 @@ TimestampArrayFlightSqlAccessor<TARGET_TYPE, UNIT>::MoveSingleCell_impl(
ColumnBinding *binding, int64_t arrow_row, int64_t cell_counter,
int64_t &value_offset, bool update_value_offset,
odbcabstraction::Diagnostics &diagnostics) {
// Times less than the minimum integer number of seconds that can be represented
// for each time unit will not convert correctly. This is mostly interesting for
// nanoseconds as timestamps in other units are outside of the accepted range of
// Gregorian dates.
auto *buffer = static_cast<TIMESTAMP_STRUCT *>(binding->buffer);

int64_t value = this->GetArray()->Value(arrow_row);
const auto divisor = GetConversionToSecondsDivisor(UNIT);
const auto converted_result_seconds = value / divisor;
const auto converted_result_seconds =
// We want floor division here; C++ will round towards zero
(value < 0)
// Shift all "fractional" values greater than a negative int to <= that int, so rounding
// up (towards zero) results in rounding down of the original value; keep int values
// in the same rounding (towards zero) range.
? ((value - (divisor - 1)) / divisor)
// Towards zero is already floor
: value / divisor;
tm timestamp = {0};

GetTimeForSecondsSinceEpoch(timestamp, converted_result_seconds);
Expand Down
139 changes: 84 additions & 55 deletions flight_sql/accessors/timestamp_array_accessor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,27 @@ using namespace arrow;
using namespace odbcabstraction;

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL,
345600000, 432000000, 518400000};

std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL, 345600000,
432000000, 518400000, -86399000, 0, -86399999, -86399001,
86400001, 86400999};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 370000000},
{1970, 1, 3, 0, 0, 0, 0},
{1970, 1, 4, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 110000000},
{1970, 1, 5, 0, 0, 0, 0},
{1970, 1, 6, 0, 0, 0, 0},
{1970, 1, 7, 0, 0, 0, 0},
{1969, 12, 31, 0, 0, 1, 0},
{1970, 1, 1, 0, 0, 0, 0},
/* Tests both ends of the fraction rounding range to ensure we don't tip the wrong way */
{1969, 12, 31, 0, 0, 0, 1000000},
{1969, 12, 31, 0, 0, 0, 999000000},
{1970, 1, 2, 0, 0, 0, 1000000},
{1970, 1, 2, 0, 0, 0, 999000000},
};

std::shared_ptr<Array> timestamp_array;

auto timestamp_field = field("timestamp_field", timestamp(TimeUnit::MILLI));
Expand All @@ -40,26 +58,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);

tm date{};

auto converted_time = values[i] / MILLI_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);

constexpr uint32_t NANOSECONDS_PER_MILLI = 1000000;
ASSERT_EQ(buffer[i].fraction, (values[i] % MILLI_TO_SECONDS_DIVISOR) * NANOSECONDS_PER_MILLI);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {
std::vector<int64_t> values = {86400, 172800, 259200, 1649793238,
345600, 432000, 518400};
345600, 432000, 518400, -86399, 0};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 0},
{1970, 1, 3, 0, 0, 0, 0},
{1970, 1, 4, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 0},
{1970, 1, 5, 0, 0, 0, 0},
{1970, 1, 6, 0, 0, 0, 0},
{1970, 1, 7, 0, 0, 0, 0},
{1969, 12, 31, 0, 0, 1, 0},
{1970, 1, 1, 0, 0, 0, 0},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -81,23 +104,27 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
tm date{};

auto converted_time = values[i];
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, 0);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
std::vector<int64_t> values = {86400000000, 1649793238000000};
std::vector<int64_t> values = {0, 86400000000, 1649793238000000, -86399999999, -86399000001};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 1, 0, 0, 0, 0},
{1970, 1, 2, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 0},
{1969, 12, 31, 0, 0, 0, 1000},
{1969, 12, 31, 0, 0, 0, 999999000},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -120,24 +147,29 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);

tm date{};

auto converted_time = values[i] / MICRO_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
constexpr uint32_t MICROS_PER_NANO = 1000;
ASSERT_EQ(buffer[i].fraction, (values[i] % MICRO_TO_SECONDS_DIVISOR) * MICROS_PER_NANO);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {
std::vector<int64_t> values = {86400000010000, 1649793238000000000};
std::vector<int64_t> values = {86400000010000, 1649793238000000000, -86399999999999, -86399000000001,
86400000000001, 86400999999999, 0};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 10000},
{2022, 4, 12, 19, 53, 58, 0},
{1969, 12, 31, 0, 0, 0, 1},
{1969, 12, 31, 0, 0, 0, 999999999},
{1970, 1, 2, 0, 0, 0, 1},
{1970, 1, 2, 0, 0, 0, 999999999},
{1970, 1, 1, 0, 0, 0, 0},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -159,19 +191,16 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
tm date{};

auto converted_time = values[i] / NANO_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
ASSERT_EQ(buffer[i].fraction, (values[i] % NANO_TO_SECONDS_DIVISOR));

ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

} // namespace flight_sql
} // namespace driver
10 changes: 3 additions & 7 deletions odbcabstraction/calendar_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "odbcabstraction/calendar_utils.h"

#include <boost/date_time/posix_time/posix_time.hpp>
#include <cstdint>
#include <ctime>

Expand All @@ -29,12 +30,7 @@ int64_t GetTodayTimeFromEpoch() {
}

void GetTimeForSecondsSinceEpoch(tm& date, int64_t value) {
#if defined(_WIN32)
gmtime_s(&date, &value);
#else
time_t time_value = static_cast<time_t>(value);
gmtime_r(&time_value, &date);
#endif
}
date = boost::posix_time::to_tm(boost::posix_time::from_time_t(value));
}
} // namespace odbcabstraction
} // namespace driver

0 comments on commit 3f1b8e9

Please sign in to comment.