Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flip interval field ordering (#5654) #5656

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions arrow-arith/src/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1346,8 +1346,10 @@ mod tests {
IntervalMonthDayNanoType::make_value(35, -19, 41899000000000000)
])
);
let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]);
let b = IntervalMonthDayNanoArray::from(vec![1]);
let max_nanos = IntervalMonthDayNanoType::make_value(0, 0, i64::MAX);
let a = IntervalMonthDayNanoArray::from(vec![max_nanos]);
let one_nanos = IntervalMonthDayNanoType::make_value(0, 0, 1);
let b = IntervalMonthDayNanoArray::from(vec![one_nanos]);
let err = add(&a, &b).unwrap_err().to_string();
assert_eq!(
err,
Expand Down
55 changes: 29 additions & 26 deletions arrow-array/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,11 @@ Each field is independent (e.g. there is no constraint that the quantity of
nanoseconds represents less than a day's worth of time).

```text
┌──────────────────────────────┬───────────────────────────┐
Nanos Days Months
(64 bits) │ (32 bits) │ (32 bits)
└──────────────────────────────┴───────────────────────────┘
0 63 95 127 bit offset
┌────────────────────────────┬─────────────────────────────┐
Months Days Nanos
(32 bits) │ (32 bits) (64 bits)
└────────────────────────────┴─────────────────────────────┘
0 32 64 128 bit offset
```
Please see the [Arrow Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415) for more details

Expand All @@ -290,9 +290,9 @@ representation which is fast and efficient, but leads
to potentially surprising results.

For example a
`IntervalMonthDayNano` of `1 month` will compare as **greater** than a
`IntervalMonthDayNano` of `100 days` because the binary representation of `1 month`
is larger than the binary representation of 100 days.
`IntervalMonthDayNano` of `1 month` will compare as **less** than a
`IntervalMonthDayNano` of `1 days` because the binary representation of `1 month`
is smaller than the binary representation of 1 days.
"#
);
make_type!(
Expand Down Expand Up @@ -928,13 +928,14 @@ impl IntervalDayTimeType {
int32_t milliseconds = 0;
...
}
64 56 48 40 32 24 16 8 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| days | milliseconds |
+-------+-------+-------+-------+-------+-------+-------+-------+
┌──────────────┬──────────────┐
│ Days │ Milliseconds │
│ (32 bits) │ (32 bits) │
└──────────────┴──────────────┘
0 31 63 bit offset
*/
let m = millis as u64 & u32::MAX as u64;
let d = (days as u64 & u32::MAX as u64) << 32;
let m = (millis as u64 & u32::MAX as u64) << 32;
let d = days as u64 & u32::MAX as u64;
(m | d) as <IntervalDayTimeType as ArrowPrimitiveType>::Native
}

Expand All @@ -945,8 +946,8 @@ impl IntervalDayTimeType {
/// * `i` - The IntervalDayTimeType to convert
#[inline]
pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
let days = (i >> 32) as i32;
let ms = i as i32;
let days = i as i32;
let ms = (i >> 32) as i32;
(days, ms)
}
}
Expand All @@ -972,14 +973,16 @@ impl IntervalMonthDayNanoType {
int32_t days;
int64_t nanoseconds;
}
128 112 96 80 64 48 32 16 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| months | days | nanos |
+-------+-------+-------+-------+-------+-------+-------+-------+
┌───────────────┬─────────────┬─────────────────────────────┐
│ Months │ Days │ Nanos │
│ (32 bits) │ (32 bits) │ (64 bits) │
└───────────────┴─────────────┴─────────────────────────────┘
0 32 64 128 bit offset

*/
let m = (months as u128 & u32::MAX as u128) << 96;
let d = (days as u128 & u32::MAX as u128) << 64;
let n = nanos as u128 & u64::MAX as u128;
let m = months as u128 & u32::MAX as u128;
let d = (days as u128 & u32::MAX as u128) << 32;
let n = (nanos as u128 & u64::MAX as u128) << 64;
(m | d | n) as <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native
}

Expand All @@ -992,9 +995,9 @@ impl IntervalMonthDayNanoType {
pub fn to_parts(
i: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
) -> (i32, i32, i64) {
let months = (i >> 96) as i32;
let days = (i >> 64) as i32;
let nanos = i as i64;
let months = i as i32;
let days = (i >> 32) as i32;
let nanos = (i >> 64) as i64;
(months, days, nanos)
}
}
Expand Down
9 changes: 4 additions & 5 deletions arrow-ord/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1714,18 +1714,17 @@ mod tests {
IntervalDayTimeType::make_value(1, 3000),
// 90M milliseconds
IntervalDayTimeType::make_value(0, 90_000_000),
IntervalDayTimeType::make_value(4, 10),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The interval ordering is now even more arbitrary 😅

I suspect this is the most user-visible change, as I suspect not many people are manually bit-twiddling

],
vec![
IntervalDayTimeType::make_value(0, 1000),
IntervalDayTimeType::make_value(1, 0),
IntervalDayTimeType::make_value(10, 0),
IntervalDayTimeType::make_value(2, 1),
// NB even though 1 day is less than 90M milliseconds long,
// it compares as greater because the underlying type stores
// days and milliseconds as different fields
IntervalDayTimeType::make_value(0, 12),
IntervalDayTimeType::make_value(56, 10),
],
vec![false, true, true, true ,false]
vec![true, false, false, false, false, true]
);

cmp_vec!(
Expand Down Expand Up @@ -1771,7 +1770,7 @@ mod tests {
// 100 days (note is treated as greater than 1 month as the underlying integer representation)
IntervalMonthDayNanoType::make_value(0, 100, 0),
],
vec![false, false, true, false, false]
vec![false, true, true, true, true]
);
}

Expand Down
28 changes: 12 additions & 16 deletions arrow-ord/src/ord.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,14 +227,12 @@ pub mod tests {

let cmp = build_compare(&array, &array).unwrap();

assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(1, 0));

// somewhat confusingly, while 90M milliseconds is more than 1 day,
// it will compare less as the comparison is done on the underlying
// values not field by field
assert_eq!(Ordering::Greater, cmp(1, 2));
assert_eq!(Ordering::Less, cmp(2, 1));
// Ordering is based on the underlying values
// leading to potentially surprising results
assert_eq!(Ordering::Greater, cmp(0, 1));
assert_eq!(Ordering::Less, cmp(1, 0));
assert_eq!(Ordering::Less, cmp(1, 2));
assert_eq!(Ordering::Greater, cmp(2, 1));
}

#[test]
Expand Down Expand Up @@ -271,14 +269,12 @@ pub mod tests {

let cmp = build_compare(&array, &array).unwrap();

assert_eq!(Ordering::Less, cmp(0, 1));
assert_eq!(Ordering::Greater, cmp(1, 0));

// somewhat confusingly, while 100 days is more than 1 month in all cases
// it will compare less as the comparison is done on the underlying
// values not field by field
assert_eq!(Ordering::Greater, cmp(1, 2));
assert_eq!(Ordering::Less, cmp(2, 1));
// Ordering is based on the underlying values
// leading to potentially surprising results
assert_eq!(Ordering::Greater, cmp(0, 1));
assert_eq!(Ordering::Less, cmp(1, 0));
assert_eq!(Ordering::Less, cmp(1, 2));
assert_eq!(Ordering::Greater, cmp(2, 1));
}

#[test]
Expand Down