Skip to content

Commit

Permalink
fix: fix a bug in offset calculation for unions (#863) (#871)
Browse files Browse the repository at this point in the history
The `value_offset` function only read the least significant byte in the
offset array, causing issues with unions with more than 255 rows of any
given variant. Fix the issue by reading the entire i32 offset and add a
unit test.

Co-authored-by: Helgi Kristvin Sigurbjarnarson <helgikrs@gmail.com>
  • Loading branch information
alamb and helgikrs committed Oct 27, 2021
1 parent 8bfff79 commit 01743f3
Showing 1 changed file with 45 additions and 2 deletions.
47 changes: 45 additions & 2 deletions arrow/src/array/array_union.rs
Expand Up @@ -24,7 +24,6 @@ use crate::error::{ArrowError, Result};

use core::fmt;
use std::any::Any;
use std::mem::size_of;

/// An Array that can represent slots of varying types.
///
Expand Down Expand Up @@ -177,7 +176,9 @@ impl UnionArray {
Some(b) => b.count_set_bits_offset(0, index),
None => index,
};
self.data().buffers()[1].as_slice()[valid_slots * size_of::<i32>()] as i32
// safety: reinterpreting is safe since the offset buffer contains `i32` values and is
// properly aligned.
unsafe { self.data().buffers()[1].typed_data::<i32>()[valid_slots] }
} else {
index as i32
}
Expand Down Expand Up @@ -334,6 +335,48 @@ mod tests {
}
}

#[test]
fn test_dense_i32_large() {
let mut builder = UnionBuilder::new_dense(1024);

let expected_type_ids = vec![0_i8; 1024];
let expected_value_offsets: Vec<_> = (0..1024).collect();
let expected_array_values: Vec<_> = (1..=1024).collect();

expected_array_values
.iter()
.for_each(|v| builder.append::<Int32Type>("a", *v).unwrap());

let union = builder.build().unwrap();

// Check type ids
assert_eq!(
union.data().buffers()[0],
Buffer::from_slice_ref(&expected_type_ids)
);
for (i, id) in expected_type_ids.iter().enumerate() {
assert_eq!(id, &union.type_id(i));
}

// Check offsets
assert_eq!(
union.data().buffers()[1],
Buffer::from_slice_ref(&expected_value_offsets)
);
for (i, id) in expected_value_offsets.iter().enumerate() {
assert_eq!(&union.value_offset(i), id);
}

for (i, expected_value) in expected_array_values.iter().enumerate() {
assert!(!union.is_null(i));
let slot = union.value(i);
let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
assert_eq!(slot.len(), 1);
let value = slot.value(0);
assert_eq!(expected_value, &value);
}
}

#[test]
fn test_dense_mixed() {
let mut builder = UnionBuilder::new_dense(7);
Expand Down

0 comments on commit 01743f3

Please sign in to comment.