Skip to content

Commit

Permalink
Split out arrow-array crate (#2769)
Browse files Browse the repository at this point in the history
* Split out arrow-array

* Fix ffi compilation

* Fix data_gen

* Fix doc

* Doc tweaks

* Fix pyarrow
  • Loading branch information
tustvold committed Sep 26, 2022
1 parent 6bee576 commit 06c204c
Show file tree
Hide file tree
Showing 59 changed files with 2,119 additions and 2,324 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -18,6 +18,7 @@
[workspace]
members = [
"arrow",
"arrow-array",
"arrow-data",
"arrow-schema",
"arrow-buffer",
Expand Down
59 changes: 59 additions & 0 deletions arrow-array/Cargo.toml
@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Package metadata for the `arrow-array` crate.
[package]
name = "arrow-array"
version = "23.0.0"
description = "Array abstractions for Apache Arrow"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = ["arrow"]
# Explicit allow-list of the files that Cargo packages for this crate.
include = [
"benches/*.rs",
"src/**/*.rs",
"Cargo.toml",
]
edition = "2021"
rust-version = "1.62"

# Library target; the Rust crate name uses an underscore (`arrow_array`).
[lib]
name = "arrow_array"
path = "src/lib.rs"
# The library target itself is not benchmarked by default.
bench = false


# `ahash` is declared once per target family so that a different RNG feature
# is selected: `compile-time-rng` on wasm32, `runtime-rng` everywhere else.
# NOTE(review): presumably because wasm32 has no OS randomness source by
# default -- confirm against the ahash feature documentation.
[target.'cfg(target_arch = "wasm32")'.dependencies]
ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] }

[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }

# The `arrow-*` entries are sibling workspace crates, given both a `path`
# (used for local builds) and a `version` (used when published).
# All third-party crates opt out of default features and enable only the
# features listed explicitly.
[dependencies]
arrow-buffer = { version = "23.0.0", path = "../arrow-buffer" }
arrow-schema = { version = "23.0.0", path = "../arrow-schema" }
arrow-data = { version = "23.0.0", path = "../arrow-data" }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
num = { version = "0.4", default-features = false, features = ["std"] }
half = { version = "2.0", default-features = false }
hashbrown = { version = "0.12", default-features = false }

# Only needed when building tests, examples and benchmarks.
[dev-dependencies]
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }

# No build-time dependencies are declared; the section is left empty.
[build-dependencies]
Expand Up @@ -15,18 +15,13 @@
// specific language governing permissions and limitations
// under the License.

use std::convert::From;
use std::fmt;
use std::{any::Any, iter::FromIterator};

use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, GenericBinaryIter,
GenericListArray, OffsetSizeTrait,
};
use crate::array::array::ArrayAccessor;
use crate::buffer::Buffer;
use crate::util::bit_util;
use crate::{buffer::MutableBuffer, datatypes::DataType};
use crate::iterator::GenericBinaryIter;
use crate::raw_pointer::RawPtrBox;
use crate::{print_long_array, Array, ArrayAccessor, GenericListArray, OffsetSizeTrait};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;

/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
/// binary data.
Expand Down Expand Up @@ -239,13 +234,13 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}
}

impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericBinaryArray<OffsetSize> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let prefix = OffsetSize::PREFIX;

write!(f, "{}BinaryArray\n[\n", prefix)?;
print_long_array(self, f, |array, index, f| {
fmt::Debug::fmt(&array.value(index), f)
std::fmt::Debug::fmt(&array.value(index), f)
})?;
write!(f, "]")
}
Expand Down Expand Up @@ -387,7 +382,7 @@ impl<'a, T: OffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
/// Create a BinaryArray from a vector of byte slices.
///
/// ```
/// use arrow::array::{Array, BinaryArray};
/// use arrow_array::{Array, BinaryArray};
/// let values: Vec<&[u8]> =
/// vec![b"one", b"two", b"", b"three"];
/// let array = BinaryArray::from_vec(values);
Expand All @@ -401,7 +396,7 @@ impl<'a, T: OffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
/// Create a BinaryArray from a vector of Optional (null) byte slices.
///
/// ```
/// use arrow::array::{Array, BinaryArray};
/// use arrow_array::{Array, BinaryArray};
/// let values: Vec<Option<&[u8]>> =
/// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
/// let array = BinaryArray::from_opt_vec(values);
Expand All @@ -427,7 +422,7 @@ pub type BinaryArray = GenericBinaryArray<i32>;
/// Create a LargeBinaryArray from a vector of byte slices.
///
/// ```
/// use arrow::array::{Array, LargeBinaryArray};
/// use arrow_array::{Array, LargeBinaryArray};
/// let values: Vec<&[u8]> =
/// vec![b"one", b"two", b"", b"three"];
/// let array = LargeBinaryArray::from_vec(values);
Expand All @@ -441,7 +436,7 @@ pub type BinaryArray = GenericBinaryArray<i32>;
/// Create a LargeBinaryArray from a vector of Optional (null) byte slices.
///
/// ```
/// use arrow::array::{Array, LargeBinaryArray};
/// use arrow_array::{Array, LargeBinaryArray};
/// let values: Vec<Option<&[u8]>> =
/// vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
/// let array = LargeBinaryArray::from_opt_vec(values);
Expand All @@ -462,7 +457,8 @@ pub type LargeBinaryArray = GenericBinaryArray<i64>;
#[cfg(test)]
mod tests {
use super::*;
use crate::{array::ListArray, datatypes::Field};
use crate::ListArray;
use arrow_schema::Field;

#[test]
fn test_binary_array() {
Expand Down
Expand Up @@ -15,23 +15,21 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::array::ArrayAccessor;
use std::borrow::Borrow;
use std::convert::From;
use std::iter::{FromIterator, IntoIterator};
use std::{any::Any, fmt};

use super::*;
use super::{array::print_long_array, raw_pointer::RawPtrBox};
use crate::buffer::{Buffer, MutableBuffer};
use crate::util::bit_util;
use crate::builder::BooleanBuilder;
use crate::iterator::BooleanIter;
use crate::raw_pointer::RawPtrBox;
use crate::{print_long_array, Array, ArrayAccessor};
use arrow_buffer::{bit_util, Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::DataType;
use std::any::Any;

/// Array of bools
///
/// # Example
///
/// ```
/// use arrow::array::{Array, BooleanArray};
/// use arrow_array::{Array, BooleanArray};
/// let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
/// assert_eq!(4, arr.len());
/// assert_eq!(1, arr.null_count());
Expand All @@ -50,7 +48,7 @@ use crate::util::bit_util;
///
/// Using `from_iter`
/// ```
/// use arrow::array::{Array, BooleanArray};
/// use arrow_array::{Array, BooleanArray};
/// let v = vec![Some(false), Some(true), Some(false), Some(true)];
/// let arr = v.into_iter().collect::<BooleanArray>();
/// assert_eq!(4, arr.len());
Expand All @@ -72,11 +70,11 @@ pub struct BooleanArray {
raw_values: RawPtrBox<u8>,
}

impl fmt::Debug for BooleanArray {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl std::fmt::Debug for BooleanArray {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "BooleanArray\n[\n")?;
print_long_array(self, f, |array, index, f| {
fmt::Debug::fmt(&array.value(index), f)
std::fmt::Debug::fmt(&array.value(index), f)
})?;
write!(f, "]")
}
Expand Down Expand Up @@ -238,7 +236,7 @@ impl<'a> BooleanArray {
}
}

impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
let iter = iter.into_iter();
let (_, data_len) = iter.size_hint();
Expand Down Expand Up @@ -279,9 +277,6 @@ impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
mod tests {
use super::*;

use crate::buffer::Buffer;
use crate::datatypes::DataType;

#[test]
fn test_boolean_fmt_debug() {
let arr = BooleanArray::from(vec![true, false, false]);
Expand Down
Expand Up @@ -15,34 +15,31 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::ArrayAccessor;
use std::convert::From;
use std::fmt;
use std::marker::PhantomData;
use std::{any::Any, iter::FromIterator};

use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray,
use crate::builder::BooleanBufferBuilder;
use crate::decimal::{Decimal, Decimal256};
use crate::iterator::DecimalIter;
use crate::raw_pointer::RawPtrBox;
use crate::types::{Decimal128Type, Decimal256Type, DecimalType, NativeDecimalType};
use crate::{
print_long_array, Array, ArrayAccessor, FixedSizeBinaryArray, FixedSizeListArray,
};
use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray};
#[allow(deprecated)]
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::validate_decimal_precision;
use crate::datatypes::{
validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type,
Decimal256Type, DecimalType, NativeDecimalType,
use arrow_buffer::{Buffer, MutableBuffer};
use arrow_data::decimal::{
validate_decimal256_precision_with_lt_bytes, validate_decimal_precision,
};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{Decimal, Decimal256};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
use std::marker::PhantomData;

/// `Decimal128Array` stores fixed width decimal numbers,
/// with a fixed precision and scale.
///
/// # Examples
///
/// ```
/// use arrow::array::{Array, DecimalArray, Decimal128Array};
/// use arrow::datatypes::DataType;
/// use arrow_array::{Array, DecimalArray, Decimal128Array};
/// use arrow_schema::DataType;
///
/// // Create a DecimalArray with the default precision and scale
/// let decimal_array: Decimal128Array = vec![
Expand Down Expand Up @@ -268,7 +265,11 @@ impl<T: DecimalType> DecimalArray<T> {
/// 1. `precision` is larger than [`Self::MAX_PRECISION`]
/// 2. `scale` is larger than [`Self::MAX_SCALE`]
/// 3. `scale` is > `precision`
pub fn with_precision_and_scale(self, precision: u8, scale: u8) -> Result<Self>
pub fn with_precision_and_scale(
self,
precision: u8,
scale: u8,
) -> Result<Self, ArrowError>
where
Self: Sized,
{
Expand All @@ -292,7 +293,11 @@ impl<T: DecimalType> DecimalArray<T> {
}

// Validate that the new precision and scale are valid.
fn validate_precision_scale(&self, precision: u8, scale: u8) -> Result<()> {
fn validate_precision_scale(
&self,
precision: u8,
scale: u8,
) -> Result<(), ArrowError> {
if precision > Self::MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"precision {} is greater than max {}",
Expand Down Expand Up @@ -320,7 +325,7 @@ impl<T: DecimalType> DecimalArray<T> {
}

// Validate that all the data in the array is valid within the new precision.
fn validate_data(&self, precision: u8) -> Result<()> {
fn validate_data(&self, precision: u8) -> Result<(), ArrowError> {
// TODO: Move into DecimalType
match Self::VALUE_LENGTH {
16 => self
Expand Down Expand Up @@ -361,7 +366,7 @@ impl Decimal128Array {

// Validates that decimal128 values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: u8) -> Result<()> {
fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> {
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let decimal = unsafe { self.value_unchecked(idx) };
Expand All @@ -376,7 +381,7 @@ impl Decimal128Array {
impl Decimal256Array {
// Validates that decimal256 values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: u8) -> Result<()> {
fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> {
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let raw_val = unsafe {
Expand Down Expand Up @@ -504,8 +509,8 @@ impl<T: DecimalType> From<DecimalArray<T>> for ArrayData {
}
}

impl<T: DecimalType> fmt::Debug for DecimalArray<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
impl<T: DecimalType> std::fmt::Debug for DecimalArray<T> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"Decimal{}Array<{}, {}>\n[\n",
Expand Down Expand Up @@ -552,13 +557,12 @@ impl<'a, T: DecimalType> DecimalArray<T> {

#[cfg(test)]
mod tests {
use crate::array::Decimal256Builder;
use crate::datatypes::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE};
use crate::util::decimal::Decimal128;
use crate::{array::Decimal128Builder, datatypes::Field};
use num::{BigInt, Num};

use super::*;
use crate::builder::{Decimal128Builder, Decimal256Builder};
use crate::decimal::Decimal128;
use arrow_data::decimal::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE};
use arrow_schema::Field;
use num::{BigInt, Num};

#[test]
fn test_decimal_array() {
Expand Down

0 comments on commit 06c204c

Please sign in to comment.