diff --git a/ci/rust-build-main.bat b/ci/rust-build-main.bat index e7f3c32a549..97ed1d512c6 100644 --- a/ci/rust-build-main.bat +++ b/ci/rust-build-main.bat @@ -31,6 +31,12 @@ rustup default nightly rustup show cargo build --target %TARGET% --all-targets --release || exit /B @echo +@echo Build with no default features +@echo ------------------------------ +pushd arrow +cargo build --target %TARGET% --all-targets --no-default-features || exit /B +popd +@echo @echo Test (release) @echo -------------- cargo test --target %TARGET% --release || exit /B diff --git a/ci/travis_script_rust.sh b/ci/travis_script_rust.sh index 704cb37bb06..20656b68558 100755 --- a/ci/travis_script_rust.sh +++ b/ci/travis_script_rust.sh @@ -32,6 +32,9 @@ rustup show cargo +stable fmt --all -- --check RUSTFLAGS="-D warnings" cargo build --all-targets +pushd arrow +cargo build --no-default-features +popd cargo test # run examples diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 1f6c9111759..d97e1afd22a 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -45,10 +45,14 @@ csv = "1.0" num = "0.2" regex = "1.1" lazy_static = "1.2" -packed_simd = "0.3.1" +packed_simd = { version = "0.3.1", optional = true } chrono = "0.4" flatbuffers = "0.5.0" +[features] +simd = ["packed_simd"] +default = ["simd"] + [dev-dependencies] criterion = "0.2" lazy_static = "1" diff --git a/rust/arrow/README.md b/rust/arrow/README.md index f3d61842ef2..4bc6b062648 100644 --- a/rust/arrow/README.md +++ b/rust/arrow/README.md @@ -64,6 +64,16 @@ Some manual steps were then performed: - Remove `org::apache::arrow::flatbuffers` namespace - Add includes to each generated file +## SIMD (Single Instruction Multiple Data) + +Arrow uses the [packed_simd](https://crates.io/crates/packed_simd) crate to optimize many of the implementations in the +[compute](https://github.com/apache/arrow/tree/master/rust/arrow/src/compute) module using SIMD intrinsics. These +optimizations are enabled by the `simd` feature flag and are turned on by default, but can be disabled, for example: + +```bash +cargo build --no-default-features +``` + # Publishing to crates.io An Arrow committer can publish this crate after an official project release has diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 7ede0394a10..dbcdd95a5f2 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -18,6 +18,7 @@ //! The main type in the module is `Buffer`, a contiguous immutable memory region of //! fixed size aligned at a 64-byte boundary. `MutableBuffer` is like `Buffer`, but it can //! be mutated and grown. +#[cfg(feature = "simd")] use packed_simd::u8x64; use std::cmp; @@ -26,7 +27,9 @@ use std::fmt::{Debug, Formatter}; use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write}; use std::mem; use std::ops::{BitAnd, BitOr, Not}; -use std::slice::{from_raw_parts, from_raw_parts_mut}; +use std::slice::from_raw_parts; +#[cfg(feature = "simd")] +use std::slice::from_raw_parts_mut; use std::sync::Arc; use crate::array::{BufferBuilderTrait, UInt8BufferBuilder}; @@ -183,7 +186,7 @@ impl> From for Buffer { } /// Helper function for SIMD `BitAnd` and `BitOr` implementations -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn bitwise_bin_op_simd_helper(left: &Buffer, right: &Buffer, op: F) -> Buffer where F: Fn(u8x64, u8x64) -> u8x64, @@ -218,11 +221,15 @@ impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer { )); } - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - // SIMD implementation if available - Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b)) - } else { - // Default implementation + // SIMD implementation if available + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + { + return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b)); + } + + // Default implementation + #[allow(unreachable_code)] + { let mut builder = UInt8BufferBuilder::new(self.len()); for i in 0..self.len() { unsafe { @@ -248,12 +255,15 @@ impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer { )); } - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - // SIMD implementation if available - Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b)) - } else { - // Default implementation + // SIMD implementation if available + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + { + return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b)); + } + // Default implementation + #[allow(unreachable_code)] + { let mut builder = UInt8BufferBuilder::new(self.len()); for i in 0..self.len() { unsafe { @@ -273,8 +283,9 @@ impl Not for &Buffer { type Output = Buffer; fn not(self) -> Buffer { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - // SIMD implementation if available + // SIMD implementation if available + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + { let mut result = MutableBuffer::new(self.len()).with_bitset(self.len(), false); let lanes = u8x64::lanes(); @@ -290,9 +301,12 @@ impl Not for &Buffer { simd_result.write_to_slice_unaligned_unchecked(result_slice); } } - result.freeze() - } else { - // Default implementation + return result.freeze(); + } + + // Default implementation + #[allow(unreachable_code)] + { let mut builder = UInt8BufferBuilder::new(self.len()); for i in 0..self.len() { unsafe { diff --git a/rust/arrow/src/compute/kernels/arithmetic.rs b/rust/arrow/src/compute/kernels/arithmetic.rs index 6a05a3d6e6c..a08979c107b 100644 --- a/rust/arrow/src/compute/kernels/arithmetic.rs +++ b/rust/arrow/src/compute/kernels/arithmetic.rs @@ -22,16 +22,22 @@ //! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. +#[cfg(feature = "simd")] use std::mem; use std::ops::{Add, Div, Mul, Sub}; +#[cfg(feature = "simd")] use std::slice::from_raw_parts_mut; +#[cfg(feature = "simd")] use std::sync::Arc; use num::{One, Zero}; use crate::array::*; +#[cfg(feature = "simd")] use crate::bitmap::Bitmap; +#[cfg(feature = "simd")] use crate::buffer::MutableBuffer; +#[cfg(feature = "simd")] use crate::compute::util::{apply_bin_op_to_option_bitmap, simd_load_set_invalid}; use crate::datatypes; use crate::error::{ArrowError, Result}; @@ -66,7 +72,7 @@ where } /// SIMD vectorized version of `math_op` above. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn simd_math_op( left: &PrimitiveArray, right: &PrimitiveArray, @@ -125,7 +131,7 @@ where /// SIMD vectorized version of `divide`, the divide kernel needs it's own implementation as there /// is a need to handle situations where a divide by `0` occurs. This is complicated by `NULL` /// slots and padding. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn simd_divide( left: &PrimitiveArray, right: &PrimitiveArray, @@ -201,11 +207,11 @@ where + Div + Zero, { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - simd_math_op(&left, &right, |a, b| a + b) - } else { - math_op(left, right, |a, b| Ok(a + b)) - } + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + return simd_math_op(&left, &right, |a, b| a + b); + + #[allow(unreachable_code)] + math_op(left, right, |a, b| Ok(a + b)) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -222,11 +228,11 @@ where + Div + Zero, { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - simd_math_op(&left, &right, |a, b| a - b) - } else { - math_op(left, right, |a, b| Ok(a - b)) - } + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + return simd_math_op(&left, &right, |a, b| a - b); + + #[allow(unreachable_code)] + math_op(left, right, |a, b| Ok(a - b)) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -243,11 +249,11 @@ where + Div + Zero, { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - simd_math_op(&left, &right, |a, b| a * b) - } else { - math_op(left, right, |a, b| Ok(a * b)) - } + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + return simd_math_op(&left, &right, |a, b| a * b); + + #[allow(unreachable_code)] + math_op(left, right, |a, b| Ok(a * b)) } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -266,17 +272,17 @@ where + Zero + One, { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - simd_divide(&left, &right) - } else { - math_op(left, right, |a, b| { - if b.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(a / b) - } - }) - } + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + return simd_divide(&left, &right); + + #[allow(unreachable_code)] + math_op(left, right, |a, b| { + if b.is_zero() { + Err(ArrowError::DivideByZero) + } else { + Ok(a / b) + } + }) } #[cfg(test)] diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index a0a64438101..d6398115f0a 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -22,11 +22,15 @@ //! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. +#[cfg(feature = "simd")] use std::sync::Arc; use crate::array::*; +#[cfg(feature = "simd")] use crate::compute::util::apply_bin_op_to_option_bitmap; -use crate::datatypes::{ArrowNumericType, BooleanType, DataType}; +use crate::datatypes::ArrowNumericType; +#[cfg(feature = "simd")] +use crate::datatypes::{BooleanType, DataType}; use crate::error::{ArrowError, Result}; /// Helper function to perform boolean lambda function on values from two arrays, this @@ -65,7 +69,7 @@ where /// Helper function to perform boolean lambda function on values from two arrays using /// SIMD. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn simd_compare_op( left: &PrimitiveArray, right: &PrimitiveArray, @@ -116,11 +120,11 @@ pub fn eq(left: &PrimitiveArray, right: &PrimitiveArray) -> Result(left: &PrimitiveArray, right: &PrimitiveArray) -> Result(left: &PrimitiveArray, right: &PrimitiveArray) -> Result false, - (None, _) => true, - (_, None) => false, - (Some(aa), Some(bb)) => aa < bb, - }) - } + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + return simd_compare_op(left, right, |a, b| T::lt(a, b)); + + #[allow(unreachable_code)] + compare_op(left, right, |a, b| match (a, b) { + (None, None) => false, + (None, _) => true, + (_, None) => false, + (Some(aa), Some(bb)) => aa < bb, + }) } /// Perform `left <= right` operation on two arrays. Null values are less than non-null @@ -162,16 +166,16 @@ pub fn lt_eq( where T: ArrowNumericType, { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - simd_compare_op(left, right, |a, b| T::le(a, b)) - } else { - compare_op(left, right, |a, b| match (a, b) { - (None, None) => true, - (None, _) => true, - (_, None) => false, - (Some(aa), Some(bb)) => aa <= bb, - }) - } + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + return simd_compare_op(left, right, |a, b| T::le(a, b)); + + #[allow(unreachable_code)] + compare_op(left, right, |a, b| match (a, b) { + (None, None) => true, + (None, _) => true, + (_, None) => false, + (Some(aa), Some(bb)) => aa <= bb, + }) } /// Perform `left > right` operation on two arrays. Non-null values are greater than null @@ -180,16 +184,16 @@ pub fn gt(left: &PrimitiveArray, right: &PrimitiveArray) -> Result false, - (None, _) => false, - (_, None) => true, - (Some(aa), Some(bb)) => aa > bb, - }) - } + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + return simd_compare_op(left, right, |a, b| T::gt(a, b)); + + #[allow(unreachable_code)] + compare_op(left, right, |a, b| match (a, b) { + (None, None) => false, + (None, _) => false, + (_, None) => true, + (Some(aa), Some(bb)) => aa > bb, + }) } /// Perform `left >= right` operation on two arrays. Non-null values are greater than null @@ -201,16 +205,16 @@ pub fn gt_eq( where T: ArrowNumericType, { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - simd_compare_op(left, right, |a, b| T::ge(a, b)) - } else { - compare_op(left, right, |a, b| match (a, b) { - (None, None) => true, - (None, _) => false, - (_, None) => true, - (Some(aa), Some(bb)) => aa >= bb, - }) - } + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + return simd_compare_op(left, right, |a, b| T::ge(a, b)); + + #[allow(unreachable_code)] + compare_op(left, right, |a, b| match (a, b) { + (None, None) => true, + (None, _) => false, + (_, None) => true, + (Some(aa), Some(bb)) => aa >= bb, + }) } #[cfg(test)] diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 8cd5a113dc9..a6ddf06fdf0 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -20,9 +20,12 @@ use crate::array::*; use crate::bitmap::Bitmap; use crate::buffer::Buffer; +#[cfg(feature = "simd")] use crate::datatypes::*; use crate::error::Result; +#[cfg(feature = "simd")] use num::One; +#[cfg(feature = "simd")] use std::cmp::min; /// Applies a given binary operation, `op`, to two references to `Option`'s. @@ -96,6 +99,7 @@ pub(super) fn take_value_indices_from_list( /// Lanes of the SIMD mask can be set to 'valid' (`true`) if the corresponding array slot is not /// `NULL`, as indicated by it's `Bitmap`, and is within the length of the array. Lanes outside the /// length represent padding and are set to 'invalid' (`false`). +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] unsafe fn is_valid( bitmap: &Option, i: usize, @@ -133,6 +137,7 @@ where /// Note that `array` below has it's own `Bitmap` separate from the `bitmap` argument. This /// function is used to prepare `array`'s for binary operations. The `bitmap` argument is the /// `Bitmap` after the binary operation. +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] pub(super) unsafe fn simd_load_set_invalid( array: &PrimitiveArray, bitmap: &Option, @@ -221,6 +226,7 @@ mod tests { } #[test] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn test_is_valid() { let a = Int32Array::from(vec![ Some(15), @@ -248,6 +254,7 @@ mod tests { } #[test] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn test_simd_load_set_invalid() { let a = Int64Array::from(vec![None, Some(15), Some(5), Some(0)]); let new_bitmap = &Some(Bitmap::from(Buffer::from([0b00001010]))); diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 822b953a7d7..5698ccbc1bd 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -23,10 +23,12 @@ use std::fmt; use std::mem::size_of; +#[cfg(feature = "simd")] use std::ops::{Add, Div, Mul, Sub}; use std::slice::from_raw_parts; use std::str::FromStr; +#[cfg(feature = "simd")] use packed_simd::*; use serde_derive::{Deserialize, Serialize}; use serde_json::{json, Number, Value, Value::Number as VNumber}; @@ -303,7 +305,7 @@ make_type!( /// A subtype of primitive type that represents numeric values. /// /// SIMD operations are defined in this trait if available on the target system. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] pub trait ArrowNumericType: ArrowPrimitiveType where Self::Simd: Add @@ -370,12 +372,15 @@ where fn write(simd_result: Self::Simd, slice: &mut [Self::Native]); } -#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(any( + not(any(target_arch = "x86", target_arch = "x86_64")), + not(feature = "simd") +))] pub trait ArrowNumericType: ArrowPrimitiveType {} macro_rules! make_numeric_type { ($impl_ty:ty, $native_ty:ty, $simd_ty:ident, $simd_mask_ty:ident) => { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] impl ArrowNumericType for $impl_ty { type Simd = $simd_ty; @@ -454,7 +459,10 @@ macro_rules! make_numeric_type { unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) }; } } - #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + #[cfg(any( + not(any(target_arch = "x86", target_arch = "x86_64")), + not(feature = "simd") + ))] impl ArrowNumericType for $impl_ty {} }; } diff --git a/rust/arrow/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs index 7924f5f388a..21c28366a10 100644 --- a/rust/arrow/src/util/bit_util.rs +++ b/rust/arrow/src/util/bit_util.rs @@ -17,6 +17,7 @@ //! Utils for working with bits +#[cfg(feature = "simd")] use packed_simd::u8x64; static BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128]; @@ -131,7 +132,7 @@ pub fn ceil(value: usize, divisor: usize) -> usize { /// Note that each slice should be 64 bytes and it is the callers responsibility to ensure /// that this is the case. If passed slices larger than 64 bytes the operation will only /// be performed on the first 64 bytes. Slices less than 64 bytes will panic. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] pub unsafe fn bitwise_bin_op_simd(left: &[u8], right: &[u8], result: &mut [u8], op: F) where F: Fn(u8x64, u8x64) -> u8x64, @@ -302,8 +303,8 @@ mod tests { assert_eq!(ceil(10000000000, 1000000000), 10); } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[test] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn test_bitwise_and_simd() { let buf1 = [0b00110011u8; 64]; let buf2 = [0b11110000u8; 64]; @@ -314,8 +315,8 @@ mod tests { } } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[test] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn test_bitwise_or_simd() { let buf1 = [0b00110011u8; 64]; let buf2 = [0b11110000u8; 64];