Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ci/rust-build-main.bat
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ rustup default nightly
rustup show
cargo build --target %TARGET% --all-targets --release || exit /B
@echo
@echo Build with no default features
@echo ------------------------------
pushd arrow
cargo build --target %TARGET% --all-targets --no-default-features || exit /B
popd
@echo
@echo Test (release)
@echo --------------
cargo test --target %TARGET% --release || exit /B
Expand Down
3 changes: 3 additions & 0 deletions ci/travis_script_rust.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ rustup show
cargo +stable fmt --all -- --check

RUSTFLAGS="-D warnings" cargo build --all-targets
pushd arrow
cargo build --no-default-features
popd
cargo test

# run examples
Expand Down
6 changes: 5 additions & 1 deletion rust/arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,14 @@ csv = "1.0"
num = "0.2"
regex = "1.1"
lazy_static = "1.2"
packed_simd = "0.3.1"
packed_simd = { version = "0.3.1", optional = true }
chrono = "0.4"
flatbuffers = "0.5.0"

[features]
simd = ["packed_simd"]
default = ["simd"]

[dev-dependencies]
criterion = "0.2"
lazy_static = "1"
Expand Down
10 changes: 10 additions & 0 deletions rust/arrow/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ Some manual steps were then performed:
- Remove `org::apache::arrow::flatbuffers` namespace
- Add includes to each generated file

## SIMD (Single Instruction Multiple Data)

Arrow uses the [packed_simd](https://crates.io/crates/packed_simd) crate to optimize many of the implementations in the
[compute](https://github.com/apache/arrow/tree/master/rust/arrow/src/compute) module using SIMD intrinsics. These
optimizations are controlled by the `simd` feature flag, which is enabled by default. To disable them, build without default features, for example:

```bash
cargo build --no-default-features
```

# Publishing to crates.io

An Arrow committer can publish this crate after an official project release has
Expand Down
48 changes: 31 additions & 17 deletions rust/arrow/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! The main type in the module is `Buffer`, a contiguous immutable memory region of
//! fixed size aligned at a 64-byte boundary. `MutableBuffer` is like `Buffer`, but it can
//! be mutated and grown.
#[cfg(feature = "simd")]
use packed_simd::u8x64;

use std::cmp;
Expand All @@ -26,7 +27,9 @@ use std::fmt::{Debug, Formatter};
use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write};
use std::mem;
use std::ops::{BitAnd, BitOr, Not};
use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::slice::from_raw_parts;
#[cfg(feature = "simd")]
use std::slice::from_raw_parts_mut;
use std::sync::Arc;

use crate::array::{BufferBuilderTrait, UInt8BufferBuilder};
Expand Down Expand Up @@ -183,7 +186,7 @@ impl<T: AsRef<[u8]>> From<T> for Buffer {
}

/// Helper function for SIMD `BitAnd` and `BitOr` implementations
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn bitwise_bin_op_simd_helper<F>(left: &Buffer, right: &Buffer, op: F) -> Buffer
where
F: Fn(u8x64, u8x64) -> u8x64,
Expand Down Expand Up @@ -218,11 +221,15 @@ impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer {
));
}

if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
// SIMD implementation if available
Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b))
} else {
// Default implementation
// SIMD implementation if available
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
{
return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b));
}

// Default implementation
#[allow(unreachable_code)]
{
let mut builder = UInt8BufferBuilder::new(self.len());
for i in 0..self.len() {
unsafe {
Expand All @@ -248,12 +255,15 @@ impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer {
));
}

if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
// SIMD implementation if available
Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b))
} else {
// Default implementation
// SIMD implementation if available
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
{
return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b));
}

// Default implementation
#[allow(unreachable_code)]
{
let mut builder = UInt8BufferBuilder::new(self.len());
for i in 0..self.len() {
unsafe {
Expand All @@ -273,8 +283,9 @@ impl Not for &Buffer {
type Output = Buffer;

fn not(self) -> Buffer {
if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
// SIMD implementation if available
// SIMD implementation if available
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
{
let mut result =
MutableBuffer::new(self.len()).with_bitset(self.len(), false);
let lanes = u8x64::lanes();
Expand All @@ -290,9 +301,12 @@ impl Not for &Buffer {
simd_result.write_to_slice_unaligned_unchecked(result_slice);
}
}
result.freeze()
} else {
// Default implementation
return result.freeze();
}

// Default implementation
#[allow(unreachable_code)]
{
let mut builder = UInt8BufferBuilder::new(self.len());
for i in 0..self.len() {
unsafe {
Expand Down
62 changes: 34 additions & 28 deletions rust/arrow/src/compute/kernels/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,22 @@
//! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.

#[cfg(feature = "simd")]
use std::mem;
use std::ops::{Add, Div, Mul, Sub};
#[cfg(feature = "simd")]
use std::slice::from_raw_parts_mut;
#[cfg(feature = "simd")]
use std::sync::Arc;

use num::{One, Zero};

use crate::array::*;
#[cfg(feature = "simd")]
use crate::bitmap::Bitmap;
#[cfg(feature = "simd")]
use crate::buffer::MutableBuffer;
#[cfg(feature = "simd")]
use crate::compute::util::{apply_bin_op_to_option_bitmap, simd_load_set_invalid};
use crate::datatypes;
use crate::error::{ArrowError, Result};
Expand Down Expand Up @@ -66,7 +72,7 @@ where
}

/// SIMD vectorized version of `math_op` above.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn simd_math_op<T, F>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
Expand Down Expand Up @@ -125,7 +131,7 @@ where
/// SIMD vectorized version of `divide`. The divide kernel needs its own implementation because
/// it must handle situations where a divide by `0` occurs. This is complicated by `NULL`
/// slots and padding.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn simd_divide<T>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
Expand Down Expand Up @@ -201,11 +207,11 @@ where
+ Div<Output = T::Native>
+ Zero,
{
if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
simd_math_op(&left, &right, |a, b| a + b)
} else {
math_op(left, right, |a, b| Ok(a + b))
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
return simd_math_op(&left, &right, |a, b| a + b);

#[allow(unreachable_code)]
math_op(left, right, |a, b| Ok(a + b))
}

/// Perform `left - right` operation on two arrays. If either left or right value is null
Expand All @@ -222,11 +228,11 @@ where
+ Div<Output = T::Native>
+ Zero,
{
if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
simd_math_op(&left, &right, |a, b| a - b)
} else {
math_op(left, right, |a, b| Ok(a - b))
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
return simd_math_op(&left, &right, |a, b| a - b);

#[allow(unreachable_code)]
math_op(left, right, |a, b| Ok(a - b))
}

/// Perform `left * right` operation on two arrays. If either left or right value is null
Expand All @@ -243,11 +249,11 @@ where
+ Div<Output = T::Native>
+ Zero,
{
if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
simd_math_op(&left, &right, |a, b| a * b)
} else {
math_op(left, right, |a, b| Ok(a * b))
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
return simd_math_op(&left, &right, |a, b| a * b);

#[allow(unreachable_code)]
math_op(left, right, |a, b| Ok(a * b))
}

/// Perform `left / right` operation on two arrays. If either left or right value is null
Expand All @@ -266,17 +272,17 @@ where
+ Zero
+ One,
{
if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
simd_divide(&left, &right)
} else {
math_op(left, right, |a, b| {
if b.is_zero() {
Err(ArrowError::DivideByZero)
} else {
Ok(a / b)
}
})
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
return simd_divide(&left, &right);

#[allow(unreachable_code)]
math_op(left, right, |a, b| {
if b.is_zero() {
Err(ArrowError::DivideByZero)
} else {
Ok(a / b)
}
})
}

#[cfg(test)]
Expand Down
Loading