Skip to content

Commit

Permalink
[compute] use auto vectorized compute for some cases
Browse files Browse the repository at this point in the history
  • Loading branch information
sundy-li committed Aug 10, 2021
1 parent 0d9dca9 commit f35cf4a
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 34 deletions.
6 changes: 3 additions & 3 deletions common/datavalues/src/arrays/arithmetic.rs
Expand Up @@ -193,13 +193,13 @@ where
match (rhs.len(), dtype) {
// TODO(sundy): add more specific cases
// TODO(sundy): fastmod https://lemire.me/blog/2019/02/08/faster-remainders-when-the-divisor-is-a-constant-beating-compilers-and-libdivide/
(1111, DataType::UInt8) => {
(1, DataType::UInt8) => {
let opt_rhs = rhs.get(0);
match opt_rhs {
None => Ok(DFUInt8Array::full_null(self.len()).into_series()),
Some(rhs) => {
let array: DFUInt8Array =
self.apply_cast_numeric(|a| AsPrimitive::<u8>::as_(a % rhs));
let array: DFUInt8Array = self
.apply_cast_numeric(|a| AsPrimitive::<u8>::as_(a - (a / rhs) * rhs));
Ok(array.into_series())
}
}
Expand Down
55 changes: 48 additions & 7 deletions common/datavalues/src/arrays/ops/agg.rs
Expand Up @@ -4,12 +4,15 @@

use std::fmt::Debug;
use std::ops::Add;
use std::ops::AddAssign;

use common_arrow::arrow::array::Array;
use common_arrow::arrow::compute::aggregate;
use common_arrow::arrow::types::simd::Simd;
use common_arrow::arrow::types::NativeType;
use common_exception::ErrorCode;
use common_exception::Result;
use num::cast::AsPrimitive;
use num::Num;
use num::NumCast;
use num::Zero;
Expand Down Expand Up @@ -61,17 +64,55 @@ pub trait ArrayAgg: Debug {
impl<T> ArrayAgg for DataArray<T>
where
T: DFNumericType,
T::Native: NativeType + Simd + PartialOrd + Num + NumCast + Zero + Into<DataValue>,
T::Native: NativeType
+ Simd
+ PartialOrd
+ Num
+ NumCast
+ Zero
+ Into<DataValue>
+ AsPrimitive<<T::LargestType as DFPrimitiveType>::Native>,

<T::LargestType as DFPrimitiveType>::Native: Into<DataValue> + AddAssign + Default,

<T::Native as Simd>::Simd: Add<Output = <T::Native as Simd>::Simd>
+ aggregate::Sum<T::Native>
+ aggregate::SimdOrd<T::Native>,
Option<T::Native>: Into<DataValue>,
{
fn sum(&self) -> Result<DataValue> {
Ok(match aggregate::sum(self.downcast_ref()) {
Some(x) => x.into(),
None => DataValue::from(self.data_type()),
})
let array = self.downcast_ref();
// If T is already its own largest type and the array contains nulls, use the SIMD sum kernel.
// In every other case a plain loop is used: the auto-vectorized sum is faster than manual SIMD.
let null_count = self.null_count();
if null_count > 0 && (T::SIZE == <T::LargestType as DFNumericType>::SIZE) {
return Ok(match aggregate::sum(array) {
Some(x) => x.into(),
None => DataValue::from(self.data_type()),
});
}

let mut sum = <T::LargestType as DFPrimitiveType>::Native::default();
if null_count == 0 {
// Fast path: there are no nulls, so every value is added without checking validity.
array.values().as_slice().iter().for_each(|f| {
sum += f.as_();
});
} else {
if let Some(c) = array.validity() {
array
.values()
.as_slice()
.iter()
.zip(c.into_iter())
.for_each(|(f, v)| {
if v {
sum += f.as_();
}
});
}
}
Ok(sum.into())
}

fn min(&self) -> Result<DataValue> {
Expand Down Expand Up @@ -130,8 +171,8 @@ impl ArrayAgg for DFBooleanArray {
if self.all_is_null() {
return Ok(DataValue::Boolean(None));
}
let sum = self.downcast_iter().fold(0, |acc: u32, x| match x {
Some(v) => acc + v as u32,
let sum = self.downcast_iter().fold(0, |acc: u64, x| match x {
Some(v) => acc + v as u64,
None => acc,
});

Expand Down
1 change: 0 additions & 1 deletion common/datavalues/src/data_array_filter.rs
Expand Up @@ -9,7 +9,6 @@ use common_arrow::arrow::compute::filter::build_filter;
use common_exception::Result;

use crate::prelude::*;

pub struct DataArrayFilter;

impl DataArrayFilter {
Expand Down
24 changes: 1 addition & 23 deletions common/datavalues/src/series/wrap.rs
Expand Up @@ -136,29 +136,7 @@ macro_rules! impl_dyn_array {
}

fn sum(&self) -> Result<DataValue> {
if !is_numeric(&self.0.data_type()) {
return self.0.sum();
}

if matches!(
self.0.data_type(),
DataType::Float64 | DataType::UInt64 | DataType::Int64
) {
return self.0.sum();
}

if is_floating(&self.0.data_type()) {
let s = self.cast_with_type(&DataType::Float64)?;
return s.sum();
}

if is_signed_numeric(&self.0.data_type()) {
let s = self.cast_with_type(&DataType::Int64)?;
return s.sum();
}

let s = self.cast_with_type(&DataType::UInt64)?;
s.sum()
self.0.sum()
}

fn max(&self) -> Result<DataValue> {
Expand Down

0 comments on commit f35cf4a

Please sign in to comment.