Skip to content

Commit

Permalink
ARROW-11039: [Rust] Performance improvement for utf-8 to float cast
Browse files Browse the repository at this point in the history
Utilize `lexical_core::parse` for faster parsing.

```
cast utf8 to f32        time:   [25.840 us 25.878 us 25.921 us]
                        change: [-45.735% -45.590% -45.408%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
  1 (1.00%) low mild
  3 (3.00%) high mild
  3 (3.00%) high severe
```

Closes #9018 from Dandandan/perf_cast_float

Lead-authored-by: Daniël Heres <danielheres@gmail.com>
Co-authored-by: Heres, Daniel <danielheres@gmail.com>
Signed-off-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
  • Loading branch information
Dandandan authored and jorgecarleitao committed Dec 27, 2020
1 parent 4d97d83 commit 2f68741
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
6 changes: 6 additions & 0 deletions rust/arrow/benches/cast_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ fn add_benchmark(c: &mut Criterion) {
let i32_array = build_array::<Int32Type>(512);
let i64_array = build_array::<Int64Type>(512);
let f32_array = build_array::<Float32Type>(512);
let f32_utf8_array = cast(&build_array::<Float32Type>(512), &DataType::Utf8).unwrap();

let f64_array = build_array::<Float64Type>(512);
let date64_array = build_array::<Date64Type>(512);
let date32_array = build_array::<Date32Type>(512);
Expand Down Expand Up @@ -188,6 +190,10 @@ fn add_benchmark(c: &mut Criterion) {
)
})
});
c.bench_function("cast utf8 to f32", |b| {
b.iter(|| cast_array(&f32_utf8_array, DataType::Float32))
});

c.bench_function("cast timestamp_ms to i64 512", |b| {
b.iter(|| cast_array(&time_ms_array, DataType::Int64))
});
Expand Down
13 changes: 6 additions & 7 deletions rust/arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -923,28 +923,27 @@ where
}

// NOTE(review): this listing appears to be a diff rendered without +/- markers, so the
// pre-change (`TO`) and post-change (`T`) lines both appear below — confirm against the commit.
/// Casts a Utf8 (string) array to a numeric primitive array of type `T`.
///
/// `from` is downcast to a `StringArray` and handed to `string_to_numeric_cast`; the
/// resulting array is wrapped in an `Arc` and returned as `Ok`.
///
/// # Panics
///
/// Panics if `from` is not a `StringArray`, because the downcast result is unwrapped.
fn cast_string_to_numeric<TO>(from: &ArrayRef) -> Result<ArrayRef>
fn cast_string_to_numeric<T>(from: &ArrayRef) -> Result<ArrayRef>
where
TO: ArrowNumericType,
T: ArrowNumericType,
<T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
Ok(Arc::new(string_to_numeric_cast::<TO>(
Ok(Arc::new(string_to_numeric_cast::<T>(
from.as_any().downcast_ref::<StringArray>().unwrap(),
)))
}

fn string_to_numeric_cast<T>(from: &StringArray) -> PrimitiveArray<T>
where
T: ArrowNumericType,
<T as ArrowPrimitiveType>::Native: lexical_core::FromLexical,
{
(0..from.len())
.map(|i| {
if from.is_null(i) {
None
} else {
match from.value(i).parse::<T::Native>() {
Ok(v) => Some(v),
Err(_) => None,
}
lexical_core::parse(from.value(i).as_bytes()).ok()
}
})
.collect()
Expand Down

0 comments on commit 2f68741

Please sign in to comment.