Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions datafusion/functions/benches/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,66 @@ fn criterion_benchmark(c: &mut Criterion) {
)
})
});

// The two benchmarks below use Date32 data with format strings that contain
// time specifiers (%H, %M, %S, ...). Arrow's Date32 formatter cannot
// handle time specifiers, so to_char falls back to a Date64 cast.

// Covers full fallback (every row triggers the cast)
c.bench_function("to_char_array_date32_datetime_patterns_1000", |b| {
let mut rng = rand::rng();
let data_arr = generate_date32_array(&mut rng);
let batch_len = data_arr.len();
let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
let patterns = ColumnarValue::Array(Arc::new(generate_datetime_pattern_array(
&mut rng,
)) as ArrayRef);

b.iter(|| {
black_box(
to_char()
.invoke_with_args(ScalarFunctionArgs {
args: vec![data.clone(), patterns.clone()],
arg_fields: vec![
Field::new("a", data.data_type(), true).into(),
Field::new("b", patterns.data_type(), true).into(),
],
number_rows: batch_len,
return_field: Field::new("f", DataType::Utf8, true).into(),
config_options: Arc::clone(&config_options),
})
.expect("to_char should work on valid values"),
)
})
});

// Covers partial fallback (roughly half the rows trigger it)
c.bench_function("to_char_array_date32_mixed_patterns_1000", |b| {
let mut rng = rand::rng();
let data_arr = generate_date32_array(&mut rng);
let batch_len = data_arr.len();
let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
let patterns = ColumnarValue::Array(Arc::new(generate_mixed_pattern_array(
&mut rng,
)) as ArrayRef);

b.iter(|| {
black_box(
to_char()
.invoke_with_args(ScalarFunctionArgs {
args: vec![data.clone(), patterns.clone()],
arg_fields: vec![
Field::new("a", data.data_type(), true).into(),
Field::new("b", patterns.data_type(), true).into(),
],
number_rows: batch_len,
return_field: Field::new("f", DataType::Utf8, true).into(),
config_options: Arc::clone(&config_options),
})
.expect("to_char should work on valid values"),
)
})
});
}

criterion_group!(benches, criterion_benchmark);
Expand Down
40 changes: 30 additions & 10 deletions datafusion/functions/src/datetime/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,11 @@ fn to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
StringBuilder::with_capacity(data_array.len(), data_array.len() * fmt_len);
let mut buffer = String::with_capacity(fmt_len);

// Lazily computed Date64 cast of the entire array, used when a Date32
// format string contains time specifiers that the Date32 formatter
// cannot handle. Cast once and reuse for all subsequent rows.
let mut date64_array: Option<ArrayRef> = None;

for idx in 0..data_array.len() {
if format_array.is_null(idx) || data_array.is_null(idx) {
builder.append_null();
Expand All @@ -266,13 +271,18 @@ fn to_char_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {
// buffer and `append_value` on success.
match formatter.value(idx).write(&mut buffer) {
Ok(()) => builder.append_value(&buffer),
// Retry with Date64 (see comment in to_char_scalar).
Err(_) if data_type == &Date32 => {
buffer.clear();
let date64_value = cast(&data_array.slice(idx, 1), &Date64)?;
let retry_fmt =
ArrayFormatter::try_new(date64_value.as_ref(), &format_options)?;
retry_fmt.value(0).write(&mut buffer)?;
let date64_ref = match &date64_array {
Some(arr) => arr.as_ref(),
None => {
date64_array = Some(cast(data_array.as_ref(), &Date64)?);
date64_array.as_ref().unwrap().as_ref()
}
};
let retry_options = build_format_options(&Date64, format)?;
Comment thread
kumarUjjawal marked this conversation as resolved.
let retry_fmt = ArrayFormatter::try_new(date64_ref, &retry_options)?;
retry_fmt.value(idx).write(&mut buffer)?;
builder.append_value(&buffer);
}
Err(e) => return Err(e.into()),
Expand Down Expand Up @@ -307,11 +317,21 @@ mod tests {

#[test]
fn test_array_array() {
let array_array_data = vec![(
Arc::new(Date32Array::from(vec![18506, 18507])) as ArrayRef,
StringArray::from(vec!["%Y::%m::%d", "%Y::%m::%d %S::%M::%H %f"]),
StringArray::from(vec!["2020::09::01", "2020::09::02 00::00::00 000000000"]),
)];
let array_array_data = vec![
(
Arc::new(Date32Array::from(vec![18506, 18507])) as ArrayRef,
StringArray::from(vec!["%Y::%m::%d", "%Y::%m::%d %S::%M::%H %f"]),
StringArray::from(vec![
"2020::09::01",
"2020::09::02 00::00::00 000000000",
]),
),
(
Arc::new(Date32Array::from(vec![18506, 18507])) as ArrayRef,
StringArray::from(vec!["%Y::%m::%d %H:%M:%S", "%d-%m-%Y %H:%M"]),
StringArray::from(vec!["2020::09::01 00:00:00", "02-09-2020 00:00"]),
),
];

for (value, format, expected) in array_array_data {
let batch_len = value.len();
Expand Down
Loading