Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ simd = ["datafusion/simd"]
snmalloc = ["snmalloc-rs"]

[dependencies]
arrow = "24.0.0"
arrow = "25.0.0"
datafusion = { path = "../datafusion/core" }
env_logger = "0.9"
futures = "0.3"
mimalloc = { version = "0.1", optional = true, default-features = false }
num_cpus = "1.13.0"
object_store = "0.5.0"
parquet = "24.0.0"
parquet = "25.0.0"
rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.78"
Expand Down
25 changes: 13 additions & 12 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rust-version = "1.62"
readme = "README.md"

[dependencies]
arrow = "24.0.0"
arrow = "25.0.0"
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "13.0.0" }
dirs = "4.0.0"
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]

[dev-dependencies]
arrow = "24.0.0"
arrow-flight = "24.0.0"
arrow = "25.0.0"
arrow-flight = "25.0.0"
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
datafusion-common = { path = "../datafusion/common" }
Expand Down
4 changes: 2 additions & 2 deletions datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ pyarrow = ["pyo3", "arrow/pyarrow"]

[dependencies]
apache-avro = { version = "0.14", default-features = false, features = ["snappy"], optional = true }
arrow = { version = "24.0.0", default-features = false }
arrow = { version = "25.0.0", default-features = false }
cranelift-module = { version = "0.88.0", optional = true }
object_store = { version = "0.5.0", default-features = false, optional = true }
ordered-float = "3.0"
parquet = { version = "24.0.0", default-features = false, optional = true }
parquet = { version = "25.0.0", default-features = false, optional = true }
pyo3 = { version = "0.17.1", optional = true }
sqlparser = "0.25"
15 changes: 5 additions & 10 deletions datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ use arrow::{
TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
DECIMAL128_MAX_PRECISION,
},
util::decimal::Decimal128,
};
use ordered_float::OrderedFloat;

Expand Down Expand Up @@ -1696,7 +1695,7 @@ impl ScalarValue {
if array.is_null(index) {
ScalarValue::Decimal128(None, precision, scale)
} else {
ScalarValue::Decimal128(Some(array.value(index).as_i128()), precision, scale)
ScalarValue::Decimal128(Some(array.value(index)), precision, scale)
}
}

Expand Down Expand Up @@ -1881,11 +1880,7 @@ impl ScalarValue {
}
match value {
None => array.is_null(index),
Some(v) => {
!array.is_null(index)
&& array.value(index)
== Decimal128::new(precision, scale, &v.to_le_bytes())
}
Some(v) => !array.is_null(index) && array.value(index) == *v,
}
}

Expand Down Expand Up @@ -2501,15 +2496,15 @@ mod tests {
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
assert_eq!(1, array.len());
assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
assert_eq!(123i128, array.value(0).as_i128());
assert_eq!(123i128, array.value(0));

// decimal scalar to array with size
let array = decimal_value.to_array_of_size(10);
let array_decimal = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
assert_eq!(10, array.len());
assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
assert_eq!(123i128, array_decimal.value(0).as_i128());
assert_eq!(123i128, array_decimal.value(9).as_i128());
assert_eq!(123i128, array_decimal.value(0));
assert_eq!(123i128, array_decimal.value(9));
// test eq array
assert!(decimal_value.eq_array(&array, 1));
assert!(decimal_value.eq_array(&array, 5));
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion
[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
apache-avro = { version = "0.14", optional = true }
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
async-compression = { version = "0.3.14", features = ["bzip2", "gzip", "futures-io", "tokio"] }
async-trait = "0.1.41"
bytes = "1.1"
Expand All @@ -81,7 +81,7 @@ num_cpus = "1.13.0"
object_store = "0.5.0"
ordered-float = "3.0"
parking_lot = "0.12"
parquet = { version = "24.0.0", features = ["arrow", "async"] }
parquet = { version = "25.0.0", features = ["arrow", "async"] }
paste = "^1.0"
percent-encoding = "2.2.0"
pin-project-lite = "^0.2.7"
Expand All @@ -98,7 +98,7 @@ url = "2.2"
uuid = { version = "1.0", features = ["v4"] }

[dev-dependencies]
arrow = { version = "24.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
arrow = { version = "25.0.0", features = ["prettyprint", "dyn_cmp_dict"] }
async-trait = "0.1.53"
criterion = "0.4"
csv = "1.1.6"
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/fuzz-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
env_logger = "0.9.0"
rand = "0.8"
14 changes: 4 additions & 10 deletions datafusion/core/src/physical_plan/hash_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,29 +62,23 @@ fn hash_decimal128<'a>(
if array.null_count() == 0 {
if mul_col {
for (i, hash) in hashes_buffer.iter_mut().enumerate() {
Comment on lines 63 to 64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can we use hash_array here now instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potentially, I think I would rather keep this as a simple upgrade PR, and address any simplifications that fall out of the new decimal abstractions as a follow up

*hash = combine_hashes(
random_state.hash_one(&array.value(i).as_i128()),
*hash,
);
*hash = combine_hashes(random_state.hash_one(&array.value(i)), *hash);
}
} else {
for (i, hash) in hashes_buffer.iter_mut().enumerate() {
*hash = random_state.hash_one(&array.value(i).as_i128());
*hash = random_state.hash_one(&array.value(i));
}
}
} else if mul_col {
for (i, hash) in hashes_buffer.iter_mut().enumerate() {
if !array.is_null(i) {
*hash = combine_hashes(
random_state.hash_one(&array.value(i).as_i128()),
*hash,
);
*hash = combine_hashes(random_state.hash_one(&array.value(i)), *hash);
}
}
} else {
for (i, hash) in hashes_buffer.iter_mut().enumerate() {
if !array.is_null(i) {
*hash = random_state.hash_one(&array.value(i).as_i128());
*hash = random_state.hash_one(&array.value(i));
}
}
}
Expand Down
11 changes: 7 additions & 4 deletions datafusion/core/src/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,17 @@ pub fn table_with_decimal() -> Arc<dyn TableProvider> {
}

fn make_decimal() -> RecordBatch {
let mut decimal_builder = Decimal128Builder::with_capacity(20, 10, 3);
let mut decimal_builder = Decimal128Builder::with_capacity(20);
for i in 110000..110010 {
decimal_builder.append_value(i as i128).unwrap();
decimal_builder.append_value(i as i128);
}
for i in 100000..100010 {
decimal_builder.append_value(-i as i128).unwrap();
decimal_builder.append_value(-i as i128);
}
let array = decimal_builder.finish();
let array = decimal_builder
.finish()
.with_precision_and_scale(10, 3)
.unwrap();
let schema = Schema::new(vec![Field::new("c1", array.data_type().clone(), true)]);
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap()
}
Expand Down
35 changes: 26 additions & 9 deletions datafusion/core/tests/sql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -560,9 +560,9 @@ async fn register_tpch_csv_data(
DataType::Int64 => {
cols.push(Box::new(Int64Builder::with_capacity(records.len())))
}
DataType::Decimal128(p, s) => cols.push(Box::new(
Decimal128Builder::with_capacity(records.len(), *p, *s),
)),
DataType::Decimal128(_, _) => {
cols.push(Box::new(Decimal128Builder::with_capacity(records.len())))
}
_ => {
let msg = format!("Not implemented: {}", field.data_type());
Err(DataFusionError::Plan(msg))?
Expand Down Expand Up @@ -600,7 +600,7 @@ async fn register_tpch_csv_data(
.unwrap();
let val = val.trim().replace('.', "");
let value_i128 = val.parse::<i128>().unwrap();
sb.append_value(value_i128)?;
sb.append_value(value_i128);
}
_ => Err(DataFusionError::Plan(format!(
"Not implemented: {}",
Expand All @@ -609,7 +609,21 @@ async fn register_tpch_csv_data(
}
}
}
let cols: Vec<ArrayRef> = cols.iter_mut().map(|it| it.finish()).collect();
let cols: Vec<ArrayRef> = cols
.iter_mut()
.zip(schema.fields())
.map(|(it, field)| match field.data_type() {
DataType::Decimal128(p, s) => Arc::new(
it.as_any_mut()
.downcast_mut::<Decimal128Builder>()
.unwrap()
.finish()
.with_precision_and_scale(*p, *s)
.unwrap(),
),
_ => it.finish(),
})
.collect();

let batch = RecordBatch::try_new(Arc::clone(&schema), cols)?;

Expand Down Expand Up @@ -879,14 +893,17 @@ pub fn table_with_decimal() -> Arc<dyn TableProvider> {
}

fn make_decimal() -> RecordBatch {
let mut decimal_builder = Decimal128Builder::with_capacity(20, 10, 3);
let mut decimal_builder = Decimal128Builder::with_capacity(20);
for i in 110000..110010 {
decimal_builder.append_value(i as i128).unwrap();
decimal_builder.append_value(i as i128);
}
for i in 100000..100010 {
decimal_builder.append_value(-i as i128).unwrap();
decimal_builder.append_value(-i as i128);
}
let array = decimal_builder.finish();
let array = decimal_builder
.finish()
.with_precision_and_scale(10, 3)
.unwrap();
let schema = Schema::new(vec![Field::new("c1", array.data_type().clone(), true)]);
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap()
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ path = "src/lib.rs"

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
arrow = { version = "24.0.0", default-features = false }
arrow = { version = "25.0.0", default-features = false }
datafusion-common = { path = "../common", version = "13.0.0" }
log = "^0.4"
sqlparser = "0.25"
2 changes: 1 addition & 1 deletion datafusion/jit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ path = "src/lib.rs"
jit = []

[dependencies]
arrow = { version = "24.0.0", default-features = false }
arrow = { version = "25.0.0", default-features = false }
cranelift = "0.88.0"
cranelift-jit = "0.88.0"
cranelift-module = "0.88.0"
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ default = ["unicode_expressions"]
unicode_expressions = []

[dependencies]
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../common", version = "13.0.0" }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"]

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
arrow = { version = "24.0.0", features = ["prettyprint"] }
arrow = { version = "25.0.0", features = ["prettyprint"] }
blake2 = { version = "^0.10.2", optional = true }
blake3 = { version = "1.0", optional = true }
chrono = { version = "0.4", default-features = false }
Expand Down
6 changes: 3 additions & 3 deletions datafusion/physical-expr/src/aggregate/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,17 +177,17 @@ macro_rules! typed_min_max_batch_decimal128 {
for i in 1..array.len() {
result = result.$OP(array.value(i));
}
ScalarValue::Decimal128(Some(result.as_i128()), *$PRECISION, *$SCALE)
ScalarValue::Decimal128(Some(result), *$PRECISION, *$SCALE)
} else {
let mut result = 0_i128;
let mut has_value = false;
for i in 0..array.len() {
if !has_value && array.is_valid(i) {
has_value = true;
result = array.value(i).as_i128();
result = array.value(i);
}
if array.is_valid(i) {
result = result.$OP(array.value(i).as_i128());
result = result.$OP(array.value(i));
}
}
ScalarValue::Decimal128(Some(result), *$PRECISION, *$SCALE)
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/aggregate/sum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ fn sum_decimal_batch(values: &ArrayRef, precision: u8, scale: u8) -> Result<Scal
}

let result = array.into_iter().fold(0_i128, |s, element| match element {
Some(v) => s + v.as_i128(),
Some(v) => s + v,
None => s,
});

Expand Down
Loading