4 changes: 2 additions & 2 deletions Cargo.lock

(Generated file; diff not rendered.)

4 changes: 2 additions & 2 deletions datafusion-cli/Cargo.lock

(Generated file; diff not rendered.)

2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
@@ -28,7 +28,7 @@ repository = "https://github.com/apache/arrow-datafusion"
rust-version = "1.59"

[dependencies]
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb" }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e" }
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "7.0.0", features = ["parquet-all-compressions"] }
dirs = "4.0.0"
2 changes: 1 addition & 1 deletion datafusion-examples/Cargo.toml
@@ -34,7 +34,7 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]

[dev-dependencies]
arrow-flight = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb" }
arrow-flight = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e" }
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
futures = "0.3"
4 changes: 2 additions & 2 deletions datafusion/common/Cargo.toml
@@ -40,10 +40,10 @@ pyarrow = ["pyo3"]
parquet-all-compressions = ["parquet/snap", "parquet/brotli", "parquet/flate2", "parquet/lz4", "parquet/zstd"]

[dependencies]
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb", features = ["prettyprint"] }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e", features = ["prettyprint"] }
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
cranelift-module = { version = "0.82.0", optional = true }
ordered-float = "2.10"
parquet = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb", features = ["arrow", "base64"], default-features = false, optional = true }
parquet = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e", features = ["arrow", "base64"], default-features = false, optional = true }
pyo3 = { version = "0.16", optional = true }
sqlparser = { git = 'https://github.com/cube-js/sqlparser-rs.git', rev = "16f051486de78a23a0ff252155dd59fc2d35497d" }
4 changes: 2 additions & 2 deletions datafusion/core/Cargo.toml
@@ -57,7 +57,7 @@ parquet-all-compressions = ["datafusion-common/parquet-all-compressions", "parqu

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb", features = ["prettyprint"] }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e", features = ["prettyprint"] }
async-trait = "0.1.41"
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
chrono = { version = "0.4", default-features = false }
@@ -76,7 +76,7 @@ num_cpus = "1.13.0"
ordered-float = "2.10"
parking_lot = "0.12"
# All compression codecs are disabled by default in our fork because they increase compilation time significantly.
parquet = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb", features = ["arrow", "base64"], default-features = false }
parquet = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e", features = ["arrow", "base64"], default-features = false }
paste = "^1.0"
pin-project-lite= "^0.2.7"
pyo3 = { version = "0.16", optional = true }
2 changes: 1 addition & 1 deletion datafusion/core/fuzz-utils/Cargo.toml
@@ -23,6 +23,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb", features = ["prettyprint"] }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e", features = ["prettyprint"] }
env_logger = "0.9.0"
rand = "0.8"
19 changes: 8 additions & 11 deletions datafusion/core/src/physical_optimizer/pruning.rs
@@ -1055,17 +1055,14 @@ mod tests {
num_containers: 1,
};

let batch =
build_statistics_record_batch(&statistics, &required_columns).unwrap();
let expected = vec![
"+--------+",
"| s1_min |",
"+--------+",
"| |",
"+--------+",
];

assert_batches_eq!(expected, &[batch]);
// NOTE(cubesql): error is returned on invalid cast
let result =
build_statistics_record_batch(&statistics, &required_columns).unwrap_err();
assert!(
result.to_string().contains("Cannot cast binary to string"),
"{}",
result
);
}

#[test]
3 changes: 2 additions & 1 deletion datafusion/core/src/physical_plan/planner.rs
@@ -1929,7 +1929,8 @@
// u32 AND bool
col("c2").and(bool_expr),
// utf8 LIKE u32
col("c1").like(col("c2")),
// NOTE(cubesql): valid
//col("c1").like(col("c2")),
];
for case in cases {
let logical_plan = LogicalPlanBuilder::scan_csv(
3 changes: 2 additions & 1 deletion datafusion/core/tests/sql/expr.rs
@@ -1057,7 +1057,8 @@ async fn test_cast_expressions() -> Result<()> {
test_expression!("CAST('0' AS INT)", "0");
test_expression!("CAST(NULL AS INT)", "NULL");
test_expression!("TRY_CAST('0' AS INT)", "0");
test_expression!("TRY_CAST('x' AS INT)", "NULL");
// NOTE(cubesql): throws an error for this cast as PostgreSQL does
//test_expression!("TRY_CAST('x' AS INT)", "NULL");
Ok(())
}

2 changes: 1 addition & 1 deletion datafusion/cube_ext/Cargo.toml
@@ -35,7 +35,7 @@ name = "cube_ext"
path = "src/lib.rs"

[dependencies]
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb", features = ["prettyprint"] }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e", features = ["prettyprint"] }
chrono = { version = "0.4.16", package = "chrono", default-features = false, features = ["clock"] }
datafusion-common = { path = "../common", version = "7.0.0" }
datafusion-expr = { path = "../expr", version = "7.0.0" }
2 changes: 1 addition & 1 deletion datafusion/expr/Cargo.toml
@@ -36,6 +36,6 @@ path = "src/lib.rs"

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb", features = ["prettyprint"] }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e", features = ["prettyprint"] }
datafusion-common = { path = "../common", version = "7.0.0" }
sqlparser = { git = 'https://github.com/cube-js/sqlparser-rs.git', rev = "16f051486de78a23a0ff252155dd59fc2d35497d" }
44 changes: 34 additions & 10 deletions datafusion/expr/src/binary_rule.rs
@@ -151,9 +151,13 @@ pub fn coerce_types(
fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;

if !is_numeric(left_type) || !is_numeric(right_type) {
return None;
// If one of the sides is numeric and the other is a string, coercion to number is allowed
match (is_numeric(left_type), is_numeric(right_type)) {
(true, true) => (),
(false, false) => return None,
_ => return string_with_any_coercion(left_type, right_type),
}

if left_type == right_type && !is_dictionary(left_type) {
return Some(left_type.clone());
}
@@ -242,9 +246,13 @@ fn comparison_binary_numeric_coercion(
rhs_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
if !is_numeric(lhs_type) || !is_numeric(rhs_type) {
return None;
};

// If one of the sides is numeric and the other is a string, coercion to number is allowed
match (is_numeric(lhs_type), is_numeric(rhs_type)) {
(true, true) => (),
(false, false) => return None,
_ => return string_with_any_coercion(lhs_type, rhs_type),
}

// same type => all good
if lhs_type == rhs_type {
@@ -322,9 +330,12 @@ fn mathematics_numerical_coercion(
use arrow::datatypes::DataType::*;

// error on any non-numeric type
if !is_numeric(lhs_type) || !is_numeric(rhs_type) {
return None;
};
// If one of the sides is numeric and the other is a string, coercion to number is allowed
match (is_numeric(lhs_type), is_numeric(rhs_type)) {
(true, true) => (),
(false, false) => return None,
_ => return string_with_any_coercion(lhs_type, rhs_type),
}

// exponentiation is always Float64
if mathematics_op == &Operator::Exponentiate {
@@ -557,6 +568,7 @@ fn string_boolean_equality_coercion(
fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
string_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type))
.or_else(|| string_with_any_coercion(lhs_type, rhs_type))
}

/// Coercion rules for Temporal columns: the type that both lhs and rhs can be
@@ -668,6 +680,7 @@ fn eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
.or_else(|| dictionary_coercion(lhs_type, rhs_type))
.or_else(|| temporal_coercion(lhs_type, rhs_type))
.or_else(|| null_coercion(lhs_type, rhs_type))
.or_else(|| string_with_any_coercion(lhs_type, rhs_type))
}

pub fn distinct_coercion(
@@ -803,6 +816,17 @@ fn interval_add_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<Dat
}
}

/// coercion rules for expressions where string can be cast to other type
fn string_with_any_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
match (lhs_type, rhs_type) {
(DataType::Utf8, t) | (t, DataType::Utf8) => Some(t.clone()),
_ => None,
}
}

#[cfg(test)]
mod tests {
use super::*;
@@ -815,10 +839,10 @@ mod tests {

fn test_coercion_error() -> Result<()> {
let result_type =
coerce_types(&DataType::Float32, &Operator::Plus, &DataType::Utf8);
coerce_types(&DataType::Float32, &Operator::Plus, &DataType::Date32);

if let Err(DataFusionError::Plan(e)) = result_type {
assert_eq!(e, "'Float32 + Utf8' can't be evaluated because there isn't a common type to coerce the types to");
assert_eq!(e, "'Float32 + Date32' can't be evaluated because there isn't a common type to coerce the types to");
Ok(())
} else {
Err(DataFusionError::Internal(
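Taken together, the changes above mean a Utf8 operand paired with another type now coerces to that other type for arithmetic (when the other side is numeric), equality, and LIKE, via the new `string_with_any_coercion` fallback. A minimal sketch of the resulting behaviour, assuming `coerce_types` is exported at `datafusion_expr::binary_rule` in this crate layout (illustrative usage, not part of the diff):

```rust
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::{binary_rule::coerce_types, Operator};

fn main() -> Result<()> {
    // Utf8 + Int64 now coerces to Int64 instead of failing to find a common type.
    assert_eq!(
        coerce_types(&DataType::Utf8, &Operator::Plus, &DataType::Int64)?,
        DataType::Int64
    );

    // Equality between Utf8 and a numeric type coerces to the numeric type,
    // so a predicate like '123' = int_col plans without an explicit CAST.
    assert_eq!(
        coerce_types(&DataType::Utf8, &Operator::Eq, &DataType::Int32)?,
        DataType::Int32
    );

    // LIKE picks up the same fallback, which is why the `utf8 LIKE u32` case
    // was dropped from the planner's "expected to fail" list above.
    assert_eq!(
        coerce_types(&DataType::Utf8, &Operator::Like, &DataType::UInt32)?,
        DataType::UInt32
    );
    Ok(())
}
```

This is also why `test_coercion_error` above switches from `Float32 + Utf8` to `Float32 + Date32`: the former now coerces successfully, so a different pair is needed to exercise the error path.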
2 changes: 1 addition & 1 deletion datafusion/jit/Cargo.toml
@@ -36,7 +36,7 @@ path = "src/lib.rs"
jit = []

[dependencies]
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb" }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e" }
cranelift = "0.82.0"
cranelift-jit = "0.82.0"
cranelift-module = "0.82.0"
2 changes: 1 addition & 1 deletion datafusion/physical-expr/Cargo.toml
@@ -40,7 +40,7 @@ unicode_expressions = ["unicode-segmentation"]

[dependencies]
ahash = { version = "0.7", default-features = false }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "a03d4eef5640e05dddf99fc2357ad6d58b5337cb", features = ["prettyprint"] }
arrow = { git = 'https://github.com/cube-js/arrow-rs.git', rev = "8277f9a051c90482ff9557a91c5dc3926d9ba19e", features = ["prettyprint"] }
blake2 = { version = "^0.10.2", optional = true }
blake3 = { version = "1.0", optional = true }
chrono = { version = "0.4.20", default-features = false }
7 changes: 5 additions & 2 deletions datafusion/physical-expr/src/expressions/try_cast.rs
@@ -526,13 +526,16 @@

#[test]
fn test_try_cast_utf8_i32() -> Result<()> {
// NOTE(cubesql): casting invalid Utf8 to Int32 throws an error like in PostgreSQL
generic_test_cast!(
StringArray,
DataType::Utf8,
vec!["a", "2", "3", "b", "5"],
// vec!["a", "2", "3", "b", "5"],
vec!["2", "3", "5"],
Int32Array,
DataType::Int32,
vec![None, Some(2), Some(3), None, Some(5)]
// vec![None, Some(2), Some(3), None, Some(5)]
vec![Some(2), Some(3), Some(5)]
);
Ok(())
}
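The adjusted expectations above reflect the fork's TRY_CAST semantics: a malformed string now raises an error, as a failed CAST does in PostgreSQL, instead of producing NULL, while well-formed strings still cast. A small sketch of the observable difference through the SQL interface, assuming the stock DataFusion 7.0 `SessionContext` API and a tokio runtime (illustrative only):

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // A well-formed string literal still casts; this returns one row containing 0.
    let ok = ctx.sql("SELECT TRY_CAST('0' AS INT)").await?.collect().await?;
    assert_eq!(ok[0].num_rows(), 1);

    // A malformed string now surfaces an error (PostgreSQL-style) rather than NULL.
    // Depending on where constant folding runs, the error may appear at planning
    // time or when the batches are collected, so both paths are checked here.
    let failed = match ctx.sql("SELECT TRY_CAST('x' AS INT)").await {
        Ok(df) => df.collect().await.map(|_| ()),
        Err(e) => Err(e),
    };
    assert!(failed.is_err());
    Ok(())
}
```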