Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 62 additions & 1 deletion datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,18 @@ impl TreeNodeRewriter for ConstEvaluator<'_> {
ConstSimplifyResult::NotSimplified(s, m) => {
Ok(Transformed::no(Expr::Literal(s, m)))
}
ConstSimplifyResult::SimplifyRuntimeError(_, expr) => {
ConstSimplifyResult::SimplifyRuntimeError(err, expr) => {
// For CAST expressions with literal inputs, propagate the error at plan time rather than deferring to execution time.
// This provides clearer error messages and fails fast.
if let Expr::Cast(Cast { ref expr, .. })
| Expr::TryCast(TryCast { ref expr, .. }) = expr
{
if matches!(expr.as_ref(), Expr::Literal(_, _)) {
return Err(err);
}
}
// For other expressions (like CASE, COALESCE), preserve the original
// to allow short-circuit evaluation at execution time
Ok(Transformed::yes(expr))
}
},
Expand Down Expand Up @@ -4968,6 +4979,56 @@ mod tests {
);
}

#[test]
fn simplify_cast_literal() {
    // Exercises constant folding of `CAST(<literal> AS <type>)`: two casts that
    // are expected to fold into a literal, and one that is expected to be
    // rejected with an error while simplifying.

    // Builds the cast target shared by the timestamp cases below:
    // Timestamp(Nanosecond, Some("+00:00")).
    let utc_nanos = || {
        DataType::Timestamp(
            arrow::datatypes::TimeUnit::Nanosecond,
            Some("+00:00".into()),
        )
    };

    // Case 1: CAST(123i32 AS Int64) folds to the literal 123i64.
    let widened = simplify(Expr::Cast(Cast::new(
        Box::new(lit(123i32)),
        DataType::Int64,
    )));
    assert_eq!(widened, lit(123i64));

    // Case 2: an integer literal cast to a timezone-aware timestamp folds to a
    // TimestampNanosecond literal carrying the same raw value and timezone.
    let folded = simplify(Expr::Cast(Cast::new(
        Box::new(lit(1761630189642i64)),
        utc_nanos(),
    )));
    match &folded {
        Expr::Literal(ScalarValue::TimestampNanosecond(Some(nanos), zone), _) => {
            assert_eq!(*nanos, 1761630189642i64);
            assert_eq!(zone.as_deref(), Some("+00:00"));
        }
        other => panic!("Expected TimestampNanosecond literal, got: {other:?}"),
    }

    // Case 3: CAST(Utf8("1761630189642") AS Timestamp). The string holds digits
    // only and is not in a timestamp string format, so the simplifier itself is
    // expected to return the parse error instead of leaving the cast in place.
    let bad_cast = Expr::Cast(Cast::new(Box::new(lit("1761630189642")), utc_nanos()));

    // `simplify` (used above) yields an `Expr` directly, so drive the simplifier
    // by hand here in order to observe the `Result`.
    let props = ExecutionProps::new();
    let context = SimplifyContext::new(&props).with_schema(test_schema());
    let outcome = ExprSimplifier::new(context).simplify(bad_cast);

    assert!(outcome.is_err(), "Expected error for invalid cast");
    let message = outcome.unwrap_err().to_string();
    assert_contains!(message, "Error parsing timestamp");
}

fn if_not_null(expr: Expr, then: bool) -> Expr {
Expr::Case(Case {
expr: Some(expr.is_not_null().into()),
Expand Down
6 changes: 3 additions & 3 deletions datafusion/sqllogictest/test_files/arrow_typeof.slt
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ select arrow_cast(interval '30 minutes', 'Duration(Second)');
----
0 days 0 hours 30 mins 0 secs

query error DataFusion error: This feature is not implemented: Unsupported CAST from Utf8 to Duration\(s\)
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*This feature is not implemented: Unsupported CAST from Utf8 to Duration\(s\)
select arrow_cast('30 minutes', 'Duration(Second)');


Expand All @@ -337,7 +337,7 @@ select arrow_cast(timestamp '2000-01-01T00:00:00Z', 'Timestamp(Nanosecond, Some(
----
2000-01-01T00:00:00+08:00

statement error DataFusion error: Arrow error: Parser error: Invalid timezone "\+25:00": failed to parse timezone
statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Parser error: Invalid timezone "\+25:00": failed to parse timezone
select arrow_cast(timestamp '2000-01-01T00:00:00', 'Timestamp(Nanosecond, Some( "+25:00" ))');


Expand Down Expand Up @@ -406,7 +406,7 @@ select arrow_cast([1], 'FixedSizeList(1, Int64)');
----
[1]

query error DataFusion error: Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3
select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(4, Int64)');

query ?
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/cte.slt
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ WITH RECURSIVE my_cte AS (

# Test issue: https://github.com/apache/datafusion/issues/9794
# Non-recursive term and recursive term have different types, and cannot be cast
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type
WITH RECURSIVE my_cte AS (
SELECT 1 AS a
UNION ALL
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/errors.slt
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ SELECT
LIMIT 5;


query error DataFusion error: Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type
create table foo as values (1), ('foo');

query error DataFusion error: Error during planning: Substring without for/from is not valid
Expand Down
10 changes: 5 additions & 5 deletions datafusion/sqllogictest/test_files/map.slt
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 53, 'PATCH', 30);
----
{POST: 41, HEAD: 53, PATCH: 30}

query error DataFusion error: Arrow error: Cast error: Cannot cast string 'ab' to value of Int64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'ab' to value of Int64 type
SELECT MAKE_MAP('POST', 41, 'HEAD', 'ab', 'PATCH', 30);

# Map keys cannot be NULL
Expand Down Expand Up @@ -523,7 +523,7 @@ SELECT MAP { 'a': 1, 'b': 3 };
----
{a: 1, b: 3}

query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
SELECT MAP { 'a': 1, 2: 3 };

# accessing map with non-string key
Expand Down Expand Up @@ -670,7 +670,7 @@ SELECT map_entries(MAP { 'a': 1, 'b': 3 });
----
[{key: a, value: 1}, {key: b, value: 3}]

query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
SELECT map_entries(MAP { 'a': 1, 2: 3 });

query ?
Expand Down Expand Up @@ -721,7 +721,7 @@ SELECT map_keys(MAP { 'a': 1, 'b': 3 });
----
[a, b]

query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
SELECT map_keys(MAP { 'a': 1, 2: 3 });

query ?
Expand Down Expand Up @@ -768,7 +768,7 @@ NULL

# Tests for map_values

query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
SELECT map_values(MAP { 'a': 1, 2: 3 });

query ?
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/nullif.slt
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ select nullif(1.0, 2);
----
1

query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
select nullif(2, 'a');

query T
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/select.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1775,7 +1775,7 @@ DROP TABLE test;
query error DataFusion error: Arrow error: Parser error: Error parsing timestamp from 'I AM NOT A TIMESTAMP': error parsing date
SELECT to_timestamp('I AM NOT A TIMESTAMP');

query error DataFusion error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string '' to value of Int32 type
SELECT CAST('' AS int);

# See issue: https://github.com/apache/datafusion/issues/8978
Expand Down
8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/struct.slt
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ Struct("r": nullable Utf8, "c": nullable Float64)
statement ok
drop table t;

query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type
create table t as values({r: 'a', c: 1}), ({c: 2.3, r: 'b'});

##################################
Expand Down Expand Up @@ -554,14 +554,14 @@ statement ok
drop table t;

# row() with incorrect order
statement error DataFusion error: Arrow error: Cast error: Cannot cast string 'blue' to value of Float32 type
create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values
statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'blue' to value of Float32 type
create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values
(row('red', 1), row(2.3, 'blue')),
(row('purple', 1), row('green', 2.3));

# out of order struct literal
# TODO: This query should not fail
statement error DataFusion error: Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type
statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type
create table t(a struct(r varchar, c int)) as values ({r: 'a', c: 1}), ({c: 2, r: 'b'});

##################################
Expand Down
6 changes: 3 additions & 3 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -691,11 +691,11 @@ select
----
08:09:10.123456789 13:14:15.123456 13:14:15.123 13:14:15

query error DataFusion error: Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64\(ns\) type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64\(ns\) type
SELECT TIME 'not a time' as time;

# invalid time
query error DataFusion error: Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64\(ns\) type
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64\(ns\) type
SELECT TIME '24:01:02' as time;

# invalid timezone
Expand Down Expand Up @@ -3271,7 +3271,7 @@ statement error The to_local_time function can only accept Timestamp as the arg
select to_local_time('2024-04-01T00:00:20Z');

# invalid timezone
statement error DataFusion error: Arrow error: Parser error: Invalid timezone "Europe/timezone": failed to parse timezone
statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Parser error: Invalid timezone "Europe/timezone": failed to parse timezone
select to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/timezone');

# valid query
Expand Down