Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1039,7 +1039,6 @@ pub fn binary_numeric_coercion(
pub fn decimal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;

// Prefer decimal data type over floating point for comparison operation
match (lhs_type, rhs_type) {
// Same decimal types
(lhs_type, rhs_type)
Expand All @@ -1059,7 +1058,19 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<Data
{
get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
}
// Decimal + non-decimal types
// Decimal + floating point: floating point wins. Decimal cannot
// represent NaN or ±Inf, so a Decimal common type would fail to
// cast values that the Float side may legitimately hold. This
// matches PostgreSQL's numeric-vs-float resolution.
(
Float16 | Float32 | Float64,
Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
) => Some(lhs_type.clone()),
(
Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
Float16 | Float32 | Float64,
) => Some(rhs_type.clone()),
// Decimal + integer types: promote to Decimal.
(Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), _) => {
get_common_decimal_type(lhs_type, rhs_type)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ fn test_decimal_binary_comparison_coercion() -> Result<()> {
DataType::Decimal128(20, 8),
DataType::Null,
];
// Float types win over Decimal because Decimal cannot represent NaN/±Inf.
let result_types = [
DataType::Decimal128(20, 3),
DataType::Decimal128(20, 3),
DataType::Decimal128(20, 3),
DataType::Decimal128(23, 3),
DataType::Decimal128(24, 7),
DataType::Decimal128(32, 15),
DataType::Float32,
DataType::Float64,
DataType::Decimal128(38, 10),
DataType::Decimal128(25, 8),
DataType::Decimal128(20, 3),
Expand Down Expand Up @@ -460,11 +461,13 @@ fn test_type_coercion_compare() -> Result<()> {
Operator::Lt,
DataType::Decimal128(22, 2)
);
// Float wins over Decimal in comparison coercion: Decimal cannot
// represent NaN/±Inf, so the common super-type must be the float.
test_coercion_binary_rule!(
DataType::Float64,
DataType::Decimal128(10, 3),
Operator::Gt,
DataType::Decimal128(30, 15)
DataType::Float64
);
test_coercion_binary_rule!(
DataType::Int64,
Expand Down
19 changes: 19 additions & 0 deletions datafusion/sqllogictest/test_files/decimal.slt
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,25 @@ select arrow_typeof(null <= a), null <= a from (values (1.1::decimal)) as t(a);
----
Boolean NULL

# Float wins over Decimal in comparison coercion (issue #14272).
# Decimal cannot represent NaN/±Inf, so the common super-type must be the
# float — otherwise comparing a Decimal against +Inf would error trying to
# cast +Inf into the Decimal range.
query B
select '1'::decimal(10,0) = arrow_cast('inf', 'Float64');
----
false

query B
select '1'::decimal(10,0) < arrow_cast('inf', 'Float64');
----
true

query B
select arrow_cast('NaN', 'Float64') = '1'::decimal(10,0);
----
false

query R
select try_cast(1234567 as decimal(7,3));
----
Expand Down
18 changes: 10 additions & 8 deletions datafusion/sqllogictest/test_files/operator.slt
Original file line number Diff line number Diff line change
Expand Up @@ -299,22 +299,23 @@ physical_plan
01)FilterExec: int64@3 < -5 AND CAST(uint64@7 AS Decimal128(20, 0)) < Some(-5),20,0 AND float64@9 < -5 AND decimal@10 < Some(-500),5,2
02)--DataSourceExec: partitions=1, partition_sizes=[1]

## < decimal (expect casts for integers to float)
## < decimal (expect casts for integers and decimal to float, since Float wins
## over Decimal in comparison coercion — Decimal cannot hold NaN/±Inf)
query TT
EXPLAIN SELECT * FROM numeric_types
WHERE int64 < 5.1 AND uint64 < 5.1 AND float64 < 5.1 AND decimal < 5.1;
----
physical_plan
01)FilterExec: CAST(int64@3 AS Float64) < 5.1 AND CAST(uint64@7 AS Float64) < 5.1 AND float64@9 < 5.1 AND decimal@10 < Some(510),5,2
01)FilterExec: CAST(int64@3 AS Float64) < 5.1 AND CAST(uint64@7 AS Float64) < 5.1 AND float64@9 < 5.1 AND CAST(decimal@10 AS Float64) < 5.1
02)--DataSourceExec: partitions=1, partition_sizes=[1]

## < negative decimal (expect casts for integers to float)
## < negative decimal (same Float-wins behavior as above)
query TT
EXPLAIN SELECT * FROM numeric_types
WHERE int64 < -5.1 AND uint64 < -5.1 AND float64 < -5.1 AND decimal < -5.1;
----
physical_plan
01)FilterExec: CAST(int64@3 AS Float64) < -5.1 AND CAST(uint64@7 AS Float64) < -5.1 AND float64@9 < -5.1 AND decimal@10 < Some(-510),5,2
01)FilterExec: CAST(int64@3 AS Float64) < -5.1 AND CAST(uint64@7 AS Float64) < -5.1 AND float64@9 < -5.1 AND CAST(decimal@10 AS Float64) < -5.1
02)--DataSourceExec: partitions=1, partition_sizes=[1]


Expand All @@ -338,22 +339,23 @@ physical_plan
01)FilterExec: int64@3 = -5 AND CAST(uint64@7 AS Decimal128(20, 0)) = Some(-5),20,0 AND float64@9 = -5 AND decimal@10 = Some(-500),5,2
02)--DataSourceExec: partitions=1, partition_sizes=[1]

## = decimal (expect casts for integers to float)
## = decimal (expect casts for integers and decimal to float, since Float wins
## over Decimal in comparison coercion — Decimal cannot hold NaN/±Inf)
query TT
EXPLAIN SELECT * FROM numeric_types
WHERE int64 = 5.1 AND uint64 = 5.1 AND float64 = 5.1 AND decimal = 5.1;
----
physical_plan
01)FilterExec: CAST(int64@3 AS Float64) = 5.1 AND CAST(uint64@7 AS Float64) = 5.1 AND float64@9 = 5.1 AND decimal@10 = Some(510),5,2
01)FilterExec: CAST(int64@3 AS Float64) = 5.1 AND CAST(uint64@7 AS Float64) = 5.1 AND float64@9 = 5.1 AND CAST(decimal@10 AS Float64) = 5.1
02)--DataSourceExec: partitions=1, partition_sizes=[1]

## = negative decimal (expect casts for integers to float)
## = negative decimal (same Float-wins behavior as above)
query TT
EXPLAIN SELECT * FROM numeric_types
WHERE int64 = -5.1 AND uint64 = -5.1 AND float64 = -5.1 AND decimal = -5.1;
----
physical_plan
01)FilterExec: CAST(int64@3 AS Float64) = -5.1 AND CAST(uint64@7 AS Float64) = -5.1 AND float64@9 = -5.1 AND decimal@10 = Some(-510),5,2
01)FilterExec: CAST(int64@3 AS Float64) = -5.1 AND CAST(uint64@7 AS Float64) = -5.1 AND float64@9 = -5.1 AND CAST(decimal@10 AS Float64) = -5.1
02)--DataSourceExec: partitions=1, partition_sizes=[1]


Expand Down
66 changes: 66 additions & 0 deletions docs/source/library-user-guide/upgrading/55.0.0.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Upgrade Guides

## DataFusion 55.0.0

**Note:** DataFusion `55.0.0` has not been released yet. The information provided
in this section pertains to features and changes that have already been merged
to the main branch and are awaiting release in this version.

### Decimal / floating-point coercion now picks the floating-point type

Previously, comparisons between a `Decimal` and a floating-point value
(`Float16`, `Float32`, or `Float64`), and any other context that relies on
comparison coercion to find a common type for them, chose the decimal
type. This produced errors for legitimate floating-point inputs that have
no decimal representation:

```sql
-- Before: errored with "Cast error: Cannot cast to Decimal128(...). Overflowing on inf"
SELECT '1'::decimal(10,0) = arrow_cast('inf', 'Float64');
```

DataFusion now coerces the decimal side to the floating-point type instead.
Decimal types cannot represent `NaN`, `±Infinity`, or values outside their
precision/scale range, so the floating-point type is the only common type
that both operands can always be cast to without error. This also matches
the behavior of PostgreSQL, DuckDB, and the existing rule for arithmetic
operators in DataFusion.

**Migration guide:**

Most queries need no source change and simply become more correct: queries
that previously errored (when comparing against `NaN`, `±Inf`, or
out-of-range values) now succeed, and expressions that were previously
coerced to a decimal type such as `Decimal128(30, 15)` are now evaluated in
the floating-point type of the float operand.

The one behavior to be aware of: the decimal operand is now cast to the
floating-point type, so a decimal value with more than ~15–17 significant
digits will lose precision when cast to `Float64` (narrower types such as
`Float32` preserve fewer digits still). If your query needs to preserve full
decimal precision, cast the float operand to the decimal type explicitly, or
use a decimal literal instead of a float-typed value:

```sql
-- If high-precision decimal comparison matters, force the float into the
-- decimal domain explicitly:
SELECT * FROM t WHERE big_decimal_col > CAST(my_float AS DECIMAL(38, 10));
```

See [#14272](https://github.com/apache/datafusion/issues/14272) for the
original report and discussion.
1 change: 1 addition & 0 deletions docs/source/library-user-guide/upgrading/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Upgrade Guides
.. toctree::
:maxdepth: 1

DataFusion 55.0.0 <55.0.0>
DataFusion 54.0.0 <54.0.0>
DataFusion 53.0.0 <53.0.0>
DataFusion 52.0.0 <52.0.0>
Expand Down
Loading