From 086d948fd9bdf17835f3289f92d3f4e467364755 Mon Sep 17 00:00:00 2001 From: starocean999 Date: Wed, 20 May 2026 10:42:22 +0800 Subject: [PATCH] [fix](fe) Preserve narrowing datetimev2 casts in simplify in predicate (#63343) ### What problem does this PR solve? SimplifyInPredicate could incorrectly remove narrowing DATETIMEV2 casts inside IN predicates. For expressions such as `CAST(datetimev2(6) AS DATETIMEV2(3)) IN (...)`, the rewrite could turn the predicate into a direct comparison on the original DATETIMEV2(6) column, which is not semantics-preserving because the cast reduces precision and rounds, so several distinct DATETIMEV2(6) values can map to the same DATETIMEV2(3) value. The rule also checked literal alignment against a power of two (`1L << (MAX_SCALE - targetScale)`) instead of a power of ten, which could incorrectly treat some literals as losslessly convertible (for example, 123128 microseconds is divisible by 8 but not by 1000). This change restricts the rewrite to non-narrowing DATETIMEV2 casts and fixes the literal alignment check to use decimal scale factors. It also adds FE unit tests and a regression case for the reported DATETIMEV2 scenario. 
--- .../expression/rules/SimplifyInPredicate.java | 11 ++-- .../expression/SimplifyInPredicateTest.java | 62 +++++++++++++++---- .../test_simplify_in_predicate.out | 7 +++ .../test_simplify_in_predicate.groovy | 28 +++++++++ 4 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 regression-test/data/nereids_syntax_p0/test_simplify_in_predicate.out diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyInPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyInPredicate.java index 70a964929e22d2..b66db8e66f0d03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyInPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyInPredicate.java @@ -63,10 +63,12 @@ && canLosslessConvertToDateV2Literal((DateTimeV2Literal) literal))) { } else if (cast.child().getDataType().isDateTimeV2Type() && expr.child(1) instanceof DateTimeV2Literal) { List literals = expr.children().subList(1, expr.children().size()); + DateTimeV2Type castType = (DateTimeV2Type) cast.getDataType(); DateTimeV2Type compareType = (DateTimeV2Type) cast.child().getDataType(); - if (literals.stream().allMatch(literal -> literal instanceof DateTimeV2Literal - && canLosslessConvertToLowScaleLiteral( - (DateTimeV2Literal) literal, compareType.getScale()))) { + if (castType.getScale() >= compareType.getScale() + && literals.stream().allMatch(literal -> literal instanceof DateTimeV2Literal + && canLosslessConvertToLowScaleLiteral( + (DateTimeV2Literal) literal, compareType.getScale()))) { ImmutableList.Builder children = ImmutableList.builder(); children.add(cast.child()); literals.forEach(l -> children.add(new DateTimeV2Literal(compareType, @@ -99,6 +101,7 @@ private static DateV2Literal convertToDateV2Literal(DateTimeV2Literal literal) { } private static boolean canLosslessConvertToLowScaleLiteral(DateTimeV2Literal literal, int 
targetScale) { - return literal.getMicroSecond() % (1L << (DateTimeV2Type.MAX_SCALE - targetScale)) == 0; + long scaleFactor = (long) Math.pow(10, DateTimeV2Type.MAX_SCALE - targetScale); + return literal.getMicroSecond() % scaleFactor == 0; } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyInPredicateTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyInPredicateTest.java index 09fc7346f56659..90a4aed8d07929 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyInPredicateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyInPredicateTest.java @@ -31,14 +31,22 @@ public class SimplifyInPredicateTest extends ExpressionRewriteTestHelper { + private ExpressionRuleExecutor newRewriteExecutor() { + return new ExpressionRuleExecutor(ImmutableList.of( + bottomUp( + FoldConstantRule.INSTANCE, + SimplifyInPredicate.INSTANCE))); + } + + private ExpressionRuleExecutor newFoldExecutor() { + return new ExpressionRuleExecutor(ImmutableList.of( + bottomUp( + FoldConstantRule.INSTANCE))); + } + @Test public void test() { - executor = new ExpressionRuleExecutor(ImmutableList.of( - bottomUp( - FoldConstantRule.INSTANCE, - SimplifyInPredicate.INSTANCE - ) - )); + executor = newRewriteExecutor(); Map mem = Maps.newHashMap(); Expression rewrittenExpression = PARSER.parseExpression("cast(CA as DATETIME) in ('1992-01-31 00:00:00', '1992-02-01 00:00:00')"); // after parse and type coercion: CAST(CAST(CA AS DATETIMEV2(0)) AS DATETIMEV2(6)) IN ('1992-01-31 00:00:00.000000', '1992-02-01 00:00:00.000000') @@ -49,12 +57,44 @@ public void test() { rewrittenExpression = executor.rewrite(rewrittenExpression, context); Expression expectedExpression = PARSER.parseExpression("CA in (cast('1992-01-31' as date), cast('1992-02-01' as date))"); expectedExpression = replaceUnboundSlot(expectedExpression, mem); - executor = new 
ExpressionRuleExecutor(ImmutableList.of( - bottomUp( - FoldConstantRule.INSTANCE - ) - )); + executor = newFoldExecutor(); expectedExpression = executor.rewrite(expectedExpression, context); Assertions.assertEquals(expectedExpression, rewrittenExpression); } + + @Test + public void testDoNotEliminateNarrowingDateTimeV2Cast() { + ExpressionRuleExecutor rewriteExecutor = newRewriteExecutor(); + ExpressionRuleExecutor foldExecutor = newFoldExecutor(); + Map mem = Maps.newHashMap(); + Expression rewrittenExpression = PARSER.parseExpression("cast(AA as DATETIMEV2(3)) in " + + "('2024-01-01 12:34:56.123000', '2024-01-01 09:30:01.000000', '2024-01-01 22:00:00.000000')"); + rewrittenExpression = typeCoercion(replaceUnboundSlot(rewrittenExpression, mem)); + rewrittenExpression = rewriteExecutor.rewrite(rewrittenExpression, context); + Expression rewrittenAgain = rewriteExecutor.rewrite(rewrittenExpression, context); + + Expression expectedExpression = PARSER.parseExpression("cast(AA as DATETIMEV2(3)) in " + + "(cast('2024-01-01 12:34:56.123' as DATETIMEV2(3)), " + + "cast('2024-01-01 09:30:01.000' as DATETIMEV2(3)), " + + "cast('2024-01-01 22:00:00.000' as DATETIMEV2(3)))"); + expectedExpression = replaceUnboundSlot(expectedExpression, mem); + expectedExpression = foldExecutor.rewrite(expectedExpression, context); + + Assertions.assertEquals(expectedExpression, rewrittenExpression); + Assertions.assertEquals(rewrittenExpression, rewrittenAgain); + } + + @Test + public void testDateTimeV2LiteralMustAlignWithTargetScale() { + ExpressionRuleExecutor rewriteExecutor = newRewriteExecutor(); + Map mem = Maps.newHashMap(); + Expression rewrittenExpression = PARSER.parseExpression("cast(cast(AA as DATETIMEV2(3)) as DATETIMEV2(6)) " + + "in ('2024-01-01 12:34:56.123128')"); + rewrittenExpression = typeCoercion(replaceUnboundSlot(rewrittenExpression, mem)); + Expression originalExpression = rewrittenExpression; + + rewrittenExpression = rewriteExecutor.rewrite(rewrittenExpression, 
context); + + Assertions.assertEquals(originalExpression, rewrittenExpression); + } } diff --git a/regression-test/data/nereids_syntax_p0/test_simplify_in_predicate.out b/regression-test/data/nereids_syntax_p0/test_simplify_in_predicate.out new file mode 100644 index 00000000000000..17aa212b13454f --- /dev/null +++ b/regression-test/data/nereids_syntax_p0/test_simplify_in_predicate.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !datetimev2_narrow_cast -- +1 +2 +3 +4 + diff --git a/regression-test/suites/nereids_syntax_p0/test_simplify_in_predicate.groovy b/regression-test/suites/nereids_syntax_p0/test_simplify_in_predicate.groovy index 0079d5a2bdeaa3..f7b5cbd55b1b35 100644 --- a/regression-test/suites/nereids_syntax_p0/test_simplify_in_predicate.groovy +++ b/regression-test/suites/nereids_syntax_p0/test_simplify_in_predicate.groovy @@ -35,4 +35,32 @@ suite("test_simplify_in_predicate") { sql "verbose select * from test_simplify_in_predicate_t where a in ('1992-01-31', '1992-02-01', '1992-02-02', '1992-02-03', '1992-02-04');" notContains "CAST" } + + sql 'drop table if exists test_simplify_in_predicate_datetimev2_t' + sql """CREATE TABLE IF NOT EXISTS `test_simplify_in_predicate_datetimev2_t` ( + id INT NOT NULL, + ts6 DATETIMEV2(6) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + );""" + sql """INSERT INTO test_simplify_in_predicate_datetimev2_t VALUES + (1, '2024-01-01 12:34:56.123456'), + (2, '2024-01-01 09:30:00.999999'), + (3, '2024-01-01 09:30:01.000000'), + (4, '2024-01-01 22:00:00.000001');""" + + order_qt_datetimev2_narrow_cast """ + SELECT id + FROM test_simplify_in_predicate_datetimev2_t + WHERE CAST(ts6 AS DATETIMEV2(3)) IN ( + CAST('2024-01-01 12:34:56.123000' AS DATETIMEV2(6)), + CAST('2024-01-01 09:30:01.000000' AS DATETIMEV2(6)), + CAST('2024-01-01 22:00:00.000000' AS DATETIMEV2(6)) + ) + ORDER BY 1 + 
""" + } \ No newline at end of file