From fbdf959e05ecfc65e983724878c9bab6ab84f975 Mon Sep 17 00:00:00 2001 From: morrySnow Date: Wed, 13 May 2026 16:05:04 +0800 Subject: [PATCH] [fix](fe) Fix deep nested complex type subtype validation bypass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? Issue Number: close #DORIS-25584 Problem Summary: In `DataType.validateCatalogDataType()`, all three complex-type branches (ARRAY, MAP, STRUCT) only called `validateNestedType` when the direct child type was `instanceof ScalarType`. When the child was itself a complex type (e.g. a MAP inside an ARRAY), the guard failed and the entire subtree was skipped — so BITMAP/HLL/JSONB/VARIANT used as elements at depth 3+ were silently accepted. Example: `ARRAY<MAP<BITMAP, INT>>` - ARRAY branch checks itemType (MAP) → not ScalarType → SKIP - Inner MAP is never validated → BITMAP as map key accepted silently Fix: remove the `instanceof ScalarType` guard; call `validateNestedType(parent, child)` for all child types regardless of whether they are scalar or complex. Also move the STRUCT duplicate field name check outside the former ScalarType guard so it applies to all field types. ### Release note BITMAP, HLL, JSONB, and VARIANT are now correctly rejected as ARRAY/MAP/STRUCT sub-elements even when nested 3 or more levels deep. 
### Check List (For Author) - Test: Regression test / Unit Test - Behavior changed: Yes — depth-3+ nesting with BITMAP/HLL/JSONB/VARIANT now throws AnalysisException instead of being silently accepted - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../apache/doris/nereids/types/DataType.java | 23 ++---- .../doris/nereids/types/DataTypeTest.java | 54 +++++++++++++ .../test_complex_disallowed_subtypes.groovy | 77 +++++++++++++++++++ 3 files changed, 138 insertions(+), 16 deletions(-) create mode 100644 regression-test/suites/datatype_p0/complex_types/test_complex_disallowed_subtypes.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java index 61041674db29bd..6bd830b7dffc21 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java @@ -898,22 +898,15 @@ private static void validateCatalogDataType(Type catalogType) { if (catalogType.isScalarType()) { validateScalarType((ScalarType) catalogType); } else if (catalogType.isComplexType()) { - // now we not support array / map / struct nesting complex type if (catalogType.isArrayType()) { Type itemType = ((org.apache.doris.catalog.ArrayType) catalogType).getItemType(); - if (itemType instanceof ScalarType) { - validateNestedType(catalogType, (ScalarType) itemType); - } + validateNestedType(catalogType, itemType); } if (catalogType.isMapType()) { org.apache.doris.catalog.MapType mt = (org.apache.doris.catalog.MapType) catalogType; - if (mt.getKeyType() instanceof ScalarType) { - validateNestedType(catalogType, (ScalarType) mt.getKeyType()); - } - if (mt.getValueType() instanceof ScalarType) { - validateNestedType(catalogType, (ScalarType) mt.getValueType()); - } + validateNestedType(catalogType, mt.getKeyType()); + validateNestedType(catalogType, 
mt.getValueType()); } if (catalogType.isStructType()) { ArrayList fields = @@ -921,12 +914,10 @@ private static void validateCatalogDataType(Type catalogType) { Set fieldNames = new HashSet<>(); for (org.apache.doris.catalog.StructField field : fields) { Type fieldType = field.getType(); - if (fieldType instanceof ScalarType) { - validateNestedType(catalogType, (ScalarType) fieldType); - if (!fieldNames.add(field.getName())) { - throw new AnalysisException("Duplicate field name " + field.getName() - + " in struct " + catalogType.toSql()); - } + validateNestedType(catalogType, fieldType); + if (!fieldNames.add(field.getName())) { + throw new AnalysisException("Duplicate field name " + field.getName() + + " in struct " + catalogType.toSql()); } } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/types/DataTypeTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/types/DataTypeTest.java index 3f5a4a4ca0f244..59509fce805848 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/types/DataTypeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/types/DataTypeTest.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.types; import org.apache.doris.catalog.Type; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.types.coercion.AnyDataType; import org.apache.doris.nereids.types.coercion.FractionalType; import org.apache.doris.nereids.types.coercion.IntegralType; @@ -600,4 +601,57 @@ public void testDefaultConcreteType() { Assertions.assertEquals(DateType.INSTANCE, DateType.INSTANCE.defaultConcreteType()); Assertions.assertEquals(DateTimeType.INSTANCE, DateTimeType.INSTANCE.defaultConcreteType()); } + + // DORIS-25584: special aggregate/semi-structured types must be rejected as complex-type elements + // at any nesting depth, not only at depth 2. 
+ + @Test + public void testArrayMapBitmapKeyRejected() { + // ARRAY<MAP<BITMAP, INT>> — BITMAP as map key must be caught at depth 3 + DataType type = ArrayType.of(MapType.of(BitmapType.INSTANCE, IntegerType.INSTANCE)); + Assertions.assertThrows(AnalysisException.class, type::validateDataType); + } + + @Test + public void testArrayMapBitmapValueRejected() { + // ARRAY<MAP<VARCHAR, BITMAP>> — BITMAP as map value must be caught at depth 3 + DataType type = ArrayType.of(MapType.of(VarcharType.SYSTEM_DEFAULT, BitmapType.INSTANCE)); + Assertions.assertThrows(AnalysisException.class, type::validateDataType); + } + + @Test + public void testStructArrayHllRejected() { + // STRUCT<a: ARRAY<HLL>> — HLL inside array inside struct must be caught + DataType type = new StructType( + ImmutableList.of(new StructField("a", ArrayType.of(HllType.INSTANCE), true, ""))); + Assertions.assertThrows(AnalysisException.class, type::validateDataType); + } + + @Test + public void testMapArrayJsonbRejected() { + // MAP<VARCHAR, ARRAY<JSONB>> — JSONB inside array as map value must be caught + DataType type = MapType.of(VarcharType.SYSTEM_DEFAULT, ArrayType.of(JsonType.INSTANCE)); + Assertions.assertThrows(AnalysisException.class, type::validateDataType); + } + + @Test + public void testArrayBitmapDirectlyRejected() { + // ARRAY<BITMAP> at depth 2 — must still be caught (regression guard) + DataType type = ArrayType.of(BitmapType.INSTANCE); + Assertions.assertThrows(AnalysisException.class, type::validateDataType); + } + + @Test + public void testMapHllValueDirectlyRejected() { + // MAP<INT, HLL> at depth 2 — must still be caught (regression guard) + DataType type = MapType.of(IntegerType.INSTANCE, HllType.INSTANCE); + Assertions.assertThrows(AnalysisException.class, type::validateDataType); + } + + @Test + public void testDeepValidComplexNestingAccepted() { + // ARRAY<MAP<VARCHAR, INT>> — valid 3-level nesting must still be accepted + DataType type = ArrayType.of(MapType.of(VarcharType.SYSTEM_DEFAULT, IntegerType.INSTANCE)); + Assertions.assertDoesNotThrow(type::validateDataType); + } } diff --git 
a/regression-test/suites/datatype_p0/complex_types/test_complex_disallowed_subtypes.groovy b/regression-test/suites/datatype_p0/complex_types/test_complex_disallowed_subtypes.groovy new file mode 100644 index 00000000000000..36d261e9d0a42c --- /dev/null +++ b/regression-test/suites/datatype_p0/complex_types/test_complex_disallowed_subtypes.groovy @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// DORIS-25584: BITMAP/HLL/JSONB must be rejected as complex-type elements at any nesting depth. +// Previously depth-3 (and deeper) nesting was silently accepted; now all depths are validated. 
+ +suite("test_complex_disallowed_subtypes") { + // ---- setup ---- + sql "DROP TABLE IF EXISTS t_complex_disallowed_subtypes_1" + sql "DROP TABLE IF EXISTS t_complex_disallowed_subtypes_2" + sql "DROP TABLE IF EXISTS t_complex_disallowed_subtypes_3" + sql "DROP TABLE IF EXISTS t_complex_disallowed_subtypes_4" + sql "DROP TABLE IF EXISTS t_complex_valid" + + // ---- depth-2: must still be rejected (regression guard) ---- + + test { + sql "CREATE TABLE t_complex_disallowed_subtypes_1 (k INT, v ARRAY<BITMAP>) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES('replication_num'='1')" + exception "unsupported sub-type" + } + + test { + sql "CREATE TABLE t_complex_disallowed_subtypes_2 (k INT, v MAP<INT, HLL>) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES('replication_num'='1')" + exception "unsupported sub-type" + } + + // ---- depth-3: were silently accepted before the fix, must now be rejected ---- + + test { + // ARRAY<MAP<BITMAP, INT>> + sql "CREATE TABLE t_complex_disallowed_subtypes_1 (k INT, v ARRAY<MAP<BITMAP, INT>>) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES('replication_num'='1')" + exception "unsupported sub-type" + } + + test { + // ARRAY<MAP<STRING, BITMAP>> + sql "CREATE TABLE t_complex_disallowed_subtypes_2 (k INT, v ARRAY<MAP<STRING, BITMAP>>) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES('replication_num'='1')" + exception "unsupported sub-type" + } + + test { + // STRUCT<a: ARRAY<HLL>> + sql "CREATE TABLE t_complex_disallowed_subtypes_3 (k INT, v STRUCT<a: ARRAY<HLL>>) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES('replication_num'='1')" + exception "unsupported sub-type" + } + + test { + // MAP<STRING, ARRAY<JSONB>> + sql "CREATE TABLE t_complex_disallowed_subtypes_4 (k INT, v MAP<STRING, ARRAY<JSONB>>) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES('replication_num'='1')" + exception "unsupported sub-type" + } + + // ---- valid deep nesting must still be accepted ---- + + sql """ + CREATE TABLE t_complex_valid (k INT, v ARRAY<MAP<STRING, INT>>) + DUPLICATE KEY(k) + DISTRIBUTED BY HASH(k) BUCKETS 1 + PROPERTIES('replication_num'='1') + """ 
+ + sql "DROP TABLE IF EXISTS t_complex_valid" +}