From f1db522bbd4c81e627eb1dc4efec4330d18396bf Mon Sep 17 00:00:00 2001 From: linzhenqi Date: Thu, 28 May 2026 19:41:50 +0800 Subject: [PATCH] [Enhancement](udf) Reject bitmap, hll, and quantile_state in udf create --- .../plans/commands/CreateFunctionCommand.java | 33 ++++ .../doris/catalog/CreateFunctionTest.java | 35 ++++ ...test_pythonudaf_object_types_inline.groovy | 184 ++++++++++++++++++ .../test_pythonudf_object_types_inline.groovy | 105 ++++++++++ ...test_pythonudtf_object_types_inline.groovy | 105 ++++++++++ 5 files changed, 462 insertions(+) create mode 100644 regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy create mode 100644 regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy create mode 100644 regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java index bc5edcbb59ba52..4a367bb8079a80 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java @@ -37,6 +37,7 @@ import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarFunction; import org.apache.doris.catalog.ScalarType; +import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; @@ -346,6 +347,7 @@ private void analyzeCommon(ConnectContext ctx) throws AnalysisException { } if (binaryType == Function.BinaryType.JAVA_UDF) { FunctionUtil.checkEnableJavaUdf(); + checkUdfSupportedTypes(); if (!isAggregate && !isTableFunction) { volatility = analyzeVolatility(); } @@ -363,6 +365,7 @@ private void analyzeCommon(ConnectContext ctx) throws AnalysisException { extractExpirationTime(); } else if (binaryType == Function.BinaryType.PYTHON_UDF) { FunctionUtil.checkEnablePythonUdf(); + checkUdfSupportedTypes(); if (!isAggregate && !isTableFunction) { volatility = analyzeVolatility(); } @@ -418,6 +421,36 @@ private static boolean validatePythonRuntimeVersion(String runtimeVersionString) return runtimeVersionString != null && PYTHON_VERSION_PATTERN.matcher(runtimeVersionString).matches(); } + private void checkUdfSupportedTypes() throws AnalysisException { + Type[] argTypes = argsDef.getArgTypes(); + for (int i = 0; i < argTypes.length; i++) { + checkUdfSupportedType(argTypes[i], "argument " + (i + 1)); + } + checkUdfSupportedType(returnType.toCatalogDataType(), "return"); + if (intermediateType != null) { + checkUdfSupportedType(intermediateType.toCatalogDataType(), "intermediate"); + } + } + + private void checkUdfSupportedType(Type type, String typePosition) throws AnalysisException { + // Reject bitmap/hll/quantile_state type + if (type.isObjectStored()) { + throw new AnalysisException(String.format( + "%s does not support %s type %s", binaryType, typePosition, type.toSql())); + } + + if (type.isArrayType()) { + checkUdfSupportedType(((ArrayType) type).getItemType(), typePosition + " element"); + } else if (type.isMapType()) { + checkUdfSupportedType(((MapType) type).getKeyType(), typePosition + " key"); + checkUdfSupportedType(((MapType) type).getValueType(), typePosition + " value"); + } else if (type.isStructType()) { + for (StructField field : ((StructType) type).getFields()) { + checkUdfSupportedType(field.getType(), typePosition + " field " + field.getName()); + } + } + } + private Boolean parseBooleanFromProperties(String propertyString) throws AnalysisException { String valueOfString = properties.get(propertyString); if (valueOfString == null) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java index 426a45074b85c3..e6741b9e54ce8a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java @@ -130,6 +130,35 @@ public void test() throws Exception { Assert.assertEquals(FunctionVolatility.VOLATILE, findFunction(db, "py_default").getVolatility()); } + @Test + public void testCreatePythonFunctionRejectsObjectTypes() throws Exception { + ConnectContext ctx = UtFrameUtils.createDefaultCtx(); + createDatabase(ctx, "create database py_obj_type_db;"); + dorisAssert = new DorisAssert(ctx); + dorisAssert.useDatabase("py_obj_type_db"); + + assertCreateFunctionAnalysisException(ctx, "create function py_obj_type_db.py_bitmap_arg(bitmap) returns int " + + "properties('type'='PYTHON_UDF', 'symbol'='evaluate', 'runtime_version'='3.10.2');", + "PYTHON_UDF does not support argument 1 type bitmap"); + assertCreateFunctionAnalysisException(ctx, "create function py_obj_type_db.j_bitmap_arg(bitmap) returns int " + + "properties('type'='JAVA_UDF', 'symbol'='evaluate');", + "JAVA_UDF does not support argument 1 type bitmap"); + assertCreateFunctionAnalysisException(ctx, "create function py_obj_type_db.py_hll_ret(int) returns hll " + + "properties('type'='PYTHON_UDF', 'symbol'='evaluate', 'runtime_version'='3.10.2');", + "PYTHON_UDF does not support return type hll"); + assertCreateFunctionAnalysisException(ctx, "create aggregate function py_obj_type_db.py_quantile_arg" + + "(quantile_state) returns int properties('type'='PYTHON_UDF', 'symbol'='Agg', " + + "'runtime_version'='3.10.2');", + "PYTHON_UDF does not support argument 1 type quantile_state"); + assertCreateFunctionAnalysisException(ctx, "create aggregate function py_obj_type_db.j_quantile_arg" + + "(quantile_state) returns int properties('type'='JAVA_UDF', 'symbol'='Agg');", + "JAVA_UDF does not support argument 1 type quantile_state"); + assertCreateFunctionAnalysisException(ctx, "create tables function py_obj_type_db.py_bitmap_table(int) " + + "returns array properties('type'='PYTHON_UDF', 'symbol'='evaluate', " + + "'runtime_version'='3.10.2');", + "ARRAY unsupported sub-type: bitmap"); + } + @Test public void testCreateGlobalFunction() throws Exception { ConnectContext ctx = UtFrameUtils.createDefaultCtx(); @@ -215,6 +244,12 @@ private void createFunction(String sql, ConnectContext connectContext) throws Ex } } + private void assertCreateFunctionAnalysisException(ConnectContext ctx, String sql, String message) { + Exception exception = Assert.assertThrows(Exception.class, () -> createFunction(sql, ctx)); + Assert.assertTrue("Expected error to contain: " + message + ", actual: " + exception.getMessage(), + exception.getMessage().contains(message)); + } + private boolean containsIgnoreCase(String str, String sub) { return str.toLowerCase().contains(sub.toLowerCase()); } diff --git a/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy b/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy new file mode 100644 index 00000000000000..d37f5d33074a4a --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy @@ -0,0 +1,184 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_pythonudaf_object_types_inline") { + def runtime_version = getPythonUdfRuntimeVersion() + + test { + sql """ + CREATE AGGREGATE FUNCTION py_obj_udaf_bitmap_arg(bitmap) + RETURNS BIGINT + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "Agg", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +class Agg: + def __init__(self): + self.sum = 0 + def accumulate(self, v): + pass + def merge(self, other): + pass + def finish(self): + return self.sum + @property + def aggregate_state(self): + return self.sum +\$\$; + """ + exception "does not support argument 1 type bitmap" + } + + test { + sql """ + CREATE AGGREGATE FUNCTION py_obj_udaf_hll_ret(int) + RETURNS HLL + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "Agg", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +class Agg: + def __init__(self): + self.state = None + def accumulate(self, v): + pass + def merge(self, other): + pass + def finish(self): + return self.state + @property + def aggregate_state(self): + return self.state +\$\$; + """ + exception "does not support return type hll" + } + + test { + sql """ + CREATE AGGREGATE FUNCTION py_obj_udaf_quantile_state(quantile_state) + RETURNS BIGINT + INTERMEDIATE BIGINT + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "Agg", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +class Agg: + def __init__(self): + self.state = 0 + def accumulate(self, v): + pass + def merge(self, other): + pass + def finish(self): + return self.state + @property + def aggregate_state(self): + return self.state +\$\$; + """ + exception "does not support argument 1 type quantile_state" + } + + test { + sql """ + CREATE AGGREGATE FUNCTION py_obj_udaf_bitmap_intermediate(int) + RETURNS BIGINT + INTERMEDIATE BITMAP + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "Agg", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +class Agg: + def __init__(self): + self.state = 0 + def accumulate(self, v): + pass + def merge(self, other): + pass + def finish(self): + return self.state + @property + def aggregate_state(self): + return self.state +\$\$; + """ + exception "does not support intermediate type bitmap" + } + + test { + sql """ + CREATE AGGREGATE FUNCTION py_obj_udaf_array_bitmap(int) + RETURNS ARRAY + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "Agg", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +class Agg: + def __init__(self): + self.state = None + def accumulate(self, v): + pass + def merge(self, other): + pass + def finish(self): + return self.state + @property + def aggregate_state(self): + return self.state +\$\$; + """ + exception "ARRAY unsupported sub-type: bitmap" + } + + test { + sql """ + CREATE AGGREGATE FUNCTION py_obj_udaf_struct_bitmap(int) + RETURNS STRUCT>> + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "Agg", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +class Agg: + def __init__(self): + self.state = None + def accumulate(self, v): + pass + def merge(self, other): + pass + def finish(self): + return self.state + @property + def aggregate_state(self): + return self.state +\$\$; + """ + exception "ARRAY unsupported sub-type: hll" + } +} diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy new file mode 100644 index 00000000000000..b141e6d503ea26 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_pythonudf_object_types_inline") { + def runtime_version = getPythonUdfRuntimeVersion() + + test { + sql """ + CREATE FUNCTION py_obj_udf_bitmap_arg(bitmap) + RETURNS INT + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + return 1 +\$\$; + """ + exception "does not support argument 1 type bitmap" + } + + test { + sql """ + CREATE FUNCTION py_obj_udf_hll_ret(int) + RETURNS HLL + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + return None +\$\$; + """ + exception "does not support return type hll" + } + + test { + sql """ + CREATE FUNCTION py_obj_udf_array_bitmap(array) + RETURNS ARRAY + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + return None +\$\$; + """ + exception "ARRAY unsupported sub-type: bitmap" + } + + test { + sql """ + CREATE FUNCTION py_obj_udf_map_bitmap(map) + RETURNS INT + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + return 1 +\$\$; + """ + exception "MAP unsupported sub-type: bitmap" + } + + test { + sql """ + CREATE FUNCTION py_obj_udf_struct_bitmap(INT) + RETURNS STRUCT>> + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + return None +\$\$; + """ + exception "STRUCT unsupported sub-type: bitmap" + } +} diff --git a/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy b/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy new file mode 100644 index 00000000000000..0d4259a6e6e02d --- /dev/null +++ b/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_pythonudtf_object_types_inline") { + def runtime_version = getPythonUdfRuntimeVersion() + + test { + sql """ + CREATE TABLES FUNCTION py_obj_udtf_bitmap_arg(bitmap) + RETURNS ARRAY> + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + yield (1,) +\$\$; + """ + exception "does not support argument 1 type bitmap" + } + + test { + sql """ + CREATE TABLES FUNCTION py_obj_udtf_hll_ret(int) + RETURNS ARRAY + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + yield (1,) +\$\$; + """ + exception "ARRAY unsupported sub-type: hll" + } + + test { + sql """ + CREATE TABLES FUNCTION py_obj_udtf_quantile_state(quantile_state) + RETURNS ARRAY> + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + yield (1,) +\$\$; + """ + exception "does not support argument 1 type quantile_state" + } + + test { + sql """ + CREATE TABLES FUNCTION py_obj_udtf_array_bitmap(array) + RETURNS ARRAY> + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + yield (1,) +\$\$; + """ + exception "ARRAY unsupported sub-type: bitmap" + } + + test { + sql """ + CREATE TABLES FUNCTION py_obj_udtf_struct_bitmap(int) + RETURNS ARRAY>>> + PROPERTIES ( + "type" = "PYTHON_UDF", + "symbol" = "evaluate", + "runtime_version" = "${runtime_version}" + ) + AS \$\$ +def evaluate(v): + yield (1,) +\$\$; + """ + exception "ARRAY unsupported sub-type: bitmap" + } +}