diff --git a/be/src/udf/python/python_server.py b/be/src/udf/python/python_server.py index f759f2054a3a5a..d6d60cc4c9946c 100644 --- a/be/src/udf/python/python_server.py +++ b/be/src/udf/python/python_server.py @@ -812,28 +812,44 @@ def load(self) -> AdaptivePythonUDF: class ModuleUDFLoader(UDFLoader): """Loads a UDF from a Python module file (.py).""" + # Module names that are forbidden for UDFs because they conflict with + # modules already imported by the server process. Loading a user module + # with one of these names would overwrite the entry in sys.modules and + # could break the server itself. + _FORBIDDEN_MODULE_NAMES: frozenset = frozenset({ + "argparse", "base64", "gc", "importlib", "inspect", "ipaddress", + "json", "sys", "os", "traceback", "logging", "time", "threading", + "pickle", "abc", "contextlib", "typing", "datetime", "enum", + "pathlib", "pandas", "pd", "pyarrow", "pa", "flight", + "logging.handlers", + }) + # Class-level lock dictionary for thread-safe module imports # Using RLock allows the same thread to acquire the lock multiple times - # Key: (location, module_name) tuple to avoid conflicts between different locations - _import_locks: Dict[Tuple[str, str], threading.RLock] = {} + + # Key for _import_locks: module_name only (not location) + # sys.modules is a global dict keyed by module name. + # we need to ensure that imports with the same module name + # do not interfere with each other across different threads, + # even if they come from different file paths. 
+ _import_locks: Dict[str, threading.Lock] = {} _import_locks_lock = threading.Lock() - _module_cache: Dict[Tuple[str, str], Any] = {} + + # Key for _module_cache: location only + # since location already contains a unique function_id + _module_cache: Dict[str, Any] = {} _module_cache_lock = threading.Lock() @classmethod - def _get_import_lock(cls, location: str, module_name: str) -> threading.RLock: + def _get_import_lock(cls, module_name: str) -> threading.Lock: """ - Get or create a reentrant lock for the given location and module name. + Get or create a non-reentrant lock for the given module name. Uses double-checked locking pattern for optimal performance: - Fast path: return existing lock without acquiring global lock - Slow path: create new lock under global lock protection - - Args: - location: The directory path where the module is located - module_name: The full module name to import """ - cache_key = (location, module_name) + cache_key = module_name # Fast path: check without lock (read-only, safe for most cases) if cache_key in cls._import_locks: @@ -843,7 +859,7 @@ def _get_import_lock(cls, location: str, module_name: str) -> threading.RLock: with cls._import_locks_lock: # Double-check: another thread might have created it while we waited if cache_key not in cls._import_locks: - cls._import_locks[cache_key] = threading.RLock() + cls._import_locks[cache_key] = threading.Lock() return cls._import_locks[cache_key] def load(self) -> AdaptivePythonUDF: @@ -911,20 +927,46 @@ def parse_symbol(self, symbol: str): return package_name, module_name, func_name + @staticmethod + def _clear_modules_from_sys(full_module_name: str) -> None: + """Remove a module and all its ancestor packages from sys.modules. + + sys.modules is keyed by name only, so an entry cached from one path + would otherwise be reused when the same name is loaded from another + (e.g. the pkg under path_a affecting pkg.mod_a under path_b). + To prevent this, the ancestor chain is cleared after each import. 
+ + This ensures that subsequent imports always start from a fresh state. + """ + parts = full_module_name.split(".") + for i in range(len(parts)): + ancestor = ".".join(parts[: i + 1]) + sys.modules.pop(ancestor, None) + def _get_or_import_module(self, location: str, full_module_name: str) -> Any: """ Get module from cache or import it (thread-safe). - Uses a location-aware cache to prevent conflicts when different locations - have modules with the same name. + The cache is keyed by location alone, which already contains a unique + function_id assigned by the FE catalog. """ - cache_key = (location, full_module_name) + # Reject module names that would shadow server-critical modules + top_level_name = full_module_name.split(".")[0] + if top_level_name in ModuleUDFLoader._FORBIDDEN_MODULE_NAMES: + raise ImportError( + f"Module name '{full_module_name}' is not allowed for UDFs " + f"because it conflicts with a module used by the server. " + f"Please rename your module to avoid shadowing built-in or " + f"server-critical modules." 
+ ) + + cache_key = location - # Use a per-(location, module) lock to prevent race conditions during import - import_lock = ModuleUDFLoader._get_import_lock(location, full_module_name) + # Use a per-module lock to prevent race conditions during import + import_lock = ModuleUDFLoader._get_import_lock(full_module_name) with import_lock: - # Fast path: check location-aware cache first + # Fast path: check cache first if cache_key in ModuleUDFLoader._module_cache: cached_module = ModuleUDFLoader._module_cache[cache_key] if cached_module is not None and ( @@ -935,25 +977,19 @@ def _get_or_import_module(self, location: str, full_module_name: str) -> Any: else: del ModuleUDFLoader._module_cache[cache_key] - # Before importing, clear any existing module with the same name in sys.modules - # that might have been loaded from a different location - if full_module_name in sys.modules: - existing_module = sys.modules[full_module_name] - existing_file = getattr(existing_module, "__file__", None) - # Check if the existing module is from a different location - if existing_file and not existing_file.startswith(location): - del sys.modules[full_module_name] + self._clear_modules_from_sys(full_module_name) with temporary_sys_path(location): try: module = importlib.import_module(full_module_name) - # Store in location-aware cache ModuleUDFLoader._module_cache[cache_key] = module + # Evict from sys.modules so future imports from a + # different location are not poisoned by this one. 
+ self._clear_modules_from_sys(full_module_name) return module except Exception: # Clean up any partially-imported modules - if full_module_name in sys.modules: - del sys.modules[full_module_name] + self._clear_modules_from_sys(full_module_name) if cache_key in ModuleUDFLoader._module_cache: del ModuleUDFLoader._module_cache[cache_key] raise @@ -2540,8 +2576,8 @@ def _clear_modules_from_location(self, location: str) -> list: # This ensures no concurrent _get_or_import_module is in progress # for this (location, module_name) pair. for key in keys_to_remove: - loc, module_name = key - import_lock = ModuleUDFLoader._get_import_lock(loc, module_name) + _, module_name = key + import_lock = ModuleUDFLoader._get_import_lock(module_name) with import_lock: with ModuleUDFLoader._module_cache_lock: diff --git a/regression-test/data/pythonudaf_p0/test_pythonudaf_forbidden_module.out b/regression-test/data/pythonudaf_p0/test_pythonudaf_forbidden_module.out new file mode 100644 index 00000000000000..63a8df5a61dd9e --- /dev/null +++ b/regression-test/data/pythonudaf_p0/test_pythonudaf_forbidden_module.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !mid_forbidden_udaf_ok -- +63 + diff --git a/regression-test/data/pythonudaf_p0/test_pythonudaf_pkg_isolation.out b/regression-test/data/pythonudaf_p0/test_pythonudaf_pkg_isolation.out new file mode 100644 index 00000000000000..a55082ac7f931a --- /dev/null +++ b/regression-test/data/pythonudaf_p0/test_pythonudaf_pkg_isolation.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !pkg_isolation_1 -- +6 2006 + +-- !pkg_isolation_2 -- +6 1006 + +-- !pkg_isolation_3 -- +1006 3006 + +-- !pkg_isolation_4 -- +6 1006 2006 3006 + diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_forbidden_module.out b/regression-test/data/pythonudf_p0/test_pythonudf_forbidden_module.out new file mode 100644 index 00000000000000..1dbed3318aaccf --- /dev/null +++ b/regression-test/data/pythonudf_p0/test_pythonudf_forbidden_module.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !mid_forbidden_ok -- +20 + diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_pkg_isolation.out b/regression-test/data/pythonudf_p0/test_pythonudf_pkg_isolation.out new file mode 100644 index 00000000000000..a782ca4cc03d2d --- /dev/null +++ b/regression-test/data/pythonudf_p0/test_pythonudf_pkg_isolation.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !pkg_isolation_1 -- +15 105 + +-- !pkg_isolation_2 -- +15 25 + +-- !pkg_isolation_3 -- +25 205 + +-- !pkg_isolation_4 -- +20 30 110 210 + diff --git a/regression-test/data/pythonudtf_p0/test_pythonudtf_forbidden_module.out b/regression-test/data/pythonudtf_p0/test_pythonudtf_forbidden_module.out new file mode 100644 index 00000000000000..f2f262556218f0 --- /dev/null +++ b/regression-test/data/pythonudtf_p0/test_pythonudtf_forbidden_module.out @@ -0,0 +1,6 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !mid_forbidden_udtf_ok -- +10 20 +20 30 +30 40 + diff --git a/regression-test/data/pythonudtf_p0/test_pythonudtf_pkg_isolation.out b/regression-test/data/pythonudtf_p0/test_pythonudtf_pkg_isolation.out new file mode 100644 index 00000000000000..1b0866050441b1 --- /dev/null +++ b/regression-test/data/pythonudtf_p0/test_pythonudtf_pkg_isolation.out @@ -0,0 +1,17 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !pkg_isolation_1 -- +1 201 +2 202 + +-- !pkg_isolation_2 -- +1 101 +2 102 + +-- !pkg_isolation_3 -- +101 301 +102 302 + +-- !pkg_isolation_4 -- +1 101 201 301 +2 102 202 302 + diff --git a/regression-test/suites/pythonudaf_p0/test_pythonudaf_forbidden_module.groovy b/regression-test/suites/pythonudaf_p0/test_pythonudaf_forbidden_module.groovy new file mode 100644 index 00000000000000..35a9f00a7b4862 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/test_pythonudaf_forbidden_module.groovy @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_pythonudaf_forbidden_module") { + // Test that top-level UDAF module names shadowing server-critical modules + // are rejected, while a packaged UDAF with a forbidden middle module name still works. + + def pyPath = """${context.file.parent}/udaf_scripts/python_udaf_forbidden_module.zip""" + scp_udf_file_to_all_be(pyPath) + def runtime_version = "3.8.10" + def forbiddenCases = [ + [name: "os", function: "py_forbidden_os_udaf", symbol: "os.ForbiddenUDAF"], + [name: "pathlib", function: "py_forbidden_pathlib_udaf", symbol: "pathlib.ForbiddenUDAF"], + [name: "pickle", function: "py_forbidden_pickle_udaf", symbol: "pickle.ForbiddenUDAF"], + [name: "datetime", function: "py_forbidden_datetime_udaf", symbol: "datetime.ForbiddenUDAF"], + ] + log.info("Python Zip path: ${pyPath}".toString()) + + try { + // Create test table + sql """ DROP TABLE IF EXISTS udaf_forbidden_test """ + sql """ + CREATE TABLE udaf_forbidden_test ( + id INT, + val INT + ) DISTRIBUTED BY HASH(id) PROPERTIES("replication_num" = "1"); + """ + + sql """ INSERT INTO udaf_forbidden_test VALUES (1, 10), (2, 20), (3, 30); """ + + forbiddenCases.each { forbiddenCase -> + sql """ DROP FUNCTION IF EXISTS ${forbiddenCase.function}(INT); """ + sql """ + CREATE AGGREGATE FUNCTION ${forbiddenCase.function}(INT) + RETURNS BIGINT + PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${pyPath}", + "symbol" = "${forbiddenCase.symbol}", + "runtime_version" = "${runtime_version}" + ); + """ + + test { + sql """ SELECT ${forbiddenCase.function}(val) FROM udaf_forbidden_test; """ + exception "is not allowed for UDFs" + } + } + + sql """ DROP FUNCTION IF EXISTS py_mid_forbidden_udaf_ok(INT); """ + sql """ + CREATE AGGREGATE FUNCTION py_mid_forbidden_udaf_ok(INT) + RETURNS BIGINT + PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${pyPath}", + "symbol" = "safepkg_udaf.pathlib.SafePathlibUDAF", + "runtime_version" = "${runtime_version}" + ); + """ + + qt_mid_forbidden_udaf_ok """ SELECT 
py_mid_forbidden_udaf_ok(val) AS result FROM udaf_forbidden_test; """ + + } finally { + forbiddenCases.each { forbiddenCase -> + try_sql("DROP FUNCTION IF EXISTS ${forbiddenCase.function}(INT);") + } + try_sql("DROP FUNCTION IF EXISTS py_mid_forbidden_udaf_ok(INT);") + try_sql("DROP TABLE IF EXISTS udaf_forbidden_test") + } +} diff --git a/regression-test/suites/pythonudaf_p0/test_pythonudaf_pkg_isolation.groovy b/regression-test/suites/pythonudaf_p0/test_pythonudaf_pkg_isolation.groovy new file mode 100644 index 00000000000000..f4d88026964c92 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/test_pythonudaf_pkg_isolation.groovy @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite('test_pythonudaf_pkg_isolation') { + def runtime_version = '3.8.10' + def zipA = """${context.file.parent}/udaf_scripts/python_udaf_pkg_a/python_udaf_pkg_test.zip""" + def zipB = """${context.file.parent}/udaf_scripts/python_udaf_pkg_b/python_udaf_pkg_test.zip""" + + scp_udf_file_to_all_be(zipA) + scp_udf_file_to_all_be(zipB) + + sql '''DROP TABLE IF EXISTS py_udaf_pkg_tbl''' + sql ''' + CREATE TABLE py_udaf_pkg_tbl ( + v INT + ) ENGINE=OLAP + DUPLICATE KEY(v) + DISTRIBUTED BY HASH(v) BUCKETS 1 + PROPERTIES("replication_num" = "1"); + ''' + sql '''INSERT INTO py_udaf_pkg_tbl VALUES (1), (2), (3);''' + + try { + // Case 1: Same package, same module, different zip paths + sql '''DROP FUNCTION IF EXISTS py_pkg_a_sum_x(INT)''' + sql '''DROP FUNCTION IF EXISTS py_pkg_b_sum_x(INT)''' + sql """ + CREATE AGGREGATE FUNCTION py_pkg_a_sum_x(INT) RETURNS BIGINT PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${zipA}", + "symbol" = "mypkg.mod_x.SumAgg", + "runtime_version" = "${runtime_version}" + ) + """ + sql """ + CREATE AGGREGATE FUNCTION py_pkg_b_sum_x(INT) RETURNS BIGINT PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${zipB}", + "symbol" = "mypkg.mod_x.SumAgg", + "runtime_version" = "${runtime_version}" + ) + """ + + qt_pkg_isolation_1 '''SELECT py_pkg_a_sum_x(v), py_pkg_b_sum_x(v) FROM py_udaf_pkg_tbl;''' + + // Case 2: Same package, different modules, same zip + sql '''DROP FUNCTION IF EXISTS py_pkg_a_sum_y(INT)''' + sql """ + CREATE AGGREGATE FUNCTION py_pkg_a_sum_y(INT) RETURNS BIGINT PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${zipA}", + "symbol" = "mypkg.mod_y.SumAgg", + "runtime_version" = "${runtime_version}" + ) + """ + + qt_pkg_isolation_2 '''SELECT py_pkg_a_sum_x(v), py_pkg_a_sum_y(v) FROM py_udaf_pkg_tbl;''' + + // Case 3: Same package, different modules, different zips + sql '''DROP FUNCTION IF EXISTS py_pkg_b_sum_y(INT)''' + sql """ + CREATE AGGREGATE FUNCTION py_pkg_b_sum_y(INT) RETURNS BIGINT PROPERTIES ( + "type" 
= "PYTHON_UDF", + "file" = "file://${zipB}", + "symbol" = "mypkg.mod_y.SumAgg", + "runtime_version" = "${runtime_version}" + ) + """ + + qt_pkg_isolation_3 '''SELECT py_pkg_a_sum_y(v), py_pkg_b_sum_y(v) FROM py_udaf_pkg_tbl;''' + + // Case 4: All four combinations together + qt_pkg_isolation_4 '''SELECT py_pkg_a_sum_x(v), py_pkg_a_sum_y(v), py_pkg_b_sum_x(v), py_pkg_b_sum_y(v) FROM py_udaf_pkg_tbl;''' + + } finally { + try_sql('DROP FUNCTION IF EXISTS py_pkg_a_sum_x(INT);') + try_sql('DROP FUNCTION IF EXISTS py_pkg_a_sum_y(INT);') + try_sql('DROP FUNCTION IF EXISTS py_pkg_b_sum_x(INT);') + try_sql('DROP FUNCTION IF EXISTS py_pkg_b_sum_y(INT);') + } +} diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/datetime.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/datetime.py new file mode 100644 index 00000000000000..fde0c7f2638b83 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/datetime.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ForbiddenUDAF: + """A UDAF module that shadows the built-in datetime module.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/datetime.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/datetime.py new file mode 100644 index 00000000000000..656ce9bfa61c90 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/datetime.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ForbiddenUDAF: + """A UDAF module under the forbidden package.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/os.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/os.py new file mode 100644 index 00000000000000..656ce9bfa61c90 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/os.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ForbiddenUDAF: + """A UDAF module under the forbidden package.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/pathlib.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/pathlib.py new file mode 100644 index 00000000000000..656ce9bfa61c90 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/pathlib.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ForbiddenUDAF: + """A UDAF module under the forbidden package.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/pickle.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/pickle.py new file mode 100644 index 00000000000000..656ce9bfa61c90 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/forbidden/pickle.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ForbiddenUDAF: + """A UDAF module under the forbidden package.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/os.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/os.py new file mode 100644 index 00000000000000..2c904bb063a4d7 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/os.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ForbiddenUDAF: + """A UDAF module that shadows the built-in os module.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/pathlib.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/pathlib.py new file mode 100644 index 00000000000000..894f0e35b9229e --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/pathlib.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ForbiddenUDAF: + """A UDAF module that shadows the built-in pathlib module.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/pickle.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/pickle.py new file mode 100644 index 00000000000000..ee89f2b3a538a0 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/pickle.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ForbiddenUDAF: + """A UDAF module that shadows the built-in pickle module.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_forbidden_module.zip b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_forbidden_module.zip new file mode 100644 index 00000000000000..4e297d9b841f80 Binary files /dev/null and b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_forbidden_module.zip differ diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/mypkg/mod_x.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/mypkg/mod_x.py new file mode 100644 index 00000000000000..497f60b2e0ebe0 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/mypkg/mod_x.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pickle + + +class SumAgg: + def __init__(self): + self.sum = 0 + + def init(self): + self.sum = 0 + + @property + def aggregate_state(self): + return self.sum + + def accumulate(self, val): + if val is not None: + self.sum += val + + def merge(self, other_state): + if other_state is not None: + self.sum += other_state + + def serialize(self): + return pickle.dumps(self.sum) + + def deserialize(self, data): + self.sum = pickle.loads(data) + + def finish(self): + return self.sum diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/mypkg/mod_y.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/mypkg/mod_y.py new file mode 100644 index 00000000000000..0500c66807f219 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/mypkg/mod_y.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pickle + + +class SumAgg: + def __init__(self): + self.sum = 0 + + def init(self): + self.sum = 0 + + @property + def aggregate_state(self): + return self.sum + + def accumulate(self, val): + if val is not None: + self.sum += val + + def merge(self, other_state): + if other_state is not None: + self.sum += other_state + + def serialize(self): + return pickle.dumps(self.sum) + + def deserialize(self, data): + self.sum = pickle.loads(data) + + def finish(self): + return self.sum + 1000 diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/python_udaf_pkg_test.zip b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/python_udaf_pkg_test.zip new file mode 100644 index 00000000000000..9996f6c72bbeda Binary files /dev/null and b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_a/python_udaf_pkg_test.zip differ diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/mypkg/mod_x.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/mypkg/mod_x.py new file mode 100644 index 00000000000000..bb9f3d5b644ea4 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/mypkg/mod_x.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +import pickle + + +class SumAgg: + def __init__(self): + self.sum = 0 + + def init(self): + self.sum = 0 + + @property + def aggregate_state(self): + return self.sum + + def accumulate(self, val): + if val is not None: + self.sum += val + + def merge(self, other_state): + if other_state is not None: + self.sum += other_state + + def serialize(self): + return pickle.dumps(self.sum) + + def deserialize(self, data): + self.sum = pickle.loads(data) + + def finish(self): + return self.sum + 2000 diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/mypkg/mod_y.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/mypkg/mod_y.py new file mode 100644 index 00000000000000..429cefd5cb23d3 --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/mypkg/mod_y.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pickle + + +class SumAgg: + def __init__(self): + self.sum = 0 + + def init(self): + self.sum = 0 + + @property + def aggregate_state(self): + return self.sum + + def accumulate(self, val): + if val is not None: + self.sum += val + + def merge(self, other_state): + if other_state is not None: + self.sum += other_state + + def serialize(self): + return pickle.dumps(self.sum) + + def deserialize(self, data): + self.sum = pickle.loads(data) + + def finish(self): + return self.sum + 3000 diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/python_udaf_pkg_test.zip b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/python_udaf_pkg_test.zip new file mode 100644 index 00000000000000..3a7341fade02cc Binary files /dev/null and b/regression-test/suites/pythonudaf_p0/udaf_scripts/python_udaf_pkg_b/python_udaf_pkg_test.zip differ diff --git a/regression-test/suites/pythonudaf_p0/udaf_scripts/safepkg_udaf/pathlib.py b/regression-test/suites/pythonudaf_p0/udaf_scripts/safepkg_udaf/pathlib.py new file mode 100644 index 00000000000000..baa695a0637ece --- /dev/null +++ b/regression-test/suites/pythonudaf_p0/udaf_scripts/safepkg_udaf/pathlib.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class SafePathlibUDAF: + """A valid packaged UDAF whose middle module name is forbidden-like.""" + + def __init__(self): + self.total = 0 + + def accumulate(self, value): + if value is not None: + self.total += value + 1 + + def merge(self, other_state): + if other_state is not None: + self.total += other_state + + def finish(self): + return self.total + + @property + def aggregate_state(self): + return self.total diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_forbidden_module.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_forbidden_module.groovy new file mode 100644 index 00000000000000..8f27349d024c5a --- /dev/null +++ b/regression-test/suites/pythonudf_p0/test_pythonudf_forbidden_module.groovy @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_pythonudf_forbidden_module") { + // Test that top-level UDF module names shadowing server-critical modules + // are rejected, while a packaged UDF with a forbidden middle module name still works. 
+ + def pyPath = """${context.file.parent}/udf_scripts/python_udf_forbidden_module.zip""" + scp_udf_file_to_all_be(pyPath) + def runtime_version = "3.8.10" + def forbiddenCases = [ + [name: "threading", function: "py_forbidden_threading", symbol: "threading.evaluate"], + [name: "json", function: "py_forbidden_json", symbol: "json.evaluate"], + [name: "sys", function: "py_forbidden_sys", symbol: "sys.evaluate"], + [name: "logging", function: "py_forbidden_logging", symbol: "logging.evaluate"], + ] + log.info("Python Zip path: ${pyPath}".toString()) + + try { + forbiddenCases.each { forbiddenCase -> + sql """ DROP FUNCTION IF EXISTS ${forbiddenCase.function}(INT); """ + sql """ + CREATE FUNCTION ${forbiddenCase.function}(INT) + RETURNS INT + PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${pyPath}", + "symbol" = "${forbiddenCase.symbol}", + "runtime_version" = "${runtime_version}", + "always_nullable" = "true" + ); + """ + + test { + sql """ SELECT ${forbiddenCase.function}(1); """ + exception "is not allowed for UDFs" + } + } + + sql """ DROP FUNCTION IF EXISTS py_mid_forbidden_ok(INT); """ + sql """ + CREATE FUNCTION py_mid_forbidden_ok(INT) + RETURNS INT + PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${pyPath}", + "symbol" = "safepkg_udf.logging.evaluate", + "runtime_version" = "${runtime_version}", + "always_nullable" = "true" + ); + """ + + qt_mid_forbidden_ok """ SELECT py_mid_forbidden_ok(10) AS result; """ + + } finally { + forbiddenCases.each { forbiddenCase -> + try_sql("DROP FUNCTION IF EXISTS ${forbiddenCase.function}(INT);") + } + try_sql("DROP FUNCTION IF EXISTS py_mid_forbidden_ok(INT);") + } +} diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_pkg_isolation.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_pkg_isolation.groovy new file mode 100644 index 00000000000000..f37e3527909108 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/test_pythonudf_pkg_isolation.groovy @@ -0,0 +1,84 @@ +// Licensed to 
the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_pythonudf_pkg_isolation") { + def runtime_version = "3.8.10" + def zipA = """${context.file.parent}/udf_scripts/python_udf_pkg_a/python_udf_pkg_test.zip""" + def zipB = """${context.file.parent}/udf_scripts/python_udf_pkg_b/python_udf_pkg_test.zip""" + + scp_udf_file_to_all_be(zipA) + scp_udf_file_to_all_be(zipB) + + try { + // Case 1: Same package, same module, different zip paths + sql """DROP FUNCTION IF EXISTS py_pkg_a_mod_x(INT)""" + sql """DROP FUNCTION IF EXISTS py_pkg_b_mod_x(INT)""" + sql """ + CREATE FUNCTION py_pkg_a_mod_x(INT) RETURNS INT PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${zipA}", + "symbol" = "mypkg.mod_x.evaluate", + "runtime_version" = "${runtime_version}" + ) + """ + sql """ + CREATE FUNCTION py_pkg_b_mod_x(INT) RETURNS INT PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${zipB}", + "symbol" = "mypkg.mod_x.evaluate", + "runtime_version" = "${runtime_version}" + ) + """ + + qt_pkg_isolation_1 """SELECT py_pkg_a_mod_x(5), py_pkg_b_mod_x(5);""" + + // Case 2: Same package, different modules, same zip + sql """DROP FUNCTION IF EXISTS py_pkg_a_mod_y(INT)""" + sql """ + CREATE FUNCTION py_pkg_a_mod_y(INT) RETURNS INT PROPERTIES 
( + "type" = "PYTHON_UDF", + "file" = "file://${zipA}", + "symbol" = "mypkg.mod_y.evaluate", + "runtime_version" = "${runtime_version}" + ) + """ + + qt_pkg_isolation_2 """SELECT py_pkg_a_mod_x(5), py_pkg_a_mod_y(5);""" + + // Case 3: Same package, different modules, different zips + sql """DROP FUNCTION IF EXISTS py_pkg_b_mod_y(INT)""" + sql """ + CREATE FUNCTION py_pkg_b_mod_y(INT) RETURNS INT PROPERTIES ( + "type" = "PYTHON_UDF", + "file" = "file://${zipB}", + "symbol" = "mypkg.mod_y.evaluate", + "runtime_version" = "${runtime_version}" + ) + """ + + qt_pkg_isolation_3 """SELECT py_pkg_a_mod_y(5), py_pkg_b_mod_y(5);""" + + // Case 4: All four combinations together + qt_pkg_isolation_4 """SELECT py_pkg_a_mod_x(10), py_pkg_a_mod_y(10), py_pkg_b_mod_x(10), py_pkg_b_mod_y(10);""" + + } finally { + try_sql("DROP FUNCTION IF EXISTS py_pkg_a_mod_x(INT);") + try_sql("DROP FUNCTION IF EXISTS py_pkg_a_mod_y(INT);") + try_sql("DROP FUNCTION IF EXISTS py_pkg_b_mod_x(INT);") + try_sql("DROP FUNCTION IF EXISTS py_pkg_b_mod_y(INT);") + } +} diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/json.py b/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/json.py new file mode 100644 index 00000000000000..8b229c0feb825a --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/json.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def evaluate(a): + """A UDF module under the forbidden package.""" + if a is None: + return None + return a + 1 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/logging.py b/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/logging.py new file mode 100644 index 00000000000000..8b229c0feb825a --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/logging.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +def evaluate(a): + """A UDF module under the forbidden package.""" + if a is None: + return None + return a + 1 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/sys.py b/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/sys.py new file mode 100644 index 00000000000000..8b229c0feb825a --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/sys.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def evaluate(a): + """A UDF module under the forbidden package.""" + if a is None: + return None + return a + 1 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/threading.py b/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/threading.py new file mode 100644 index 00000000000000..8b229c0feb825a --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/forbidden/threading.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def evaluate(a): + """A UDF module under the forbidden package.""" + if a is None: + return None + return a + 1 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/json.py b/regression-test/suites/pythonudf_p0/udf_scripts/json.py new file mode 100644 index 00000000000000..409bbb56006c05 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/json.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +def evaluate(a): + """A UDF module that shadows the built-in json module.""" + if a is None: + return None + return a + 1 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/logging.py b/regression-test/suites/pythonudf_p0/udf_scripts/logging.py new file mode 100644 index 00000000000000..4e884e5c158449 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/logging.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +def evaluate(a): + """A UDF module that shadows the built-in logging module.""" + if a is None: + return None + return a + 1 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_forbidden_module.zip b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_forbidden_module.zip new file mode 100644 index 00000000000000..0b4e2b150c1be4 Binary files /dev/null and b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_forbidden_module.zip differ diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/mypkg/mod_x.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/mypkg/mod_x.py new file mode 100644 index 00000000000000..7207d16bd80285 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/mypkg/mod_x.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +def evaluate(x): + if x is None: + return None + return x + 10 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/mypkg/mod_y.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/mypkg/mod_y.py new file mode 100644 index 00000000000000..4eaebd1c255612 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/mypkg/mod_y.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +def evaluate(x): + if x is None: + return None + return x + 20 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/python_udf_pkg_test.zip b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/python_udf_pkg_test.zip new file mode 100644 index 00000000000000..e76530f21e1d4a Binary files /dev/null and b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_a/python_udf_pkg_test.zip differ diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/mypkg/mod_x.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/mypkg/mod_x.py new file mode 100644 index 00000000000000..63045049335b45 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/mypkg/mod_x.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +def evaluate(x): + if x is None: + return None + return x + 100 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/mypkg/mod_y.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/mypkg/mod_y.py new file mode 100644 index 00000000000000..299a4a71e5fd67 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/mypkg/mod_y.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +def evaluate(x): + if x is None: + return None + return x + 200 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/python_udf_pkg_test.zip b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/python_udf_pkg_test.zip new file mode 100644 index 00000000000000..c1ff0ab123ebe7 Binary files /dev/null and b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_pkg_b/python_udf_pkg_test.zip differ diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/safepkg_udf/logging.py b/regression-test/suites/pythonudf_p0/udf_scripts/safepkg_udf/logging.py new file mode 100644 index 00000000000000..f2a5a3a4885405 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/safepkg_udf/logging.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +def evaluate(a): + """A valid packaged UDF whose middle module name is forbidden-like.""" + if a is None: + return None + return a + 10 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/sys.py b/regression-test/suites/pythonudf_p0/udf_scripts/sys.py new file mode 100644 index 00000000000000..9c7bd4e098025c --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/sys.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def evaluate(a): + """A UDF module that shadows the built-in sys module.""" + if a is None: + return None + return a + 1 diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/threading.py b/regression-test/suites/pythonudf_p0/udf_scripts/threading.py new file mode 100644 index 00000000000000..c9b69f5c44fc79 --- /dev/null +++ b/regression-test/suites/pythonudf_p0/udf_scripts/threading.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def evaluate(a): + """A UDF module that shadows the built-in threading module.""" + if a is None: + return None + return a + 1 diff --git a/regression-test/suites/pythonudtf_p0/test_pythonudtf_forbidden_module.groovy b/regression-test/suites/pythonudtf_p0/test_pythonudtf_forbidden_module.groovy new file mode 100644 index 00000000000000..848102532fe31a --- /dev/null +++ b/regression-test/suites/pythonudtf_p0/test_pythonudtf_forbidden_module.groovy @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+suite("test_pythonudtf_forbidden_module") {
+    // Top-level UDTF modules named like modules the Python UDF server itself imports must be
+    // rejected, while the same name nested inside a package (safepkg_udtf.inspect) must still load.
+
+    def pyPath = """${context.file.parent}/udtf_scripts/python_udtf_forbidden_module.zip"""
+    scp_udf_file_to_all_be(pyPath)
+    def runtime_version = "3.8.10"
+    def forbiddenCases = [
+        [name: "importlib", function: "py_forbidden_importlib_udtf", symbol: "importlib.forbidden_udtf"],
+        [name: "inspect", function: "py_forbidden_inspect_udtf", symbol: "inspect.forbidden_udtf"],
+        [name: "ipaddress", function: "py_forbidden_ipaddress_udtf", symbol: "ipaddress.forbidden_udtf"],
+        [name: "base64", function: "py_forbidden_base64_udtf", symbol: "base64.forbidden_udtf"],
+    ]
+    log.info("Python Zip path: ${pyPath}".toString())
+
+    try {
+        // Driver table: every UDTF below is applied to column val, results are ordered by id.
+        sql """ DROP TABLE IF EXISTS udtf_forbidden_test """
+        sql """
+        CREATE TABLE udtf_forbidden_test (
+            id INT,
+            val INT
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1");
+        """
+
+        sql """ INSERT INTO udtf_forbidden_test VALUES (1, 10), (2, 20), (3, 30); """
+
+        forbiddenCases.each { forbiddenCase ->
+            sql """ DROP FUNCTION IF EXISTS ${forbiddenCase.function}(INT); """
+            sql """
+            CREATE TABLES FUNCTION ${forbiddenCase.function}(INT)
+            RETURNS ARRAY<STRUCT<original:INT, doubled:INT>>
+            PROPERTIES (
+                "type" = "PYTHON_UDF",
+                "file" = "file://${pyPath}",
+                "symbol" = "${forbiddenCase.symbol}",
+                "runtime_version" = "${runtime_version}"
+            );
+            """
+
+            test {
+                sql """
+                    SELECT tmp.original, tmp.doubled
+                    FROM udtf_forbidden_test
+                    LATERAL VIEW ${forbiddenCase.function}(val) tmp AS original, doubled
+                    ORDER BY id;
+                """
+                exception "is not allowed for UDFs"
+            }
+        }
+
+        sql """ DROP FUNCTION IF EXISTS py_mid_forbidden_udtf_ok(INT); """
+        sql """
+        CREATE TABLES FUNCTION py_mid_forbidden_udtf_ok(INT)
+        RETURNS ARRAY<STRUCT<original:INT, shifted:INT>>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "file" = "file://${pyPath}",
+            "symbol" = "safepkg_udtf.inspect.safe_udtf",
+            "runtime_version" = "${runtime_version}"
+        );
+        """
+
+        qt_mid_forbidden_udtf_ok """
+            SELECT tmp.original, tmp.shifted
+            FROM udtf_forbidden_test
+            LATERAL VIEW py_mid_forbidden_udtf_ok(val) tmp AS original, shifted
+            ORDER BY id;
+        """
+
+    } finally {
+        forbiddenCases.each { forbiddenCase ->
+            try_sql("DROP FUNCTION IF EXISTS ${forbiddenCase.function}(INT);")
+        }
+        try_sql("DROP FUNCTION IF EXISTS py_mid_forbidden_udtf_ok(INT);")
+        try_sql("DROP TABLE IF EXISTS udtf_forbidden_test")
+    }
+}
diff --git a/regression-test/suites/pythonudtf_p0/test_pythonudtf_pkg_isolation.groovy b/regression-test/suites/pythonudtf_p0/test_pythonudtf_pkg_isolation.groovy
new file mode 100644
index 00000000000000..88ffacc8d7208d
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/test_pythonudtf_pkg_isolation.groovy
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudtf_pkg_isolation") {
+    def runtime_version = "3.8.10"
+    def zipA = """${context.file.parent}/udtf_scripts/python_udtf_pkg_a/python_udtf_pkg_test.zip"""
+    def zipB = """${context.file.parent}/udtf_scripts/python_udtf_pkg_b/python_udtf_pkg_test.zip"""
+    // zipA and zipB share the archive name and the symbols used below; only their directory differs.
+    scp_udf_file_to_all_be(zipA)
+    scp_udf_file_to_all_be(zipB)
+
+    sql """DROP TABLE IF EXISTS py_udtf_pkg_tbl"""
+    sql """
+        CREATE TABLE py_udtf_pkg_tbl (
+            v INT
+        ) ENGINE=OLAP
+        DUPLICATE KEY(v)
+        DISTRIBUTED BY HASH(v) BUCKETS 1
+        PROPERTIES("replication_num" = "1");
+    """
+    sql """INSERT INTO py_udtf_pkg_tbl VALUES (1), (2);"""
+
+    try {
+        // Case 1: same symbol (mypkg.mod_x.process) registered from two different zip paths
+        sql """DROP FUNCTION IF EXISTS py_pkg_a_t_x(INT)"""
+        sql """DROP FUNCTION IF EXISTS py_pkg_b_t_x(INT)"""
+        sql """
+            CREATE TABLES FUNCTION py_pkg_a_t_x(INT)
+            RETURNS ARRAY<INT>
+            PROPERTIES (
+                "type" = "PYTHON_UDF",
+                "file" = "file://${zipA}",
+                "symbol" = "mypkg.mod_x.process",
+                "runtime_version" = "${runtime_version}"
+            )
+        """
+        sql """
+            CREATE TABLES FUNCTION py_pkg_b_t_x(INT)
+            RETURNS ARRAY<INT>
+            PROPERTIES (
+                "type" = "PYTHON_UDF",
+                "file" = "file://${zipB}",
+                "symbol" = "mypkg.mod_x.process",
+                "runtime_version" = "${runtime_version}"
+            )
+        """
+
+        qt_pkg_isolation_1 """
+            SELECT a.c, b.c
+            FROM py_udtf_pkg_tbl
+            LATERAL VIEW py_pkg_a_t_x(v) a AS c
+            LATERAL VIEW py_pkg_b_t_x(v) b AS c
+            ORDER BY a.c, b.c;
+        """
+
+        // Case 2: same package, different modules (mod_x and mod_y), both from zipA
+        sql """DROP FUNCTION IF EXISTS py_pkg_a_t_y(INT)"""
+        sql """
+            CREATE TABLES FUNCTION py_pkg_a_t_y(INT)
+            RETURNS ARRAY<INT>
+            PROPERTIES (
+                "type" = "PYTHON_UDF",
+                "file" = "file://${zipA}",
+                "symbol" = "mypkg.mod_y.process",
+                "runtime_version" = "${runtime_version}"
+            )
+        """
+
+        qt_pkg_isolation_2 """
+            SELECT a.c, b.c
+            FROM py_udtf_pkg_tbl
+            LATERAL VIEW py_pkg_a_t_x(v) a AS c
+            LATERAL VIEW py_pkg_a_t_y(v) b AS c
+            ORDER BY a.c, b.c;
+        """
+
+        // Case 3: same symbol (mypkg.mod_y.process) registered from zipA and from zipB
+        sql """DROP FUNCTION IF EXISTS py_pkg_b_t_y(INT)"""
+        sql """
+            CREATE TABLES FUNCTION py_pkg_b_t_y(INT)
+            RETURNS ARRAY<INT>
+            PROPERTIES (
+                "type" = "PYTHON_UDF",
+                "file" = "file://${zipB}",
+                "symbol" = "mypkg.mod_y.process",
+                "runtime_version" = "${runtime_version}"
+            )
+        """
+
+        qt_pkg_isolation_3 """
+            SELECT a.c, b.c
+            FROM py_udtf_pkg_tbl
+            LATERAL VIEW py_pkg_a_t_y(v) a AS c
+            LATERAL VIEW py_pkg_b_t_y(v) b AS c
+            ORDER BY a.c, b.c;
+        """
+
+        // Case 4: all four functions combined in a single query
+        qt_pkg_isolation_4 """
+            SELECT ax.c, ay.c, bx.c, b_y.c
+            FROM py_udtf_pkg_tbl
+            LATERAL VIEW py_pkg_a_t_x(v) ax AS c
+            LATERAL VIEW py_pkg_a_t_y(v) ay AS c
+            LATERAL VIEW py_pkg_b_t_x(v) bx AS c
+            LATERAL VIEW py_pkg_b_t_y(v) b_y AS c
+            ORDER BY ax.c, ay.c, bx.c, b_y.c;
+        """
+
+    } finally {
+        try_sql("DROP FUNCTION IF EXISTS py_pkg_a_t_x(INT);")
+        try_sql("DROP FUNCTION IF EXISTS py_pkg_a_t_y(INT);")
+        try_sql("DROP FUNCTION IF EXISTS py_pkg_b_t_x(INT);")
+        try_sql("DROP FUNCTION IF EXISTS py_pkg_b_t_y(INT);")
+    }
+}
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/base64.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/base64.py
new file mode 100644
index 00000000000000..ee119476bbf86b
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/base64.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def forbidden_udtf(value):
+    """Emit (value, value * 2) for non-None input; this module is named like the stdlib base64 module."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value * 2)
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/base64.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/base64.py
new file mode 100644
index 00000000000000..b988dde5a5d04c
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/base64.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def forbidden_udtf(value):
+    """Emit (value, value * 2) for non-None input; fixture module inside the forbidden package."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value * 2)
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/importlib.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/importlib.py
new file mode 100644
index 00000000000000..b988dde5a5d04c
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/importlib.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def forbidden_udtf(value):
+    """Emit (value, value * 2) for non-None input; fixture module inside the forbidden package."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value * 2)
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/inspect.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/inspect.py
new file mode 100644
index 00000000000000..b988dde5a5d04c
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/inspect.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def forbidden_udtf(value):
+    """Emit (value, value * 2) for non-None input; fixture module inside the forbidden package."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value * 2)
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/ipaddress.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/ipaddress.py
new file mode 100644
index 00000000000000..b988dde5a5d04c
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/forbidden/ipaddress.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def forbidden_udtf(value):
+    """Emit (value, value * 2) for non-None input; fixture module inside the forbidden package."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value * 2)
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/importlib.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/importlib.py
new file mode 100644
index 00000000000000..14158f06416df9
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/importlib.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def forbidden_udtf(value):
+    """Emit (value, value * 2) for non-None input; this module is named like the stdlib importlib package."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value * 2)
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/inspect.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/inspect.py
new file mode 100644
index 00000000000000..4928868d5d7197
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/inspect.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def forbidden_udtf(value):
+    """Emit (value, value * 2) for non-None input; this module is named like the stdlib inspect module."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value * 2)
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/ipaddress.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/ipaddress.py
new file mode 100644
index 00000000000000..ddab47910777f9
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/ipaddress.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def forbidden_udtf(value):
+    """Emit (value, value * 2) for non-None input; this module is named like the stdlib ipaddress module."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value * 2)
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_forbidden_module.zip b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_forbidden_module.zip
new file mode 100644
index 00000000000000..35f9521c8460c3
Binary files /dev/null and b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_forbidden_module.zip differ
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/mypkg/mod_x.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/mypkg/mod_x.py
new file mode 100644
index 00000000000000..4b0f5063422009
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/mypkg/mod_x.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def process(x):  # generator: yields at most one single-element tuple per call
+    if x is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (x,)  # this copy (python_udtf_pkg_a/mypkg/mod_x.py) emits x unchanged
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/mypkg/mod_y.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/mypkg/mod_y.py
new file mode 100644
index 00000000000000..8f8b0ac66e7d88
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/mypkg/mod_y.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def process(x):  # generator: yields at most one single-element tuple per call
+    if x is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (x + 100,)  # this copy (python_udtf_pkg_a/mypkg/mod_y.py) adds 100
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/python_udtf_pkg_test.zip b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/python_udtf_pkg_test.zip
new file mode 100644
index 00000000000000..0828cd94cb2512
Binary files /dev/null and b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_a/python_udtf_pkg_test.zip differ
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/mypkg/mod_x.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/mypkg/mod_x.py
new file mode 100644
index 00000000000000..d3021f48430e59
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/mypkg/mod_x.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def process(x):  # generator: yields at most one single-element tuple per call
+    if x is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (x + 200,)  # this copy (python_udtf_pkg_b/mypkg/mod_x.py) adds 200
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/mypkg/mod_y.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/mypkg/mod_y.py
new file mode 100644
index 00000000000000..451520472833ff
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/mypkg/mod_y.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def process(x):  # generator: yields at most one single-element tuple per call
+    if x is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (x + 300,)  # this copy (python_udtf_pkg_b/mypkg/mod_y.py) adds 300
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/python_udtf_pkg_test.zip b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/python_udtf_pkg_test.zip
new file mode 100644
index 00000000000000..5f937b546f2315
Binary files /dev/null and b/regression-test/suites/pythonudtf_p0/udtf_scripts/python_udtf_pkg_b/python_udtf_pkg_test.zip differ
diff --git a/regression-test/suites/pythonudtf_p0/udtf_scripts/safepkg_udtf/inspect.py b/regression-test/suites/pythonudtf_p0/udtf_scripts/safepkg_udtf/inspect.py
new file mode 100644
index 00000000000000..c4cf88273158c1
--- /dev/null
+++ b/regression-test/suites/pythonudtf_p0/udtf_scripts/safepkg_udtf/inspect.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def safe_udtf(value):
+    """Emit (value, value + 10) for non-None input; module inspect nested under package safepkg_udtf."""
+    if value is not None:  # None input (presumably SQL NULL) produces no rows
+        yield (value, value + 10)