Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
504520e
[Feat](udf) Support Python UDF/UDAF/UDTF for Doris #59543
linrrzqqq Apr 23, 2026
d2fe14d
[Fix](p0) Deduplicate table names to avoid cross-suite drops (#60466)
linrrzqqq Feb 4, 2026
ef218d8
[Enhancement](test) Add tests for Python UDF (#60499)
linrrzqqq Feb 9, 2026
6281d64
[Enhancement](udf) SHOW FULL FUNCTION display more information (#60690)
linrrzqqq Feb 26, 2026
1644125
[Enhancement](pyudf) add helper command to show more py info (#60751)
linrrzqqq Feb 26, 2026
a546ff0
[Enhancement](pyudf) Support MemTracker in PythonUdf (#60655)
linrrzqqq Feb 27, 2026
295d2aa
[Enhancement](udf) clear cache when dropping function (#60630)
linrrzqqq Mar 2, 2026
b9225ff
[Fix](pythonUdf) Fix pythonUdf helper cmd coredump when `enable_pytho…
linrrzqqq Mar 12, 2026
4b6849d
[Fix](pyudf) Fix concurrent race condition when import module (#61280)
linrrzqqq Mar 27, 2026
712337c
[Fix](pyudf) Fix error type conversion (#61729)
linrrzqqq Mar 31, 2026
6f30e76
[Fix](pyudf) Fix import conflicts for modules with the same top-level…
linrrzqqq Apr 8, 2026
38d8425
[Update](p0) control python version via conf in p0-test (#62338)
linrrzqqq Apr 13, 2026
4132276
[Fix](pyudf) make Python server pool selection alive-aware and versio…
linrrzqqq May 7, 2026
7bafb96
[Enhancement](pyudf) Support parameterless calls for pythonUDF (#62624)
linrrzqqq May 8, 2026
2a01c05
[Log](pyudf) Add progress logs for python process pool init (#62974)
linrrzqqq May 8, 2026
37dde50
[Fix](pyudf) clear Nereids UDF registry on drop database (#62950)
linrrzqqq May 11, 2026
2f9379a
[Fix](pyudf) clear stale UDAF state cache on drop (#63062)
linrrzqqq May 12, 2026
797a825
[Fix](pyudf) Fix python udf error propagation (#62613)
linrrzqqq May 13, 2026
6a24a30
[Fix](p0) rename duplicate pyudf across p0 suites (#63214)
linrrzqqq May 14, 2026
0d5d720
[Enhancement](udf) Support volatility property for scalar UDF (#62698)
linrrzqqq May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
16 changes: 12 additions & 4 deletions be/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ set(DORIS_LINK_LIBS
Storage
Runtime
Service
Udf
Util
DorisGen
Load
Expand Down Expand Up @@ -765,10 +766,16 @@ if (ENABLE_CLANG_COVERAGE AND ENABLE_CLANG_COVERAGE STREQUAL ON AND COMPILER_CLA
endif ()

if (MAKE_TEST)
add_compile_options(-fprofile-arcs -ftest-coverage -DGTEST_USE_OWN_TR1_TUPLE=0)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage")
if (NOT OS_MACOSX)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov")
add_compile_options(-DGTEST_USE_OWN_TR1_TUPLE=0)
# Only add GCC-style coverage when NOT using Clang coverage
# to avoid duplicate symbol errors (e.g., __gcov_fork, __gcov_reset)
# between libgcov.a and libclang_rt.profile-x86_64.a
if (NOT (ENABLE_CLANG_COVERAGE STREQUAL "ON" AND COMPILER_CLANG))
add_compile_options(-fprofile-arcs -ftest-coverage)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage")
if (NOT OS_MACOSX)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov")
endif()
endif()
add_definitions(-DBE_TEST)
if (ARCH_ARM)
Expand Down Expand Up @@ -879,6 +886,7 @@ add_subdirectory(${SRC_DIR}/storage/index/ann)
add_subdirectory(${SRC_DIR}/storage)
add_subdirectory(${SRC_DIR}/runtime)
add_subdirectory(${SRC_DIR}/service) # this include doris_be
add_subdirectory(${SRC_DIR}/udf)
add_subdirectory(${SRC_DIR}/cloud)
add_subdirectory(${SRC_DIR}/load)
add_subdirectory(${SRC_DIR}/information_schema)
Expand Down
16 changes: 12 additions & 4 deletions be/src/agent/task_worker_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
#include "runtime/index_policy/index_policy_mgr.h"
#include "runtime/memory/global_memory_arbitrator.h"
#include "runtime/snapshot_loader.h"
#include "runtime/user_function_cache.h"
#include "service/backend_options.h"
#include "storage/compaction/cumulative_compaction_time_series_policy.h"
#include "storage/data_dir.h"
Expand All @@ -91,6 +92,7 @@
#include "storage/task/engine_storage_migration_task.h"
#include "storage/txn/txn_manager.h"
#include "storage/utils.h"
#include "udf/python/python_server.h"
#include "util/brpc_client_cache.h"
#include "util/debug_points.h"
#include "util/jni-util.h"
Expand Down Expand Up @@ -2583,12 +2585,18 @@ void clean_trash_callback(StorageEngine& engine, const TAgentTaskRequest& req) {
}

// Agent-task callback that clears cached UDF state on this backend.
//
// req.clean_udf_cache_req supplies:
//   - function_signature: passed to Jni::Util::clean_udf_class_load_cache when
//     doris::config::enable_java_support is set.
//   - function_id (optional): when set and positive, passed to
//     UserFunctionCache::drop_function_cache and
//     PythonServerManager::clear_udaf_state_cache.
//
// Each cleanup step runs exactly once, bracketed by one "start" and one
// "finish" log line, so "finish" is only reported after all steps have run.
void clean_udf_cache_callback(const TAgentTaskRequest& req) {
    const auto& clean_req = req.clean_udf_cache_req;

    LOG(INFO) << "clean udf cache start: " << clean_req.function_signature;

    if (doris::config::enable_java_support) {
        // The result is deliberately discarded, as in the original code:
        // a failed cleanup here does not abort the remaining steps.
        static_cast<void>(Jni::Util::clean_udf_class_load_cache(clean_req.function_signature));
    }

    // function_id is an optional field; skip the id-keyed caches when it is
    // absent or not positive.
    if (clean_req.__isset.function_id && clean_req.function_id > 0) {
        UserFunctionCache::instance()->drop_function_cache(clean_req.function_id);
        PythonServerManager::instance().clear_udaf_state_cache(clean_req.function_id);
    }

    LOG(INFO) << "clean udf cache finish: function_signature=" << clean_req.function_signature;
}

void report_index_policy_callback(const ClusterInfo* cluster_info) {
Expand Down
19 changes: 19 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <mutex>
#include <random>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

Expand All @@ -49,6 +50,7 @@
#include "runtime/workload_group/workload_group_manager.h"
#include "storage/storage_engine.h"
#include "util/cpu_info.h"
#include "util/string_util.h"

namespace doris::config {
#include "common/compile_check_avoid_begin.h"
Expand Down Expand Up @@ -1119,6 +1121,23 @@ DEFINE_mInt32(segcompaction_num_threads, "5");
// enable java udf and jdbc scannode
DEFINE_Bool(enable_java_support, "true");

// Enable Python UDF support; disabled by default.
DEFINE_Bool(enable_python_udf_support, "false");
// Python environment mode; options: conda, venv. Empty by default.
DEFINE_String(python_env_mode, "");
// Root path of the conda runtime; python_env_mode should be conda.
DEFINE_String(python_conda_root_path, "");
// Root path of the venv runtime; python_env_mode should be venv.
DEFINE_String(python_venv_root_path, "${DORIS_HOME}/lib/udf/python");
// Python interpreter paths used by venv, e.g. /usr/bin/python3.7:/usr/bin/python3.6
// NOTE(review): the example suggests a colon-separated list -- confirm against the code that parses it.
DEFINE_String(python_venv_interpreter_paths, "");
// Max Python processes in the global shared pool; each Python version can have up to this many.
// 0 means use the CPU core count as the default; otherwise the specified value is used.
DEFINE_mInt32(max_python_process_num, "0");
// Memory limit in bytes for all Python UDF processes; a warning is logged when it is exceeded.
// Default is 10 GiB (10 * 1024^3 = 10737418240 bytes).
DEFINE_mInt64(python_udf_processes_memory_limit_bytes, "10737418240");

// Set config randomly to check more issues in github workflow
DEFINE_Bool(enable_fuzzy_mode, "false");

Expand Down
15 changes: 15 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,21 @@ DECLARE_mInt32(segcompaction_num_threads);
// enable java udf and jdbc scannode
DECLARE_Bool(enable_java_support);

// Enable Python UDF support (defined as "false" in config.cpp).
DECLARE_Bool(enable_python_udf_support);
// Python environment mode; options: conda, venv.
DECLARE_String(python_env_mode);
// Root path of the conda runtime; python_env_mode should be conda.
DECLARE_String(python_conda_root_path);
// Root path of the venv runtime; python_env_mode should be venv.
DECLARE_String(python_venv_root_path);
// Python interpreter paths used by venv, e.g. /usr/bin/python3.7:/usr/bin/python3.6
DECLARE_String(python_venv_interpreter_paths);
// Max Python processes in the global shared pool; each Python version can have up to this many.
// Per the comment on its definition in config.cpp, 0 means use the CPU core count.
DECLARE_mInt32(max_python_process_num);
// Memory limit in bytes for all Python UDF processes; a warning is logged when it is exceeded.
// Defined as 10737418240 bytes (10 GiB) in config.cpp.
DECLARE_mInt64(python_udf_processes_memory_limit_bytes);

// Set config randomly to check more issues in github workflow
DECLARE_Bool(enable_fuzzy_mode);

Expand Down
7 changes: 7 additions & 0 deletions be/src/core/data_type_serde/data_type_number_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,13 @@ Status DataTypeNumberSerDe<T>::read_column_from_arrow(IColumn& column,

/// buffers[0] is a null bitmap and buffers[1] are actual values
std::shared_ptr<arrow::Buffer> buffer = arrow_array->data()->buffers[1];

// Handle empty array case: buffer can be null when row_count is 0.
// Passing nullptr to memcpy (via col_data.insert) is undefined behavior even if size is 0.
if (row_count == 0 || buffer == nullptr) {
return Status::OK();
}

const auto* raw_data =
reinterpret_cast<const typename PrimitiveTypeTraits<T>::CppType*>(buffer->data()) +
start;
Expand Down
Loading
Loading